2 @message |
RecursionError: maximum recursion depth exceeded while calling a Python object
LOG DETAILS:
2025-07-01 17:49:05.325
2025-07-01 17:49:05.325 act = <firebird.qa.plugin.Action object at [hex]>
2025-07-01 17:49:05.325
2025-07-01 17:49:05.325 @pytest.mark.version('>=3')
2025-07-01 17:49:05.325 def test_1(act: Action):
2025-07-01 17:49:05.325 act.expected_stdout = expected_stdout
2025-07-01 17:49:05.325 act.execute()
2025-07-01 17:49:05.325 > assert act.clean_stdout == act.clean_expected_stdout
2025-07-01 17:49:05.325
2025-07-01 17:49:05.325 tests\bugs\core_2969_test.py:1211:
2025-07-01 17:49:05.325 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.326
2025-07-01 17:49:05.326 ops = ('==',), results = (False,)
2025-07-01 17:49:05.326 expls = ('%(py2)s\n{%(py2)s = %(py0)s.clean_stdout\n} == %(py6)s\n{%(py6)s = %(py4)s.clean_expected_stdout\n}',)
2025-07-01 17:49:05.326 each_obj = ('WAS_OVERWRITTEN CTX_KEY CTX_VAL\n=============== ============================== =======\n1var...yyy\n1 var_997 yyy\n1 var_998 yyy\n1 var_999 yyy')
2025-07-01 17:49:05.326
2025-07-01 17:49:05.326 def _call_reprcompare(
2025-07-01 17:49:05.326 ops: Sequence[str],
2025-07-01 17:49:05.326 results: Sequence[bool],
2025-07-01 17:49:05.326 expls: Sequence[str],
2025-07-01 17:49:05.326 each_obj: Sequence[object],
2025-07-01 17:49:05.326 ) -> str:
2025-07-01 17:49:05.326 for i, res, expl in zip(range(len(ops)), results, expls):
2025-07-01 17:49:05.326 try:
2025-07-01 17:49:05.326 done = not res
2025-07-01 17:49:05.326 except Exception:
2025-07-01 17:49:05.326 done = True
2025-07-01 17:49:05.327 if done:
2025-07-01 17:49:05.327 break
2025-07-01 17:49:05.327 if util._reprcompare is not None:
2025-07-01 17:49:05.327 > custom = util._reprcompare(ops[i], each_obj[i], each_obj[i + 1])
2025-07-01 17:49:05.327
2025-07-01 17:49:05.327 C:\Python3x\Lib\site-packages\_pytest\assertion\rewrite.py:499:
2025-07-01 17:49:05.327 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.327
2025-07-01 17:49:05.327 op = '=='
2025-07-01 17:49:05.327 left = 'WAS_OVERWRITTEN CTX_KEY CTX_VAL\n=============== ============================== =======\n1var_... yyy\n1var_997 yyy\n1var_998 yyy\n1var_999 yyy'
2025-07-01 17:49:05.327 right = 'WAS_OVERWRITTEN CTX_KEY CTX_VAL\n=============== ============================== =======\n1 var... yyy\n1 var_997 yyy\n1 var_998 yyy\n1 var_999 yyy'
2025-07-01 17:49:05.327
2025-07-01 17:49:05.327 def callbinrepr(op, left: object, right: object) -> Optional[str]:
2025-07-01 17:49:05.327 """Call the pytest_assertrepr_compare hook and prepare the result.
2025-07-01 17:49:05.327
2025-07-01 17:49:05.327 This uses the first result from the hook and then ensures the
2025-07-01 17:49:05.328 following:
2025-07-01 17:49:05.328 * Overly verbose explanations are truncated unless configured otherwise
2025-07-01 17:49:05.328 (eg. if running in verbose mode).
2025-07-01 17:49:05.328 * Embedded newlines are escaped to help util.format_explanation()
2025-07-01 17:49:05.328 later.
2025-07-01 17:49:05.328 * If the rewrite mode is used embedded %-characters are replaced
2025-07-01 17:49:05.328 to protect later % formatting.
2025-07-01 17:49:05.328
2025-07-01 17:49:05.328 The result can be formatted by util.format_explanation() for
2025-07-01 17:49:05.328 pretty printing.
2025-07-01 17:49:05.328 """
2025-07-01 17:49:05.328 > hook_result = ihook.pytest_assertrepr_compare(
2025-07-01 17:49:05.328 config=item.config, op=op, left=left, right=right
2025-07-01 17:49:05.328 )
2025-07-01 17:49:05.328
2025-07-01 17:49:05.328 C:\Python3x\Lib\site-packages\_pytest\assertion\__init__.py:141:
2025-07-01 17:49:05.328 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.329
2025-07-01 17:49:05.329 self = <HookCaller 'pytest_assertrepr_compare'>
2025-07-01 17:49:05.329 kwargs = {'config': <_pytest.config.Config object at [hex]>, 'left': 'WAS_OVERWRITTEN CTX_KEY ...yyy\n1 var_997 yyy\n1 var_998 yyy\n1 var_999 yyy'}
2025-07-01 17:49:05.329 firstresult = False
2025-07-01 17:49:05.329
2025-07-01 17:49:05.329 def __call__(self, **kwargs: object) -> Any:
2025-07-01 17:49:05.329 """Call the hook.
2025-07-01 17:49:05.329
2025-07-01 17:49:05.329 Only accepts keyword arguments, which should match the hook
2025-07-01 17:49:05.329 specification.
2025-07-01 17:49:05.329
2025-07-01 17:49:05.329 Returns the result(s) of calling all registered plugins, see
2025-07-01 17:49:05.329 :ref:`calling`.
2025-07-01 17:49:05.329 """
2025-07-01 17:49:05.329 assert (
2025-07-01 17:49:05.329 not self.is_historic()
2025-07-01 17:49:05.329 ), "Cannot directly call a historic hook - use call_historic instead."
2025-07-01 17:49:05.330 self._verify_all_args_are_provided(kwargs)
2025-07-01 17:49:05.330 firstresult = self.spec.opts.get("firstresult", False) if self.spec else False
2025-07-01 17:49:05.330 # Copy because plugins may register other plugins during iteration (#438).
2025-07-01 17:49:05.330 > return self._hookexec(self.name, self._hookimpls.copy(), kwargs, firstresult)
2025-07-01 17:49:05.330
2025-07-01 17:49:05.330 C:\Python3x\Lib\site-packages\pluggy\_hooks.py:501:
2025-07-01 17:49:05.330 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.330
2025-07-01 17:49:05.330 self = <_pytest.config.PytestPluginManager object at [hex]>
2025-07-01 17:49:05.330 hook_name = 'pytest_assertrepr_compare'
2025-07-01 17:49:05.330 methods = [<HookImpl plugin_name='assertion', plugin=<module '_pytest.assertion' from 'C:\\Python3x\\Lib\\site-packages\\_pytest...plugin_name='firebird', plugin=<module 'firebird.qa.plugin' from 'H:\\QA\\firebird-qa\\src\\firebird\\qa\\plugin.py'>>]
2025-07-01 17:49:05.330 kwargs = {'config': <_pytest.config.Config object at [hex]>, 'left': 'WAS_OVERWRITTEN CTX_KEY ...yyy\n1 var_997 yyy\n1 var_998 yyy\n1 var_999 yyy'}
2025-07-01 17:49:05.330 firstresult = False
2025-07-01 17:49:05.330
2025-07-01 17:49:05.330 def _hookexec(
2025-07-01 17:49:05.330 self,
2025-07-01 17:49:05.331 hook_name: str,
2025-07-01 17:49:05.331 methods: Sequence[HookImpl],
2025-07-01 17:49:05.331 kwargs: Mapping[str, object],
2025-07-01 17:49:05.331 firstresult: bool,
2025-07-01 17:49:05.331 ) -> object | list[object]:
2025-07-01 17:49:05.331 # called from all hookcaller instances.
2025-07-01 17:49:05.331 # enable_tracing will set its own wrapping function at self._inner_hookexec
2025-07-01 17:49:05.331 > return self._inner_hookexec(hook_name, methods, kwargs, firstresult)
2025-07-01 17:49:05.331
2025-07-01 17:49:05.331 C:\Python3x\Lib\site-packages\pluggy\_manager.py:119:
2025-07-01 17:49:05.331 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.331
2025-07-01 17:49:05.331 config = <_pytest.config.Config object at [hex]>, op = '=='
2025-07-01 17:49:05.331 left = 'WAS_OVERWRITTEN CTX_KEY CTX_VAL\n=============== ============================== =======\n1var_... yyy\n1var_997 yyy\n1var_998 yyy\n1var_999 yyy'
2025-07-01 17:49:05.331 right = 'WAS_OVERWRITTEN CTX_KEY CTX_VAL\n=============== ============================== =======\n1 var... yyy\n1 var_997 yyy\n1 var_998 yyy\n1 var_999 yyy'
2025-07-01 17:49:05.331
2025-07-01 17:49:05.331 def pytest_assertrepr_compare(config: Config, op: str, left: object, right: object) -> Optional[List[str]]:
2025-07-01 17:49:05.332 """Returns explanation for comparisons in failing assert expressions.
2025-07-01 17:49:05.332
2025-07-01 17:49:05.332 If both objects are `str`, uses `difflib.ndiff` to provide explanation.
2025-07-01 17:49:05.332 """
2025-07-01 17:49:05.332 if isinstance(left, str) and isinstance(right, str) and op == "==":
2025-07-01 17:49:05.332 # 16.11.2023, pzotov: we have to put empty string at the beginning of each comparing lists.
2025-07-01 17:49:05.332 # Otherwise first diff will be at the same line as 'assert' phrase, which causes readability be poor.
2025-07-01 17:49:05.332 #
2025-07-01 17:49:05.332 left_lines = ['']
2025-07-01 17:49:05.332 left_lines.extend(left.splitlines())
2025-07-01 17:49:05.332 right_lines = ['']
2025-07-01 17:49:05.332 right_lines.extend(right.splitlines())
2025-07-01 17:49:05.332
2025-07-01 17:49:05.332 # 16.11.2023, pzotov
2025-07-01 17:49:05.332 # ndiff output must be interpreted as following:
2025-07-01 17:49:05.332 # * "E - <some text>" ==> MISSED line (it was in EXPECTED text but absent in actual one).
2025-07-01 17:49:05.333 # * "E + <some_text>" ==> EXCESSIVE line (it is not in EXPECTED text but did appear in actual).
2025-07-01 17:49:05.333 # But for QA-purposes, this output must answer the question:
2025-07-01 17:49:05.333 # "what must be changed in ACTUAL output so that it became equal to EXPECTED"
2025-07-01 17:49:05.333 # (i.e. how to "REVERT" actual back to expected).
2025-07-01 17:49:05.333 # In order to see such result, we have to specify 'right_lines' to the 1st argument that is passed to ndiff().
2025-07-01 17:49:05.333 # ::: NB :::
2025-07-01 17:49:05.333 # We assume that all tests are written so that ACTUAL output is left side in 'assert' statement and EXPECTED
2025-07-01 17:49:05.333 # is right side, e.g: assert act.clean_stdout == act.clean_expected_stdout
2025-07-01 17:49:05.333 # This requirement is CRUCIAL if we use ndiff() instead of default pytest comparison method!
2025-07-01 17:49:05.333 #
2025-07-01 17:49:05.333 > return list(ndiff(right_lines, left_lines))
2025-07-01 17:49:05.333
2025-07-01 17:49:05.333 src\firebird\qa\plugin.py:608:
2025-07-01 17:49:05.333 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.333
2025-07-01 17:49:05.334 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.337 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.337 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.337
2025-07-01 17:49:05.337 def compare(self, a, b):
2025-07-01 17:49:05.337 r"""
2025-07-01 17:49:05.337 Compare two sequences of lines; generate the resulting delta.
2025-07-01 17:49:05.337
2025-07-01 17:49:05.337 Each sequence must contain individual single-line strings ending with
2025-07-01 17:49:05.337 newlines. Such sequences can be obtained from the `readlines()` method
2025-07-01 17:49:05.337 of file-like objects. The delta generated also consists of newline-
2025-07-01 17:49:05.338 terminated strings, ready to be printed as-is via the writelines()
2025-07-01 17:49:05.338 method of a file-like object.
2025-07-01 17:49:05.338
2025-07-01 17:49:05.338 Example:
2025-07-01 17:49:05.338
2025-07-01 17:49:05.338 >>> print(''.join(Differ().compare('one\ntwo\nthree\n'.splitlines(True),
2025-07-01 17:49:05.338 ... 'ore\ntree\nemu\n'.splitlines(True))),
2025-07-01 17:49:05.338 ... end="")
2025-07-01 17:49:05.338 - one
2025-07-01 17:49:05.338 + ore
2025-07-01 17:49:05.338 - two
2025-07-01 17:49:05.338 - three
2025-07-01 17:49:05.338 + tree
2025-07-01 17:49:05.338 + emu
2025-07-01 17:49:05.339 """
2025-07-01 17:49:05.339
2025-07-01 17:49:05.339 cruncher = SequenceMatcher(self.linejunk, a, b)
2025-07-01 17:49:05.339 for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
2025-07-01 17:49:05.339 if tag == 'replace':
2025-07-01 17:49:05.339 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.339 elif tag == 'delete':
2025-07-01 17:49:05.339 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.339 elif tag == 'insert':
2025-07-01 17:49:05.339 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.339 elif tag == 'equal':
2025-07-01 17:49:05.339 g = self._dump(' ', a, alo, ahi)
2025-07-01 17:49:05.339 else:
2025-07-01 17:49:05.339 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.339
2025-07-01 17:49:05.339 > yield from g
2025-07-01 17:49:05.339
2025-07-01 17:49:05.340 C:\Python3x\Lib\difflib.py:872:
2025-07-01 17:49:05.340 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.340
2025-07-01 17:49:05.340 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.340 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.340 alo = 3, ahi = 1101
2025-07-01 17:49:05.340 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.340 blo = 3, bhi = 1101
2025-07-01 17:49:05.340
2025-07-01 17:49:05.340 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.340 r"""
2025-07-01 17:49:05.340 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.340 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.340 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.340 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.340
2025-07-01 17:49:05.341 Example:
2025-07-01 17:49:05.341
2025-07-01 17:49:05.341 >>> d = Differ()
2025-07-01 17:49:05.341 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.341 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.341 >>> print(''.join(results), end="")
2025-07-01 17:49:05.341 - abcDefghiJkl
2025-07-01 17:49:05.341 + abcdefGhijkl
2025-07-01 17:49:05.341 """
2025-07-01 17:49:05.341
2025-07-01 17:49:05.341 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.341 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.341 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.341 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.341 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.341
2025-07-01 17:49:05.342 # search for the pair that matches best without being identical
2025-07-01 17:49:05.342 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.342 # on junk -- unless we have to)
2025-07-01 17:49:05.342 for j in range(blo, bhi):
2025-07-01 17:49:05.342 bj = b[j]
2025-07-01 17:49:05.342 cruncher.set_seq2(bj)
2025-07-01 17:49:05.342 for i in range(alo, ahi):
2025-07-01 17:49:05.342 ai = a[i]
2025-07-01 17:49:05.342 if ai == bj:
2025-07-01 17:49:05.342 if eqi is None:
2025-07-01 17:49:05.342 eqi, eqj = i, j
2025-07-01 17:49:05.342 continue
2025-07-01 17:49:05.342 cruncher.set_seq1(ai)
2025-07-01 17:49:05.342 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.342 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.342 # compares by a factor of 3.
2025-07-01 17:49:05.342 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.343 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.343 # of the computation is cached by cruncher
2025-07-01 17:49:05.343 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.343 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.343 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.343 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.343 if best_ratio < cutoff:
2025-07-01 17:49:05.343 # no non-identical "pretty close" pair
2025-07-01 17:49:05.343 if eqi is None:
2025-07-01 17:49:05.343 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.343 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.343 return
2025-07-01 17:49:05.343 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.343 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.343 else:
2025-07-01 17:49:05.343 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.343 eqi = None
2025-07-01 17:49:05.344
2025-07-01 17:49:05.344 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.344 # identical
2025-07-01 17:49:05.344
2025-07-01 17:49:05.344 # pump out diffs from before the synch point
2025-07-01 17:49:05.344 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.344
2025-07-01 17:49:05.344 # do intraline marking on the synch pair
2025-07-01 17:49:05.344 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.344 if eqi is None:
2025-07-01 17:49:05.344 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.344 atags = btags = ""
2025-07-01 17:49:05.344 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.344 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.344 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.344 if tag == 'replace':
2025-07-01 17:49:05.344 atags += '^' * la
2025-07-01 17:49:05.345 btags += '^' * lb
2025-07-01 17:49:05.345 elif tag == 'delete':
2025-07-01 17:49:05.345 atags += '-' * la
2025-07-01 17:49:05.345 elif tag == 'insert':
2025-07-01 17:49:05.345 btags += '+' * lb
2025-07-01 17:49:05.345 elif tag == 'equal':
2025-07-01 17:49:05.345 atags += ' ' * la
2025-07-01 17:49:05.345 btags += ' ' * lb
2025-07-01 17:49:05.345 else:
2025-07-01 17:49:05.345 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.345 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.345 else:
2025-07-01 17:49:05.345 # the synch pair is identical
2025-07-01 17:49:05.345 yield ' ' + aelt
2025-07-01 17:49:05.345
2025-07-01 17:49:05.345 # pump out diffs from after the synch point
2025-07-01 17:49:05.345 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.345
2025-07-01 17:49:05.346 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.346 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.346
2025-07-01 17:49:05.346 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.346 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.346 alo = 4, ahi = 1101
2025-07-01 17:49:05.346 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.346 blo = 4, bhi = 1101
2025-07-01 17:49:05.346
2025-07-01 17:49:05.346 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.346 g = []
2025-07-01 17:49:05.346 if alo < ahi:
2025-07-01 17:49:05.346 if blo < bhi:
2025-07-01 17:49:05.346 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.346 else:
2025-07-01 17:49:05.347 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.347 elif blo < bhi:
2025-07-01 17:49:05.347 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.347
2025-07-01 17:49:05.347 > yield from g
2025-07-01 17:49:05.347
2025-07-01 17:49:05.347 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.347 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.347
2025-07-01 17:49:05.347 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.347 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.347 alo = 4, ahi = 1101
2025-07-01 17:49:05.347 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.347 blo = 4, bhi = 1101
2025-07-01 17:49:05.347
2025-07-01 17:49:05.347 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.347 r"""
2025-07-01 17:49:05.348 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.348 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.348 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.348 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.348
2025-07-01 17:49:05.348 Example:
2025-07-01 17:49:05.348
2025-07-01 17:49:05.348 >>> d = Differ()
2025-07-01 17:49:05.348 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.348 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.348 >>> print(''.join(results), end="")
2025-07-01 17:49:05.348 - abcDefghiJkl
2025-07-01 17:49:05.348 + abcdefGhijkl
2025-07-01 17:49:05.348 """
2025-07-01 17:49:05.348
2025-07-01 17:49:05.349 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.349 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.349 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.349 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.349 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.349
2025-07-01 17:49:05.349 # search for the pair that matches best without being identical
2025-07-01 17:49:05.349 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.349 # on junk -- unless we have to)
2025-07-01 17:49:05.349 for j in range(blo, bhi):
2025-07-01 17:49:05.349 bj = b[j]
2025-07-01 17:49:05.349 cruncher.set_seq2(bj)
2025-07-01 17:49:05.349 for i in range(alo, ahi):
2025-07-01 17:49:05.349 ai = a[i]
2025-07-01 17:49:05.349 if ai == bj:
2025-07-01 17:49:05.349 if eqi is None:
2025-07-01 17:49:05.349 eqi, eqj = i, j
2025-07-01 17:49:05.353 continue
2025-07-01 17:49:05.353 cruncher.set_seq1(ai)
2025-07-01 17:49:05.353 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.353 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.353 # compares by a factor of 3.
2025-07-01 17:49:05.353 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.353 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.353 # of the computation is cached by cruncher
2025-07-01 17:49:05.353 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.353 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.353 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.354 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.354 if best_ratio < cutoff:
2025-07-01 17:49:05.354 # no non-identical "pretty close" pair
2025-07-01 17:49:05.354 if eqi is None:
2025-07-01 17:49:05.354 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.354 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.354 return
2025-07-01 17:49:05.354 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.354 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.354 else:
2025-07-01 17:49:05.354 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.354 eqi = None
2025-07-01 17:49:05.354
2025-07-01 17:49:05.354 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.354 # identical
2025-07-01 17:49:05.354
2025-07-01 17:49:05.354 # pump out diffs from before the synch point
2025-07-01 17:49:05.355 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.355
2025-07-01 17:49:05.355 # do intraline marking on the synch pair
2025-07-01 17:49:05.355 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.355 if eqi is None:
2025-07-01 17:49:05.355 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.355 atags = btags = ""
2025-07-01 17:49:05.355 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.355 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.355 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.355 if tag == 'replace':
2025-07-01 17:49:05.355 atags += '^' * la
2025-07-01 17:49:05.355 btags += '^' * lb
2025-07-01 17:49:05.355 elif tag == 'delete':
2025-07-01 17:49:05.355 atags += '-' * la
2025-07-01 17:49:05.355 elif tag == 'insert':
2025-07-01 17:49:05.355 btags += '+' * lb
2025-07-01 17:49:05.355 elif tag == 'equal':
2025-07-01 17:49:05.356 atags += ' ' * la
2025-07-01 17:49:05.356 btags += ' ' * lb
2025-07-01 17:49:05.356 else:
2025-07-01 17:49:05.356 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.356 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.356 else:
2025-07-01 17:49:05.356 # the synch pair is identical
2025-07-01 17:49:05.356 yield ' ' + aelt
2025-07-01 17:49:05.356
2025-07-01 17:49:05.356 # pump out diffs from after the synch point
2025-07-01 17:49:05.356 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.356
2025-07-01 17:49:05.356 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.356 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.356
2025-07-01 17:49:05.356 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.356 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.357 alo = 5, ahi = 1101
2025-07-01 17:49:05.357 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.357 blo = 5, bhi = 1101
2025-07-01 17:49:05.357
2025-07-01 17:49:05.357 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.357 g = []
2025-07-01 17:49:05.357 if alo < ahi:
2025-07-01 17:49:05.357 if blo < bhi:
2025-07-01 17:49:05.357 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.357 else:
2025-07-01 17:49:05.357 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.357 elif blo < bhi:
2025-07-01 17:49:05.357 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.357
2025-07-01 17:49:05.357 > yield from g
2025-07-01 17:49:05.357
2025-07-01 17:49:05.357 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.358 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.358
2025-07-01 17:49:05.358 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.358 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.358 alo = 5, ahi = 1101
2025-07-01 17:49:05.358 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.358 blo = 5, bhi = 1101
2025-07-01 17:49:05.358
2025-07-01 17:49:05.358 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.358 r"""
2025-07-01 17:49:05.358 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.358 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.358 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.358 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.358
2025-07-01 17:49:05.358 Example:
2025-07-01 17:49:05.358
2025-07-01 17:49:05.359 >>> d = Differ()
2025-07-01 17:49:05.359 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.359 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.359 >>> print(''.join(results), end="")
2025-07-01 17:49:05.359 - abcDefghiJkl
2025-07-01 17:49:05.359 + abcdefGhijkl
2025-07-01 17:49:05.359 """
2025-07-01 17:49:05.359
2025-07-01 17:49:05.359 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.359 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.359 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.359 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.359 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.359
2025-07-01 17:49:05.359 # search for the pair that matches best without being identical
2025-07-01 17:49:05.359 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.360 # on junk -- unless we have to)
2025-07-01 17:49:05.360 for j in range(blo, bhi):
2025-07-01 17:49:05.360 bj = b[j]
2025-07-01 17:49:05.360 cruncher.set_seq2(bj)
2025-07-01 17:49:05.360 for i in range(alo, ahi):
2025-07-01 17:49:05.360 ai = a[i]
2025-07-01 17:49:05.360 if ai == bj:
2025-07-01 17:49:05.360 if eqi is None:
2025-07-01 17:49:05.360 eqi, eqj = i, j
2025-07-01 17:49:05.360 continue
2025-07-01 17:49:05.360 cruncher.set_seq1(ai)
2025-07-01 17:49:05.360 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.360 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.360 # compares by a factor of 3.
2025-07-01 17:49:05.360 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.360 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.361 # of the computation is cached by cruncher
2025-07-01 17:49:05.361 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.361 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.361 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.361 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.361 if best_ratio < cutoff:
2025-07-01 17:49:05.361 # no non-identical "pretty close" pair
2025-07-01 17:49:05.361 if eqi is None:
2025-07-01 17:49:05.361 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.361 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.361 return
2025-07-01 17:49:05.361 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.361 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.361 else:
2025-07-01 17:49:05.361 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.361 eqi = None
2025-07-01 17:49:05.362
2025-07-01 17:49:05.362 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.362 # identical
2025-07-01 17:49:05.362
2025-07-01 17:49:05.362 # pump out diffs from before the synch point
2025-07-01 17:49:05.362 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.362
2025-07-01 17:49:05.362 # do intraline marking on the synch pair
2025-07-01 17:49:05.362 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.362 if eqi is None:
2025-07-01 17:49:05.362 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.362 atags = btags = ""
2025-07-01 17:49:05.362 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.362 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.362 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.362 if tag == 'replace':
2025-07-01 17:49:05.362 atags += '^' * la
2025-07-01 17:49:05.362 btags += '^' * lb
2025-07-01 17:49:05.363 elif tag == 'delete':
2025-07-01 17:49:05.363 atags += '-' * la
2025-07-01 17:49:05.363 elif tag == 'insert':
2025-07-01 17:49:05.363 btags += '+' * lb
2025-07-01 17:49:05.363 elif tag == 'equal':
2025-07-01 17:49:05.363 atags += ' ' * la
2025-07-01 17:49:05.363 btags += ' ' * lb
2025-07-01 17:49:05.363 else:
2025-07-01 17:49:05.363 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.363 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.363 else:
2025-07-01 17:49:05.363 # the synch pair is identical
2025-07-01 17:49:05.363 yield ' ' + aelt
2025-07-01 17:49:05.363
2025-07-01 17:49:05.363 # pump out diffs from after the synch point
2025-07-01 17:49:05.363 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.363
2025-07-01 17:49:05.363 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.364 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.364
2025-07-01 17:49:05.364 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.364 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.364 alo = 6, ahi = 1101
2025-07-01 17:49:05.364 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.364 blo = 6, bhi = 1101
2025-07-01 17:49:05.364
2025-07-01 17:49:05.364 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.364 g = []
2025-07-01 17:49:05.364 if alo < ahi:
2025-07-01 17:49:05.364 if blo < bhi:
2025-07-01 17:49:05.364 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.364 else:
2025-07-01 17:49:05.364 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.364 elif blo < bhi:
2025-07-01 17:49:05.364 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.365
2025-07-01 17:49:05.368 > yield from g
2025-07-01 17:49:05.368
2025-07-01 17:49:05.368 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.368 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.368
2025-07-01 17:49:05.368 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.368 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.368 alo = 6, ahi = 1101
2025-07-01 17:49:05.368 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.368 blo = 6, bhi = 1101
2025-07-01 17:49:05.368
2025-07-01 17:49:05.368 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.368 r"""
2025-07-01 17:49:05.368 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.368 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.368 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.368 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.368
2025-07-01 17:49:05.368 Example:
2025-07-01 17:49:05.368
2025-07-01 17:49:05.369 >>> d = Differ()
2025-07-01 17:49:05.369 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.369 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.369 >>> print(''.join(results), end="")
2025-07-01 17:49:05.369 - abcDefghiJkl
2025-07-01 17:49:05.369 + abcdefGhijkl
2025-07-01 17:49:05.369 """
2025-07-01 17:49:05.369
2025-07-01 17:49:05.369 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.369 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.369 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.369 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.369 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.369
2025-07-01 17:49:05.369 # search for the pair that matches best without being identical
2025-07-01 17:49:05.369 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.369 # on junk -- unless we have to)
2025-07-01 17:49:05.369 for j in range(blo, bhi):
2025-07-01 17:49:05.369 bj = b[j]
2025-07-01 17:49:05.370 cruncher.set_seq2(bj)
2025-07-01 17:49:05.370 for i in range(alo, ahi):
2025-07-01 17:49:05.370 ai = a[i]
2025-07-01 17:49:05.370 if ai == bj:
2025-07-01 17:49:05.370 if eqi is None:
2025-07-01 17:49:05.370 eqi, eqj = i, j
2025-07-01 17:49:05.370 continue
2025-07-01 17:49:05.370 cruncher.set_seq1(ai)
2025-07-01 17:49:05.370 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.370 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.370 # compares by a factor of 3.
2025-07-01 17:49:05.370 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.370 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.370 # of the computation is cached by cruncher
2025-07-01 17:49:05.370 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.370 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.370 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.370 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.370 if best_ratio < cutoff:
2025-07-01 17:49:05.370 # no non-identical "pretty close" pair
2025-07-01 17:49:05.370 if eqi is None:
2025-07-01 17:49:05.371 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.371 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.371 return
2025-07-01 17:49:05.371 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.371 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.371 else:
2025-07-01 17:49:05.371 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.371 eqi = None
2025-07-01 17:49:05.371
2025-07-01 17:49:05.371 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.371 # identical
2025-07-01 17:49:05.371
2025-07-01 17:49:05.371 # pump out diffs from before the synch point
2025-07-01 17:49:05.371 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.371
2025-07-01 17:49:05.371 # do intraline marking on the synch pair
2025-07-01 17:49:05.371 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.371 if eqi is None:
2025-07-01 17:49:05.372 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.372 atags = btags = ""
2025-07-01 17:49:05.372 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.372 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.372 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.372 if tag == 'replace':
2025-07-01 17:49:05.372 atags += '^' * la
2025-07-01 17:49:05.372 btags += '^' * lb
2025-07-01 17:49:05.372 elif tag == 'delete':
2025-07-01 17:49:05.372 atags += '-' * la
2025-07-01 17:49:05.372 elif tag == 'insert':
2025-07-01 17:49:05.372 btags += '+' * lb
2025-07-01 17:49:05.372 elif tag == 'equal':
2025-07-01 17:49:05.372 atags += ' ' * la
2025-07-01 17:49:05.372 btags += ' ' * lb
2025-07-01 17:49:05.372 else:
2025-07-01 17:49:05.372 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.372 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.372 else:
2025-07-01 17:49:05.372 # the synch pair is identical
2025-07-01 17:49:05.373 yield ' ' + aelt
2025-07-01 17:49:05.373
2025-07-01 17:49:05.373 # pump out diffs from after the synch point
2025-07-01 17:49:05.373 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.373
2025-07-01 17:49:05.373 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.373 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.373
2025-07-01 17:49:05.373 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.373 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.373 alo = 7, ahi = 1101
2025-07-01 17:49:05.373 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.373 blo = 7, bhi = 1101
2025-07-01 17:49:05.373
2025-07-01 17:49:05.373 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.373 g = []
2025-07-01 17:49:05.373 if alo < ahi:
2025-07-01 17:49:05.373 if blo < bhi:
2025-07-01 17:49:05.373 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.373 else:
2025-07-01 17:49:05.374 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.374 elif blo < bhi:
2025-07-01 17:49:05.374 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.374
2025-07-01 17:49:05.374 > yield from g
2025-07-01 17:49:05.374
2025-07-01 17:49:05.374 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.374 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.374
2025-07-01 17:49:05.374 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.374 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.374 alo = 7, ahi = 1101
2025-07-01 17:49:05.374 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.374 blo = 7, bhi = 1101
2025-07-01 17:49:05.374
2025-07-01 17:49:05.374 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.374 r"""
2025-07-01 17:49:05.374 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.374 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.374 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.375 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.375
2025-07-01 17:49:05.375 Example:
2025-07-01 17:49:05.375
2025-07-01 17:49:05.375 >>> d = Differ()
2025-07-01 17:49:05.375 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.375 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.375 >>> print(''.join(results), end="")
2025-07-01 17:49:05.375 - abcDefghiJkl
2025-07-01 17:49:05.375 + abcdefGhijkl
2025-07-01 17:49:05.375 """
2025-07-01 17:49:05.375
2025-07-01 17:49:05.375 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.375 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.375 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.375 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.375 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.375
2025-07-01 17:49:05.375 # search for the pair that matches best without being identical
2025-07-01 17:49:05.376 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.376 # on junk -- unless we have to)
2025-07-01 17:49:05.376 for j in range(blo, bhi):
2025-07-01 17:49:05.376 bj = b[j]
2025-07-01 17:49:05.376 cruncher.set_seq2(bj)
2025-07-01 17:49:05.376 for i in range(alo, ahi):
2025-07-01 17:49:05.376 ai = a[i]
2025-07-01 17:49:05.376 if ai == bj:
2025-07-01 17:49:05.376 if eqi is None:
2025-07-01 17:49:05.376 eqi, eqj = i, j
2025-07-01 17:49:05.376 continue
2025-07-01 17:49:05.376 cruncher.set_seq1(ai)
2025-07-01 17:49:05.376 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.376 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.376 # compares by a factor of 3.
2025-07-01 17:49:05.376 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.376 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.376 # of the computation is cached by cruncher
2025-07-01 17:49:05.376 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.376 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.377 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.377 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.377 if best_ratio < cutoff:
2025-07-01 17:49:05.377 # no non-identical "pretty close" pair
2025-07-01 17:49:05.377 if eqi is None:
2025-07-01 17:49:05.377 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.377 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.377 return
2025-07-01 17:49:05.377 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.377 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.377 else:
2025-07-01 17:49:05.377 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.377 eqi = None
2025-07-01 17:49:05.377
2025-07-01 17:49:05.377 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.377 # identical
2025-07-01 17:49:05.377
2025-07-01 17:49:05.377 # pump out diffs from before the synch point
2025-07-01 17:49:05.377 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.377
2025-07-01 17:49:05.377 # do intraline marking on the synch pair
2025-07-01 17:49:05.378 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.378 if eqi is None:
2025-07-01 17:49:05.378 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.378 atags = btags = ""
2025-07-01 17:49:05.378 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.378 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.378 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.378 if tag == 'replace':
2025-07-01 17:49:05.378 atags += '^' * la
2025-07-01 17:49:05.378 btags += '^' * lb
2025-07-01 17:49:05.378 elif tag == 'delete':
2025-07-01 17:49:05.378 atags += '-' * la
2025-07-01 17:49:05.378 elif tag == 'insert':
2025-07-01 17:49:05.378 btags += '+' * lb
2025-07-01 17:49:05.378 elif tag == 'equal':
2025-07-01 17:49:05.378 atags += ' ' * la
2025-07-01 17:49:05.378 btags += ' ' * lb
2025-07-01 17:49:05.378 else:
2025-07-01 17:49:05.378 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.378 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.379 else:
2025-07-01 17:49:05.379 # the synch pair is identical
2025-07-01 17:49:05.379 yield ' ' + aelt
2025-07-01 17:49:05.379
2025-07-01 17:49:05.379 # pump out diffs from after the synch point
2025-07-01 17:49:05.379 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.379
2025-07-01 17:49:05.379 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.379 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.379
2025-07-01 17:49:05.379 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.379 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.379 alo = 8, ahi = 1101
2025-07-01 17:49:05.379 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.379 blo = 8, bhi = 1101
2025-07-01 17:49:05.379
2025-07-01 17:49:05.379 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.379 g = []
2025-07-01 17:49:05.379 if alo < ahi:
2025-07-01 17:49:05.379 if blo < bhi:
2025-07-01 17:49:05.379 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.383 else:
2025-07-01 17:49:05.383 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.383 elif blo < bhi:
2025-07-01 17:49:05.383 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.383
2025-07-01 17:49:05.383 > yield from g
2025-07-01 17:49:05.383
2025-07-01 17:49:05.383 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.383 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.383
2025-07-01 17:49:05.383 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.383 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.383 alo = 8, ahi = 1101
2025-07-01 17:49:05.383 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.383 blo = 8, bhi = 1101
2025-07-01 17:49:05.383
2025-07-01 17:49:05.383 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.383 r"""
2025-07-01 17:49:05.383 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.384 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.384 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.384 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.384
2025-07-01 17:49:05.384 Example:
2025-07-01 17:49:05.384
2025-07-01 17:49:05.384 >>> d = Differ()
2025-07-01 17:49:05.384 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.384 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.384 >>> print(''.join(results), end="")
2025-07-01 17:49:05.384 - abcDefghiJkl
2025-07-01 17:49:05.384 + abcdefGhijkl
2025-07-01 17:49:05.384 """
2025-07-01 17:49:05.384
2025-07-01 17:49:05.384 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.384 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.384 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.384 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.384 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.385
2025-07-01 17:49:05.385 # search for the pair that matches best without being identical
2025-07-01 17:49:05.385 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.385 # on junk -- unless we have to)
2025-07-01 17:49:05.385 for j in range(blo, bhi):
2025-07-01 17:49:05.385 bj = b[j]
2025-07-01 17:49:05.385 cruncher.set_seq2(bj)
2025-07-01 17:49:05.385 for i in range(alo, ahi):
2025-07-01 17:49:05.385 ai = a[i]
2025-07-01 17:49:05.385 if ai == bj:
2025-07-01 17:49:05.385 if eqi is None:
2025-07-01 17:49:05.385 eqi, eqj = i, j
2025-07-01 17:49:05.385 continue
2025-07-01 17:49:05.385 cruncher.set_seq1(ai)
2025-07-01 17:49:05.385 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.385 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.385 # compares by a factor of 3.
2025-07-01 17:49:05.385 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.385 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.386 # of the computation is cached by cruncher
2025-07-01 17:49:05.386 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.386 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.386 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.386 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.386 if best_ratio < cutoff:
2025-07-01 17:49:05.386 # no non-identical "pretty close" pair
2025-07-01 17:49:05.386 if eqi is None:
2025-07-01 17:49:05.386 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.386 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.386 return
2025-07-01 17:49:05.386 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.386 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.386 else:
2025-07-01 17:49:05.386 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.386 eqi = None
2025-07-01 17:49:05.386
2025-07-01 17:49:05.386 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.386 # identical
2025-07-01 17:49:05.386
2025-07-01 17:49:05.386 # pump out diffs from before the synch point
2025-07-01 17:49:05.387 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.387
2025-07-01 17:49:05.387 # do intraline marking on the synch pair
2025-07-01 17:49:05.387 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.387 if eqi is None:
2025-07-01 17:49:05.387 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.387 atags = btags = ""
2025-07-01 17:49:05.387 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.387 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.387 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.387 if tag == 'replace':
2025-07-01 17:49:05.387 atags += '^' * la
2025-07-01 17:49:05.387 btags += '^' * lb
2025-07-01 17:49:05.387 elif tag == 'delete':
2025-07-01 17:49:05.387 atags += '-' * la
2025-07-01 17:49:05.387 elif tag == 'insert':
2025-07-01 17:49:05.387 btags += '+' * lb
2025-07-01 17:49:05.387 elif tag == 'equal':
2025-07-01 17:49:05.387 atags += ' ' * la
2025-07-01 17:49:05.387 btags += ' ' * lb
2025-07-01 17:49:05.388 else:
2025-07-01 17:49:05.388 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.388 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.388 else:
2025-07-01 17:49:05.388 # the synch pair is identical
2025-07-01 17:49:05.388 yield ' ' + aelt
2025-07-01 17:49:05.388
2025-07-01 17:49:05.388 # pump out diffs from after the synch point
2025-07-01 17:49:05.388 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.388
2025-07-01 17:49:05.388 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.388 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.388
2025-07-01 17:49:05.388 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.388 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.388 alo = 9, ahi = 1101
2025-07-01 17:49:05.388 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.388 blo = 9, bhi = 1101
2025-07-01 17:49:05.388
2025-07-01 17:49:05.388 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.388 g = []
2025-07-01 17:49:05.389 if alo < ahi:
2025-07-01 17:49:05.389 if blo < bhi:
2025-07-01 17:49:05.389 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.389 else:
2025-07-01 17:49:05.389 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.389 elif blo < bhi:
2025-07-01 17:49:05.389 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.389
2025-07-01 17:49:05.389 > yield from g
2025-07-01 17:49:05.389
2025-07-01 17:49:05.389 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.389 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.389
2025-07-01 17:49:05.389 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.389 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.389 alo = 9, ahi = 1101
2025-07-01 17:49:05.389 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.389 blo = 9, bhi = 1101
2025-07-01 17:49:05.389
2025-07-01 17:49:05.389 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.390 r"""
2025-07-01 17:49:05.390 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.390 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.390 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.390 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.390
2025-07-01 17:49:05.390 Example:
2025-07-01 17:49:05.390
2025-07-01 17:49:05.390 >>> d = Differ()
2025-07-01 17:49:05.390 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.390 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.390 >>> print(''.join(results), end="")
2025-07-01 17:49:05.390 - abcDefghiJkl
2025-07-01 17:49:05.390 + abcdefGhijkl
2025-07-01 17:49:05.390 """
2025-07-01 17:49:05.390
2025-07-01 17:49:05.390 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.390 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.390 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.391 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.391 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.391
2025-07-01 17:49:05.391 # search for the pair that matches best without being identical
2025-07-01 17:49:05.391 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.391 # on junk -- unless we have to)
2025-07-01 17:49:05.391 for j in range(blo, bhi):
2025-07-01 17:49:05.391 bj = b[j]
2025-07-01 17:49:05.391 cruncher.set_seq2(bj)
2025-07-01 17:49:05.391 for i in range(alo, ahi):
2025-07-01 17:49:05.391 ai = a[i]
2025-07-01 17:49:05.391 if ai == bj:
2025-07-01 17:49:05.391 if eqi is None:
2025-07-01 17:49:05.391 eqi, eqj = i, j
2025-07-01 17:49:05.391 continue
2025-07-01 17:49:05.391 cruncher.set_seq1(ai)
2025-07-01 17:49:05.391 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.391 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.391 # compares by a factor of 3.
2025-07-01 17:49:05.391 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.392 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.392 # of the computation is cached by cruncher
2025-07-01 17:49:05.392 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.392 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.392 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.392 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.392 if best_ratio < cutoff:
2025-07-01 17:49:05.392 # no non-identical "pretty close" pair
2025-07-01 17:49:05.392 if eqi is None:
2025-07-01 17:49:05.392 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.392 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.392 return
2025-07-01 17:49:05.392 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.392 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.392 else:
2025-07-01 17:49:05.392 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.392 eqi = None
2025-07-01 17:49:05.392
2025-07-01 17:49:05.392 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.392 # identical
2025-07-01 17:49:05.392
2025-07-01 17:49:05.393 # pump out diffs from before the synch point
2025-07-01 17:49:05.393 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.393
2025-07-01 17:49:05.393 # do intraline marking on the synch pair
2025-07-01 17:49:05.393 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.393 if eqi is None:
2025-07-01 17:49:05.393 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.393 atags = btags = ""
2025-07-01 17:49:05.393 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.393 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.393 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.393 if tag == 'replace':
2025-07-01 17:49:05.393 atags += '^' * la
2025-07-01 17:49:05.393 btags += '^' * lb
2025-07-01 17:49:05.393 elif tag == 'delete':
2025-07-01 17:49:05.393 atags += '-' * la
2025-07-01 17:49:05.393 elif tag == 'insert':
2025-07-01 17:49:05.393 btags += '+' * lb
2025-07-01 17:49:05.393 elif tag == 'equal':
2025-07-01 17:49:05.393 atags += ' ' * la
2025-07-01 17:49:05.394 btags += ' ' * lb
2025-07-01 17:49:05.394 else:
2025-07-01 17:49:05.394 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.394 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.394 else:
2025-07-01 17:49:05.394 # the synch pair is identical
2025-07-01 17:49:05.394 yield ' ' + aelt
2025-07-01 17:49:05.394
2025-07-01 17:49:05.394 # pump out diffs from after the synch point
2025-07-01 17:49:05.394 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.394
2025-07-01 17:49:05.394 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.394 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.394
2025-07-01 17:49:05.394 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.394 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.394 alo = 10, ahi = 1101
2025-07-01 17:49:05.394 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.394 blo = 10, bhi = 1101
2025-07-01 17:49:05.394
2025-07-01 17:49:05.394 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.397 g = []
2025-07-01 17:49:05.398 if alo < ahi:
2025-07-01 17:49:05.398 if blo < bhi:
2025-07-01 17:49:05.398 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.398 else:
2025-07-01 17:49:05.398 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.398 elif blo < bhi:
2025-07-01 17:49:05.398 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.398
2025-07-01 17:49:05.398 > yield from g
2025-07-01 17:49:05.398
2025-07-01 17:49:05.398 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.398 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.398
2025-07-01 17:49:05.398 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.398 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.398 alo = 10, ahi = 1101
2025-07-01 17:49:05.398 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.398 blo = 10, bhi = 1101
2025-07-01 17:49:05.398
2025-07-01 17:49:05.398 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.399 r"""
2025-07-01 17:49:05.399 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.399 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.399 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.399 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.399
2025-07-01 17:49:05.399 Example:
2025-07-01 17:49:05.399
2025-07-01 17:49:05.399 >>> d = Differ()
2025-07-01 17:49:05.399 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.399 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.399 >>> print(''.join(results), end="")
2025-07-01 17:49:05.399 - abcDefghiJkl
2025-07-01 17:49:05.399 + abcdefGhijkl
2025-07-01 17:49:05.399 """
2025-07-01 17:49:05.399
2025-07-01 17:49:05.399 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.399 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.400 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.400 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.400 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.400
2025-07-01 17:49:05.400 # search for the pair that matches best without being identical
2025-07-01 17:49:05.400 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.400 # on junk -- unless we have to)
2025-07-01 17:49:05.400 for j in range(blo, bhi):
2025-07-01 17:49:05.400 bj = b[j]
2025-07-01 17:49:05.400 cruncher.set_seq2(bj)
2025-07-01 17:49:05.400 for i in range(alo, ahi):
2025-07-01 17:49:05.400 ai = a[i]
2025-07-01 17:49:05.400 if ai == bj:
2025-07-01 17:49:05.400 if eqi is None:
2025-07-01 17:49:05.400 eqi, eqj = i, j
2025-07-01 17:49:05.400 continue
2025-07-01 17:49:05.400 cruncher.set_seq1(ai)
2025-07-01 17:49:05.400 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.400 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.400 # compares by a factor of 3.
2025-07-01 17:49:05.400 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.401 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.401 # of the computation is cached by cruncher
2025-07-01 17:49:05.401 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.401 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.401 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.401 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.401 if best_ratio < cutoff:
2025-07-01 17:49:05.401 # no non-identical "pretty close" pair
2025-07-01 17:49:05.401 if eqi is None:
2025-07-01 17:49:05.401 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.401 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.401 return
2025-07-01 17:49:05.401 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.401 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.401 else:
2025-07-01 17:49:05.401 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.401 eqi = None
2025-07-01 17:49:05.401
2025-07-01 17:49:05.401 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.401 # identical
2025-07-01 17:49:05.402
2025-07-01 17:49:05.402 # pump out diffs from before the synch point
2025-07-01 17:49:05.402 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.402
2025-07-01 17:49:05.402 # do intraline marking on the synch pair
2025-07-01 17:49:05.402 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.402 if eqi is None:
2025-07-01 17:49:05.402 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.402 atags = btags = ""
2025-07-01 17:49:05.402 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.402 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.402 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.402 if tag == 'replace':
2025-07-01 17:49:05.402 atags += '^' * la
2025-07-01 17:49:05.402 btags += '^' * lb
2025-07-01 17:49:05.402 elif tag == 'delete':
2025-07-01 17:49:05.403 atags += '-' * la
2025-07-01 17:49:05.403 elif tag == 'insert':
2025-07-01 17:49:05.403 btags += '+' * lb
2025-07-01 17:49:05.403 elif tag == 'equal':
2025-07-01 17:49:05.403 atags += ' ' * la
2025-07-01 17:49:05.403 btags += ' ' * lb
2025-07-01 17:49:05.403 else:
2025-07-01 17:49:05.403 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.403 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.403 else:
2025-07-01 17:49:05.403 # the synch pair is identical
2025-07-01 17:49:05.403 yield ' ' + aelt
2025-07-01 17:49:05.403
2025-07-01 17:49:05.403 # pump out diffs from after the synch point
2025-07-01 17:49:05.403 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.403
2025-07-01 17:49:05.403 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.403 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.403
2025-07-01 17:49:05.403 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.404 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.404 alo = 11, ahi = 1101
2025-07-01 17:49:05.404 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.404 blo = 11, bhi = 1101
2025-07-01 17:49:05.404
2025-07-01 17:49:05.404 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.404 g = []
2025-07-01 17:49:05.404 if alo < ahi:
2025-07-01 17:49:05.404 if blo < bhi:
2025-07-01 17:49:05.404 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.404 else:
2025-07-01 17:49:05.404 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.404 elif blo < bhi:
2025-07-01 17:49:05.404 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.404
2025-07-01 17:49:05.404 > yield from g
2025-07-01 17:49:05.404
2025-07-01 17:49:05.404 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.404 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.404
2025-07-01 17:49:05.404 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.405 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.405 alo = 11, ahi = 1101
2025-07-01 17:49:05.405 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.405 blo = 11, bhi = 1101
2025-07-01 17:49:05.405
2025-07-01 17:49:05.405 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.405 r"""
2025-07-01 17:49:05.405 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.405 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.405 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.405 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.405
2025-07-01 17:49:05.405 Example:
2025-07-01 17:49:05.405
2025-07-01 17:49:05.405 >>> d = Differ()
2025-07-01 17:49:05.405 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.405 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.405 >>> print(''.join(results), end="")
2025-07-01 17:49:05.405 - abcDefghiJkl
2025-07-01 17:49:05.406 + abcdefGhijkl
2025-07-01 17:49:05.406 """
2025-07-01 17:49:05.406
2025-07-01 17:49:05.406 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.406 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.406 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.406 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.406 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.406
2025-07-01 17:49:05.406 # search for the pair that matches best without being identical
2025-07-01 17:49:05.406 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.406 # on junk -- unless we have to)
2025-07-01 17:49:05.406 for j in range(blo, bhi):
2025-07-01 17:49:05.406 bj = b[j]
2025-07-01 17:49:05.406 cruncher.set_seq2(bj)
2025-07-01 17:49:05.406 for i in range(alo, ahi):
2025-07-01 17:49:05.406 ai = a[i]
2025-07-01 17:49:05.406 if ai == bj:
2025-07-01 17:49:05.406 if eqi is None:
2025-07-01 17:49:05.406 eqi, eqj = i, j
2025-07-01 17:49:05.407 continue
2025-07-01 17:49:05.407 cruncher.set_seq1(ai)
2025-07-01 17:49:05.407 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.407 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.407 # compares by a factor of 3.
2025-07-01 17:49:05.407 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.407 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.407 # of the computation is cached by cruncher
2025-07-01 17:49:05.407 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.407 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.407 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.407 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.407 if best_ratio < cutoff:
2025-07-01 17:49:05.407 # no non-identical "pretty close" pair
2025-07-01 17:49:05.407 if eqi is None:
2025-07-01 17:49:05.407 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.407 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.407 return
2025-07-01 17:49:05.407 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.407 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.408 else:
2025-07-01 17:49:05.408 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.408 eqi = None
2025-07-01 17:49:05.408
2025-07-01 17:49:05.408 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.408 # identical
2025-07-01 17:49:05.408
2025-07-01 17:49:05.408 # pump out diffs from before the synch point
2025-07-01 17:49:05.408 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.408
2025-07-01 17:49:05.408 # do intraline marking on the synch pair
2025-07-01 17:49:05.408 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.408 if eqi is None:
2025-07-01 17:49:05.408 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.408 atags = btags = ""
2025-07-01 17:49:05.408 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.408 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.408 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.408 if tag == 'replace':
2025-07-01 17:49:05.408 atags += '^' * la
2025-07-01 17:49:05.408 btags += '^' * lb
2025-07-01 17:49:05.409 elif tag == 'delete':
2025-07-01 17:49:05.409 atags += '-' * la
2025-07-01 17:49:05.409 elif tag == 'insert':
2025-07-01 17:49:05.409 btags += '+' * lb
2025-07-01 17:49:05.409 elif tag == 'equal':
2025-07-01 17:49:05.409 atags += ' ' * la
2025-07-01 17:49:05.409 btags += ' ' * lb
2025-07-01 17:49:05.409 else:
2025-07-01 17:49:05.409 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.409 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.409 else:
2025-07-01 17:49:05.409 # the synch pair is identical
2025-07-01 17:49:05.409 yield ' ' + aelt
2025-07-01 17:49:05.409
2025-07-01 17:49:05.409 # pump out diffs from after the synch point
2025-07-01 17:49:05.409 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.409
2025-07-01 17:49:05.409 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.409 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.409
2025-07-01 17:49:05.409 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.410 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.410 alo = 12, ahi = 1101
2025-07-01 17:49:05.410 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.410 blo = 12, bhi = 1101
2025-07-01 17:49:05.410
2025-07-01 17:49:05.410 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.410 g = []
2025-07-01 17:49:05.410 if alo < ahi:
2025-07-01 17:49:05.410 if blo < bhi:
2025-07-01 17:49:05.410 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.410 else:
2025-07-01 17:49:05.410 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.410 elif blo < bhi:
2025-07-01 17:49:05.410 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.410
2025-07-01 17:49:05.410 > yield from g
2025-07-01 17:49:05.410
2025-07-01 17:49:05.410 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.410 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.410
2025-07-01 17:49:05.410 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.414 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.414 alo = 12, ahi = 1101
2025-07-01 17:49:05.414 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.414 blo = 12, bhi = 1101
2025-07-01 17:49:05.414
2025-07-01 17:49:05.414 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.414 r"""
2025-07-01 17:49:05.414 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.414 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.414 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.414 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.414
2025-07-01 17:49:05.414 Example:
2025-07-01 17:49:05.414
2025-07-01 17:49:05.414 >>> d = Differ()
2025-07-01 17:49:05.414 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.414 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.414 >>> print(''.join(results), end="")
2025-07-01 17:49:05.415 - abcDefghiJkl
2025-07-01 17:49:05.415 + abcdefGhijkl
2025-07-01 17:49:05.415 """
2025-07-01 17:49:05.415
2025-07-01 17:49:05.415 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.415 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.415 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.415 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.415 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.415
2025-07-01 17:49:05.415 # search for the pair that matches best without being identical
2025-07-01 17:49:05.415 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.415 # on junk -- unless we have to)
2025-07-01 17:49:05.415 for j in range(blo, bhi):
2025-07-01 17:49:05.415 bj = b[j]
2025-07-01 17:49:05.415 cruncher.set_seq2(bj)
2025-07-01 17:49:05.415 for i in range(alo, ahi):
2025-07-01 17:49:05.415 ai = a[i]
2025-07-01 17:49:05.416 if ai == bj:
2025-07-01 17:49:05.416 if eqi is None:
2025-07-01 17:49:05.416 eqi, eqj = i, j
2025-07-01 17:49:05.416 continue
2025-07-01 17:49:05.416 cruncher.set_seq1(ai)
2025-07-01 17:49:05.416 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.416 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.416 # compares by a factor of 3.
2025-07-01 17:49:05.416 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.416 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.416 # of the computation is cached by cruncher
2025-07-01 17:49:05.416 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.416 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.416 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.416 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.416 if best_ratio < cutoff:
2025-07-01 17:49:05.416 # no non-identical "pretty close" pair
2025-07-01 17:49:05.416 if eqi is None:
2025-07-01 17:49:05.416 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.416 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.416 return
2025-07-01 17:49:05.417 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.417 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.417 else:
2025-07-01 17:49:05.417 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.417 eqi = None
2025-07-01 17:49:05.417
2025-07-01 17:49:05.417 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.417 # identical
2025-07-01 17:49:05.417
2025-07-01 17:49:05.417 # pump out diffs from before the synch point
2025-07-01 17:49:05.417 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.417
2025-07-01 17:49:05.417 # do intraline marking on the synch pair
2025-07-01 17:49:05.417 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.417 if eqi is None:
2025-07-01 17:49:05.417 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.417 atags = btags = ""
2025-07-01 17:49:05.417 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.417 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.417 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.418 if tag == 'replace':
2025-07-01 17:49:05.418 atags += '^' * la
2025-07-01 17:49:05.418 btags += '^' * lb
2025-07-01 17:49:05.418 elif tag == 'delete':
2025-07-01 17:49:05.418 atags += '-' * la
2025-07-01 17:49:05.418 elif tag == 'insert':
2025-07-01 17:49:05.418 btags += '+' * lb
2025-07-01 17:49:05.418 elif tag == 'equal':
2025-07-01 17:49:05.418 atags += ' ' * la
2025-07-01 17:49:05.418 btags += ' ' * lb
2025-07-01 17:49:05.418 else:
2025-07-01 17:49:05.418 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.418 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.418 else:
2025-07-01 17:49:05.418 # the synch pair is identical
2025-07-01 17:49:05.418 yield ' ' + aelt
2025-07-01 17:49:05.418
2025-07-01 17:49:05.418 # pump out diffs from after the synch point
2025-07-01 17:49:05.418 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.418
2025-07-01 17:49:05.418 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.419 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.419
2025-07-01 17:49:05.419 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.419 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.419 alo = 13, ahi = 1101
2025-07-01 17:49:05.419 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.419 blo = 13, bhi = 1101
2025-07-01 17:49:05.419
2025-07-01 17:49:05.419 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.419 g = []
2025-07-01 17:49:05.419 if alo < ahi:
2025-07-01 17:49:05.419 if blo < bhi:
2025-07-01 17:49:05.419 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.419 else:
2025-07-01 17:49:05.419 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.419 elif blo < bhi:
2025-07-01 17:49:05.419 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.419
2025-07-01 17:49:05.419 > yield from g
2025-07-01 17:49:05.419
2025-07-01 17:49:05.419 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.420 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.420
2025-07-01 17:49:05.420 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.420 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.420 alo = 13, ahi = 1101
2025-07-01 17:49:05.420 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.420 blo = 13, bhi = 1101
2025-07-01 17:49:05.420
2025-07-01 17:49:05.420 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.420 r"""
2025-07-01 17:49:05.420 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.420 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.420 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.420 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.420
2025-07-01 17:49:05.420 Example:
2025-07-01 17:49:05.420
2025-07-01 17:49:05.420 >>> d = Differ()
2025-07-01 17:49:05.420 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.420 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.421 >>> print(''.join(results), end="")
2025-07-01 17:49:05.421 - abcDefghiJkl
2025-07-01 17:49:05.421 + abcdefGhijkl
2025-07-01 17:49:05.421 """
2025-07-01 17:49:05.421
2025-07-01 17:49:05.421 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.421 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.421 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.421 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.421 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.421
2025-07-01 17:49:05.421 # search for the pair that matches best without being identical
2025-07-01 17:49:05.421 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.421 # on junk -- unless we have to)
2025-07-01 17:49:05.421 for j in range(blo, bhi):
2025-07-01 17:49:05.421 bj = b[j]
2025-07-01 17:49:05.421 cruncher.set_seq2(bj)
2025-07-01 17:49:05.421 for i in range(alo, ahi):
2025-07-01 17:49:05.421 ai = a[i]
2025-07-01 17:49:05.422 if ai == bj:
2025-07-01 17:49:05.422 if eqi is None:
2025-07-01 17:49:05.422 eqi, eqj = i, j
2025-07-01 17:49:05.422 continue
2025-07-01 17:49:05.422 cruncher.set_seq1(ai)
2025-07-01 17:49:05.422 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.422 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.422 # compares by a factor of 3.
2025-07-01 17:49:05.422 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.422 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.422 # of the computation is cached by cruncher
2025-07-01 17:49:05.422 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.422 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.422 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.422 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.422 if best_ratio < cutoff:
2025-07-01 17:49:05.422 # no non-identical "pretty close" pair
2025-07-01 17:49:05.422 if eqi is None:
2025-07-01 17:49:05.422 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.422 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.422 return
2025-07-01 17:49:05.423 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.423 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.423 else:
2025-07-01 17:49:05.423 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.423 eqi = None
2025-07-01 17:49:05.423
2025-07-01 17:49:05.423 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.423 # identical
2025-07-01 17:49:05.423
2025-07-01 17:49:05.423 # pump out diffs from before the synch point
2025-07-01 17:49:05.423 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.423
2025-07-01 17:49:05.423 # do intraline marking on the synch pair
2025-07-01 17:49:05.423 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.423 if eqi is None:
2025-07-01 17:49:05.423 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.423 atags = btags = ""
2025-07-01 17:49:05.423 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.423 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.424 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.424 if tag == 'replace':
2025-07-01 17:49:05.424 atags += '^' * la
2025-07-01 17:49:05.424 btags += '^' * lb
2025-07-01 17:49:05.424 elif tag == 'delete':
2025-07-01 17:49:05.424 atags += '-' * la
2025-07-01 17:49:05.424 elif tag == 'insert':
2025-07-01 17:49:05.424 btags += '+' * lb
2025-07-01 17:49:05.424 elif tag == 'equal':
2025-07-01 17:49:05.424 atags += ' ' * la
2025-07-01 17:49:05.424 btags += ' ' * lb
2025-07-01 17:49:05.424 else:
2025-07-01 17:49:05.424 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.424 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.424 else:
2025-07-01 17:49:05.424 # the synch pair is identical
2025-07-01 17:49:05.424 yield ' ' + aelt
2025-07-01 17:49:05.424
2025-07-01 17:49:05.424 # pump out diffs from after the synch point
2025-07-01 17:49:05.424 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.424
2025-07-01 17:49:05.425 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.425 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.425
2025-07-01 17:49:05.425 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.425 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.425 alo = 14, ahi = 1101
2025-07-01 17:49:05.425 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.425 blo = 14, bhi = 1101
2025-07-01 17:49:05.425
2025-07-01 17:49:05.425 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.425 g = []
2025-07-01 17:49:05.425 if alo < ahi:
2025-07-01 17:49:05.425 if blo < bhi:
2025-07-01 17:49:05.425 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.425 else:
2025-07-01 17:49:05.425 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.425 elif blo < bhi:
2025-07-01 17:49:05.425 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.425
2025-07-01 17:49:05.425 > yield from g
2025-07-01 17:49:05.425
2025-07-01 17:49:05.428 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.428 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.429
2025-07-01 17:49:05.429 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.429 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.429 alo = 14, ahi = 1101
2025-07-01 17:49:05.429 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.429 blo = 14, bhi = 1101
2025-07-01 17:49:05.429
2025-07-01 17:49:05.429 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.429 r"""
2025-07-01 17:49:05.429 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.429 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.429 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.429 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.429
2025-07-01 17:49:05.429 Example:
2025-07-01 17:49:05.429
2025-07-01 17:49:05.429 >>> d = Differ()
2025-07-01 17:49:05.429 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.429 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.429 >>> print(''.join(results), end="")
2025-07-01 17:49:05.430 - abcDefghiJkl
2025-07-01 17:49:05.430 + abcdefGhijkl
2025-07-01 17:49:05.430 """
2025-07-01 17:49:05.430
2025-07-01 17:49:05.430 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.430 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.430 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.430 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.430 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.430
2025-07-01 17:49:05.430 # search for the pair that matches best without being identical
2025-07-01 17:49:05.430 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.430 # on junk -- unless we have to)
2025-07-01 17:49:05.430 for j in range(blo, bhi):
2025-07-01 17:49:05.430 bj = b[j]
2025-07-01 17:49:05.430 cruncher.set_seq2(bj)
2025-07-01 17:49:05.430 for i in range(alo, ahi):
2025-07-01 17:49:05.430 ai = a[i]
2025-07-01 17:49:05.430 if ai == bj:
2025-07-01 17:49:05.431 if eqi is None:
2025-07-01 17:49:05.431 eqi, eqj = i, j
2025-07-01 17:49:05.431 continue
2025-07-01 17:49:05.431 cruncher.set_seq1(ai)
2025-07-01 17:49:05.431 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.431 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.431 # compares by a factor of 3.
2025-07-01 17:49:05.431 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.431 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.431 # of the computation is cached by cruncher
2025-07-01 17:49:05.431 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.431 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.431 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.431 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.431 if best_ratio < cutoff:
2025-07-01 17:49:05.431 # no non-identical "pretty close" pair
2025-07-01 17:49:05.431 if eqi is None:
2025-07-01 17:49:05.431 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.431 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.431 return
2025-07-01 17:49:05.432 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.432 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.432 else:
2025-07-01 17:49:05.432 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.432 eqi = None
2025-07-01 17:49:05.432
2025-07-01 17:49:05.432 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.432 # identical
2025-07-01 17:49:05.432
2025-07-01 17:49:05.432 # pump out diffs from before the synch point
2025-07-01 17:49:05.432 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.432
2025-07-01 17:49:05.432 # do intraline marking on the synch pair
2025-07-01 17:49:05.432 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.432 if eqi is None:
2025-07-01 17:49:05.432 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.432 atags = btags = ""
2025-07-01 17:49:05.432 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.432 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.432 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.432 if tag == 'replace':
2025-07-01 17:49:05.433 atags += '^' * la
2025-07-01 17:49:05.433 btags += '^' * lb
2025-07-01 17:49:05.433 elif tag == 'delete':
2025-07-01 17:49:05.433 atags += '-' * la
2025-07-01 17:49:05.433 elif tag == 'insert':
2025-07-01 17:49:05.433 btags += '+' * lb
2025-07-01 17:49:05.433 elif tag == 'equal':
2025-07-01 17:49:05.433 atags += ' ' * la
2025-07-01 17:49:05.433 btags += ' ' * lb
2025-07-01 17:49:05.433 else:
2025-07-01 17:49:05.433 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.433 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.433 else:
2025-07-01 17:49:05.433 # the synch pair is identical
2025-07-01 17:49:05.433 yield ' ' + aelt
2025-07-01 17:49:05.433
2025-07-01 17:49:05.433 # pump out diffs from after the synch point
2025-07-01 17:49:05.433 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.433
2025-07-01 17:49:05.433 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.433 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.434
2025-07-01 17:49:05.434 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.434 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.434 alo = 15, ahi = 1101
2025-07-01 17:49:05.434 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.434 blo = 15, bhi = 1101
2025-07-01 17:49:05.434
2025-07-01 17:49:05.434 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.434 g = []
2025-07-01 17:49:05.434 if alo < ahi:
2025-07-01 17:49:05.434 if blo < bhi:
2025-07-01 17:49:05.434 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.434 else:
2025-07-01 17:49:05.434 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.434 elif blo < bhi:
2025-07-01 17:49:05.434 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.434
2025-07-01 17:49:05.434 > yield from g
2025-07-01 17:49:05.434
2025-07-01 17:49:05.435 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.435 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.435
2025-07-01 17:49:05.435 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.435 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.435 alo = 15, ahi = 1101
2025-07-01 17:49:05.435 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.435 blo = 15, bhi = 1101
2025-07-01 17:49:05.435
2025-07-01 17:49:05.435 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.435 r"""
2025-07-01 17:49:05.435 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.435 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.435 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.435 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.435
2025-07-01 17:49:05.435 Example:
2025-07-01 17:49:05.435
2025-07-01 17:49:05.435 >>> d = Differ()
2025-07-01 17:49:05.435 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.436 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.436 >>> print(''.join(results), end="")
2025-07-01 17:49:05.436 - abcDefghiJkl
2025-07-01 17:49:05.436 + abcdefGhijkl
2025-07-01 17:49:05.436 """
2025-07-01 17:49:05.436
2025-07-01 17:49:05.436 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.436 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.436 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.436 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.436 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.436
2025-07-01 17:49:05.436 # search for the pair that matches best without being identical
2025-07-01 17:49:05.436 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.436 # on junk -- unless we have to)
2025-07-01 17:49:05.436 for j in range(blo, bhi):
2025-07-01 17:49:05.436 bj = b[j]
2025-07-01 17:49:05.436 cruncher.set_seq2(bj)
2025-07-01 17:49:05.437 for i in range(alo, ahi):
2025-07-01 17:49:05.437 ai = a[i]
2025-07-01 17:49:05.437 if ai == bj:
2025-07-01 17:49:05.437 if eqi is None:
2025-07-01 17:49:05.437 eqi, eqj = i, j
2025-07-01 17:49:05.437 continue
2025-07-01 17:49:05.437 cruncher.set_seq1(ai)
2025-07-01 17:49:05.437 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.437 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.437 # compares by a factor of 3.
2025-07-01 17:49:05.437 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.437 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.437 # of the computation is cached by cruncher
2025-07-01 17:49:05.437 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.437 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.437 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.437 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.437 if best_ratio < cutoff:
2025-07-01 17:49:05.437 # no non-identical "pretty close" pair
2025-07-01 17:49:05.437 if eqi is None:
2025-07-01 17:49:05.437 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.438 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.438 return
2025-07-01 17:49:05.438 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.438 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.438 else:
2025-07-01 17:49:05.438 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.438 eqi = None
2025-07-01 17:49:05.438
2025-07-01 17:49:05.438 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.438 # identical
2025-07-01 17:49:05.438
2025-07-01 17:49:05.438 # pump out diffs from before the synch point
2025-07-01 17:49:05.438 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.438
2025-07-01 17:49:05.438 # do intraline marking on the synch pair
2025-07-01 17:49:05.438 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.438 if eqi is None:
2025-07-01 17:49:05.438 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.438 atags = btags = ""
2025-07-01 17:49:05.438 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.438 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.439 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.439 if tag == 'replace':
2025-07-01 17:49:05.439 atags += '^' * la
2025-07-01 17:49:05.439 btags += '^' * lb
2025-07-01 17:49:05.439 elif tag == 'delete':
2025-07-01 17:49:05.439 atags += '-' * la
2025-07-01 17:49:05.439 elif tag == 'insert':
2025-07-01 17:49:05.439 btags += '+' * lb
2025-07-01 17:49:05.439 elif tag == 'equal':
2025-07-01 17:49:05.439 atags += ' ' * la
2025-07-01 17:49:05.439 btags += ' ' * lb
2025-07-01 17:49:05.439 else:
2025-07-01 17:49:05.439 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.439 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.439 else:
2025-07-01 17:49:05.439 # the synch pair is identical
2025-07-01 17:49:05.439 yield ' ' + aelt
2025-07-01 17:49:05.439
2025-07-01 17:49:05.439 # pump out diffs from after the synch point
2025-07-01 17:49:05.439 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.439
2025-07-01 17:49:05.440 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.440 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.440
2025-07-01 17:49:05.440 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.440 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.440 alo = 16, ahi = 1101
2025-07-01 17:49:05.440 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.440 blo = 16, bhi = 1101
2025-07-01 17:49:05.440
2025-07-01 17:49:05.440 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.440 g = []
2025-07-01 17:49:05.440 if alo < ahi:
2025-07-01 17:49:05.440 if blo < bhi:
2025-07-01 17:49:05.440 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.440 else:
2025-07-01 17:49:05.440 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.440 elif blo < bhi:
2025-07-01 17:49:05.440 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.440
2025-07-01 17:49:05.440 > yield from g
2025-07-01 17:49:05.441
2025-07-01 17:49:05.441 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.441 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.441
2025-07-01 17:49:05.441 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.441 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.441 alo = 16, ahi = 1101
2025-07-01 17:49:05.441 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.441 blo = 16, bhi = 1101
2025-07-01 17:49:05.441
2025-07-01 17:49:05.441 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.441 r"""
2025-07-01 17:49:05.441 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.441 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.441 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.441 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.441
2025-07-01 17:49:05.441 Example:
2025-07-01 17:49:05.441
2025-07-01 17:49:05.441 >>> d = Differ()
2025-07-01 17:49:05.441 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.445 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.445 >>> print(''.join(results), end="")
2025-07-01 17:49:05.445 - abcDefghiJkl
2025-07-01 17:49:05.445 + abcdefGhijkl
2025-07-01 17:49:05.445 """
2025-07-01 17:49:05.445
2025-07-01 17:49:05.445 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.445 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.445 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.445 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.445 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.445
2025-07-01 17:49:05.445 # search for the pair that matches best without being identical
2025-07-01 17:49:05.445 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.445 # on junk -- unless we have to)
2025-07-01 17:49:05.445 for j in range(blo, bhi):
2025-07-01 17:49:05.445 bj = b[j]
2025-07-01 17:49:05.445 cruncher.set_seq2(bj)
2025-07-01 17:49:05.446 for i in range(alo, ahi):
2025-07-01 17:49:05.446 ai = a[i]
2025-07-01 17:49:05.446 if ai == bj:
2025-07-01 17:49:05.446 if eqi is None:
2025-07-01 17:49:05.446 eqi, eqj = i, j
2025-07-01 17:49:05.446 continue
2025-07-01 17:49:05.446 cruncher.set_seq1(ai)
2025-07-01 17:49:05.446 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.446 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.446 # compares by a factor of 3.
2025-07-01 17:49:05.446 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.446 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.446 # of the computation is cached by cruncher
2025-07-01 17:49:05.446 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.446 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.446 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.446 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.446 if best_ratio < cutoff:
2025-07-01 17:49:05.446 # no non-identical "pretty close" pair
2025-07-01 17:49:05.447 if eqi is None:
2025-07-01 17:49:05.447 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.447 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.447 return
2025-07-01 17:49:05.447 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.447 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.447 else:
2025-07-01 17:49:05.447 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.447 eqi = None
2025-07-01 17:49:05.447
2025-07-01 17:49:05.447 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.447 # identical
2025-07-01 17:49:05.447
2025-07-01 17:49:05.447 # pump out diffs from before the synch point
2025-07-01 17:49:05.447 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.447
2025-07-01 17:49:05.447 # do intraline marking on the synch pair
2025-07-01 17:49:05.447 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.447 if eqi is None:
2025-07-01 17:49:05.447 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.448 atags = btags = ""
2025-07-01 17:49:05.448 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.448 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.448 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.448 if tag == 'replace':
2025-07-01 17:49:05.448 atags += '^' * la
2025-07-01 17:49:05.448 btags += '^' * lb
2025-07-01 17:49:05.448 elif tag == 'delete':
2025-07-01 17:49:05.448 atags += '-' * la
2025-07-01 17:49:05.448 elif tag == 'insert':
2025-07-01 17:49:05.448 btags += '+' * lb
2025-07-01 17:49:05.448 elif tag == 'equal':
2025-07-01 17:49:05.448 atags += ' ' * la
2025-07-01 17:49:05.448 btags += ' ' * lb
2025-07-01 17:49:05.448 else:
2025-07-01 17:49:05.448 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.449 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.449 else:
2025-07-01 17:49:05.449 # the synch pair is identical
2025-07-01 17:49:05.449 yield ' ' + aelt
2025-07-01 17:49:05.449
2025-07-01 17:49:05.449 # pump out diffs from after the synch point
2025-07-01 17:49:05.449 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.449
2025-07-01 17:49:05.449 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.449 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.449
2025-07-01 17:49:05.449 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.449 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.449 alo = 17, ahi = 1101
2025-07-01 17:49:05.449 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.450 blo = 17, bhi = 1101
2025-07-01 17:49:05.450
2025-07-01 17:49:05.450 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.450 g = []
2025-07-01 17:49:05.450 if alo < ahi:
2025-07-01 17:49:05.450 if blo < bhi:
2025-07-01 17:49:05.450 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.450 else:
2025-07-01 17:49:05.450 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.450 elif blo < bhi:
2025-07-01 17:49:05.450 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.450
2025-07-01 17:49:05.450 > yield from g
2025-07-01 17:49:05.450
2025-07-01 17:49:05.451 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.451 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.451
2025-07-01 17:49:05.451 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.451 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.451 alo = 17, ahi = 1101
2025-07-01 17:49:05.451 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.451 blo = 17, bhi = 1101
2025-07-01 17:49:05.451
2025-07-01 17:49:05.451 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.451 r"""
2025-07-01 17:49:05.451 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.451 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.451 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.451 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.452
2025-07-01 17:49:05.452 Example:
2025-07-01 17:49:05.452
2025-07-01 17:49:05.452 >>> d = Differ()
2025-07-01 17:49:05.452 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.452 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.452 >>> print(''.join(results), end="")
2025-07-01 17:49:05.452 - abcDefghiJkl
2025-07-01 17:49:05.452 + abcdefGhijkl
2025-07-01 17:49:05.452 """
2025-07-01 17:49:05.452
2025-07-01 17:49:05.452 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.452 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.453 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.453 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.453 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.453
2025-07-01 17:49:05.453 # search for the pair that matches best without being identical
2025-07-01 17:49:05.453 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.453 # on junk -- unless we have to)
2025-07-01 17:49:05.453 for j in range(blo, bhi):
2025-07-01 17:49:05.453 bj = b[j]
2025-07-01 17:49:05.453 cruncher.set_seq2(bj)
2025-07-01 17:49:05.453 for i in range(alo, ahi):
2025-07-01 17:49:05.453 ai = a[i]
2025-07-01 17:49:05.453 if ai == bj:
2025-07-01 17:49:05.453 if eqi is None:
2025-07-01 17:49:05.453 eqi, eqj = i, j
2025-07-01 17:49:05.453 continue
2025-07-01 17:49:05.453 cruncher.set_seq1(ai)
2025-07-01 17:49:05.454 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.454 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.454 # compares by a factor of 3.
2025-07-01 17:49:05.454 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.454 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.454 # of the computation is cached by cruncher
2025-07-01 17:49:05.454 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.454 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.454 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.454 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.454 if best_ratio < cutoff:
2025-07-01 17:49:05.454 # no non-identical "pretty close" pair
2025-07-01 17:49:05.454 if eqi is None:
2025-07-01 17:49:05.454 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.454 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.454 return
2025-07-01 17:49:05.454 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.455 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.455 else:
2025-07-01 17:49:05.455 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.455 eqi = None
2025-07-01 17:49:05.455
2025-07-01 17:49:05.455 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.455 # identical
2025-07-01 17:49:05.455
2025-07-01 17:49:05.455 # pump out diffs from before the synch point
2025-07-01 17:49:05.455 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.455
2025-07-01 17:49:05.455 # do intraline marking on the synch pair
2025-07-01 17:49:05.455 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.455 if eqi is None:
2025-07-01 17:49:05.455 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.455 atags = btags = ""
2025-07-01 17:49:05.456 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.456 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.456 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.456 if tag == 'replace':
2025-07-01 17:49:05.456 atags += '^' * la
2025-07-01 17:49:05.456 btags += '^' * lb
2025-07-01 17:49:05.456 elif tag == 'delete':
2025-07-01 17:49:05.456 atags += '-' * la
2025-07-01 17:49:05.456 elif tag == 'insert':
2025-07-01 17:49:05.456 btags += '+' * lb
2025-07-01 17:49:05.456 elif tag == 'equal':
2025-07-01 17:49:05.456 atags += ' ' * la
2025-07-01 17:49:05.456 btags += ' ' * lb
2025-07-01 17:49:05.456 else:
2025-07-01 17:49:05.456 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.456 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.456 else:
2025-07-01 17:49:05.457 # the synch pair is identical
2025-07-01 17:49:05.457 yield ' ' + aelt
2025-07-01 17:49:05.457
2025-07-01 17:49:05.457 # pump out diffs from after the synch point
2025-07-01 17:49:05.457 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.457
2025-07-01 17:49:05.457 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.457 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.457
2025-07-01 17:49:05.457 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.457 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.457 alo = 18, ahi = 1101
2025-07-01 17:49:05.457 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.457 blo = 18, bhi = 1101
2025-07-01 17:49:05.457
2025-07-01 17:49:05.457 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.458 g = []
2025-07-01 17:49:05.462 if alo < ahi:
2025-07-01 17:49:05.463 if blo < bhi:
2025-07-01 17:49:05.463 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.463 else:
2025-07-01 17:49:05.463 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.463 elif blo < bhi:
2025-07-01 17:49:05.463 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.463
2025-07-01 17:49:05.463 > yield from g
2025-07-01 17:49:05.463
2025-07-01 17:49:05.463 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.463 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.463
2025-07-01 17:49:05.463 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.463 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.463 alo = 18, ahi = 1101
2025-07-01 17:49:05.463 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.464 blo = 18, bhi = 1101
2025-07-01 17:49:05.464
2025-07-01 17:49:05.464 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.464 r"""
2025-07-01 17:49:05.464 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.464 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.464 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.464 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.464
2025-07-01 17:49:05.464 Example:
2025-07-01 17:49:05.464
2025-07-01 17:49:05.464 >>> d = Differ()
2025-07-01 17:49:05.464 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.464 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.464 >>> print(''.join(results), end="")
2025-07-01 17:49:05.464 - abcDefghiJkl
2025-07-01 17:49:05.465 + abcdefGhijkl
2025-07-01 17:49:05.465 """
2025-07-01 17:49:05.465
2025-07-01 17:49:05.465 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.465 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.465 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.465 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.465 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.465
2025-07-01 17:49:05.465 # search for the pair that matches best without being identical
2025-07-01 17:49:05.465 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.465 # on junk -- unless we have to)
2025-07-01 17:49:05.465 for j in range(blo, bhi):
2025-07-01 17:49:05.465 bj = b[j]
2025-07-01 17:49:05.465 cruncher.set_seq2(bj)
2025-07-01 17:49:05.466 for i in range(alo, ahi):
2025-07-01 17:49:05.466 ai = a[i]
2025-07-01 17:49:05.466 if ai == bj:
2025-07-01 17:49:05.466 if eqi is None:
2025-07-01 17:49:05.466 eqi, eqj = i, j
2025-07-01 17:49:05.466 continue
2025-07-01 17:49:05.466 cruncher.set_seq1(ai)
2025-07-01 17:49:05.466 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.466 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.466 # compares by a factor of 3.
2025-07-01 17:49:05.466 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.466 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.466 # of the computation is cached by cruncher
2025-07-01 17:49:05.466 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.466 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.466 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.466 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.467 if best_ratio < cutoff:
2025-07-01 17:49:05.467 # no non-identical "pretty close" pair
2025-07-01 17:49:05.467 if eqi is None:
2025-07-01 17:49:05.467 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.467 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.467 return
2025-07-01 17:49:05.467 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.467 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.467 else:
2025-07-01 17:49:05.467 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.467 eqi = None
2025-07-01 17:49:05.467
2025-07-01 17:49:05.467 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.467 # identical
2025-07-01 17:49:05.467
2025-07-01 17:49:05.467 # pump out diffs from before the synch point
2025-07-01 17:49:05.467 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.468
2025-07-01 17:49:05.468 # do intraline marking on the synch pair
2025-07-01 17:49:05.468 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.468 if eqi is None:
2025-07-01 17:49:05.468 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.468 atags = btags = ""
2025-07-01 17:49:05.468 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.468 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.468 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.468 if tag == 'replace':
2025-07-01 17:49:05.468 atags += '^' * la
2025-07-01 17:49:05.468 btags += '^' * lb
2025-07-01 17:49:05.468 elif tag == 'delete':
2025-07-01 17:49:05.468 atags += '-' * la
2025-07-01 17:49:05.468 elif tag == 'insert':
2025-07-01 17:49:05.468 btags += '+' * lb
2025-07-01 17:49:05.468 elif tag == 'equal':
2025-07-01 17:49:05.469 atags += ' ' * la
2025-07-01 17:49:05.469 btags += ' ' * lb
2025-07-01 17:49:05.469 else:
2025-07-01 17:49:05.469 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.469 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.469 else:
2025-07-01 17:49:05.469 # the synch pair is identical
2025-07-01 17:49:05.469 yield ' ' + aelt
2025-07-01 17:49:05.469
2025-07-01 17:49:05.469 # pump out diffs from after the synch point
2025-07-01 17:49:05.469 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.469
2025-07-01 17:49:05.469 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.469 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.469
2025-07-01 17:49:05.469 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.469 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.470 alo = 19, ahi = 1101
2025-07-01 17:49:05.470 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.470 blo = 19, bhi = 1101
2025-07-01 17:49:05.470
2025-07-01 17:49:05.470 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.470 g = []
2025-07-01 17:49:05.470 if alo < ahi:
2025-07-01 17:49:05.470 if blo < bhi:
2025-07-01 17:49:05.470 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.470 else:
2025-07-01 17:49:05.470 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.470 elif blo < bhi:
2025-07-01 17:49:05.470 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.470
2025-07-01 17:49:05.470 > yield from g
2025-07-01 17:49:05.470
2025-07-01 17:49:05.471 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.471 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.471
2025-07-01 17:49:05.471 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.471 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.471 alo = 19, ahi = 1101
2025-07-01 17:49:05.471 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.471 blo = 19, bhi = 1101
2025-07-01 17:49:05.471
2025-07-01 17:49:05.471 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.471 r"""
2025-07-01 17:49:05.471 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.471 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.471 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.471 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.472
2025-07-01 17:49:05.472 Example:
2025-07-01 17:49:05.472
2025-07-01 17:49:05.472 >>> d = Differ()
2025-07-01 17:49:05.472 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.472 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.472 >>> print(''.join(results), end="")
2025-07-01 17:49:05.472 - abcDefghiJkl
2025-07-01 17:49:05.472 + abcdefGhijkl
2025-07-01 17:49:05.472 """
2025-07-01 17:49:05.472
2025-07-01 17:49:05.472 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.472 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.472 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.472 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.473 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.473
2025-07-01 17:49:05.473 # search for the pair that matches best without being identical
2025-07-01 17:49:05.473 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.473 # on junk -- unless we have to)
2025-07-01 17:49:05.473 for j in range(blo, bhi):
2025-07-01 17:49:05.473 bj = b[j]
2025-07-01 17:49:05.473 cruncher.set_seq2(bj)
2025-07-01 17:49:05.473 for i in range(alo, ahi):
2025-07-01 17:49:05.473 ai = a[i]
2025-07-01 17:49:05.473 if ai == bj:
2025-07-01 17:49:05.473 if eqi is None:
2025-07-01 17:49:05.473 eqi, eqj = i, j
2025-07-01 17:49:05.473 continue
2025-07-01 17:49:05.473 cruncher.set_seq1(ai)
2025-07-01 17:49:05.473 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.474 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.477 # compares by a factor of 3.
2025-07-01 17:49:05.477 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.477 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.477 # of the computation is cached by cruncher
2025-07-01 17:49:05.477 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.477 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.477 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.477 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.477 if best_ratio < cutoff:
2025-07-01 17:49:05.477 # no non-identical "pretty close" pair
2025-07-01 17:49:05.477 if eqi is None:
2025-07-01 17:49:05.477 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.478 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.478 return
2025-07-01 17:49:05.478 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.478 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.478 else:
2025-07-01 17:49:05.478 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.478 eqi = None
2025-07-01 17:49:05.478
2025-07-01 17:49:05.478 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.478 # identical
2025-07-01 17:49:05.478
2025-07-01 17:49:05.478 # pump out diffs from before the synch point
2025-07-01 17:49:05.478 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.478
2025-07-01 17:49:05.478 # do intraline marking on the synch pair
2025-07-01 17:49:05.478 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.478 if eqi is None:
2025-07-01 17:49:05.479 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.479 atags = btags = ""
2025-07-01 17:49:05.479 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.479 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.479 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.479 if tag == 'replace':
2025-07-01 17:49:05.479 atags += '^' * la
2025-07-01 17:49:05.479 btags += '^' * lb
2025-07-01 17:49:05.479 elif tag == 'delete':
2025-07-01 17:49:05.479 atags += '-' * la
2025-07-01 17:49:05.479 elif tag == 'insert':
2025-07-01 17:49:05.479 btags += '+' * lb
2025-07-01 17:49:05.479 elif tag == 'equal':
2025-07-01 17:49:05.479 atags += ' ' * la
2025-07-01 17:49:05.479 btags += ' ' * lb
2025-07-01 17:49:05.479 else:
2025-07-01 17:49:05.479 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.480 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.480 else:
2025-07-01 17:49:05.480 # the synch pair is identical
2025-07-01 17:49:05.480 yield ' ' + aelt
2025-07-01 17:49:05.480
2025-07-01 17:49:05.480 # pump out diffs from after the synch point
2025-07-01 17:49:05.480 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.480
2025-07-01 17:49:05.480 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.480 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.480
2025-07-01 17:49:05.480 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.480 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.480 alo = 20, ahi = 1101
2025-07-01 17:49:05.480 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.480 blo = 20, bhi = 1101
2025-07-01 17:49:05.481
2025-07-01 17:49:05.481 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.481 g = []
2025-07-01 17:49:05.481 if alo < ahi:
2025-07-01 17:49:05.481 if blo < bhi:
2025-07-01 17:49:05.481 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.481 else:
2025-07-01 17:49:05.481 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.481 elif blo < bhi:
2025-07-01 17:49:05.481 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.481
2025-07-01 17:49:05.481 > yield from g
2025-07-01 17:49:05.481
2025-07-01 17:49:05.481 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.481 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.481
2025-07-01 17:49:05.481 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.482 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.482 alo = 20, ahi = 1101
2025-07-01 17:49:05.482 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.482 blo = 20, bhi = 1101
2025-07-01 17:49:05.482
2025-07-01 17:49:05.482 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.482 r"""
2025-07-01 17:49:05.482 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.482 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.482 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.482 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.482
2025-07-01 17:49:05.482 Example:
2025-07-01 17:49:05.482
2025-07-01 17:49:05.482 >>> d = Differ()
2025-07-01 17:49:05.482 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.482 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.483 >>> print(''.join(results), end="")
2025-07-01 17:49:05.483 - abcDefghiJkl
2025-07-01 17:49:05.483 + abcdefGhijkl
2025-07-01 17:49:05.483 """
2025-07-01 17:49:05.483
2025-07-01 17:49:05.483 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.483 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.483 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.483 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.483 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.483
2025-07-01 17:49:05.483 # search for the pair that matches best without being identical
2025-07-01 17:49:05.483 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.483 # on junk -- unless we have to)
2025-07-01 17:49:05.483 for j in range(blo, bhi):
2025-07-01 17:49:05.483 bj = b[j]
2025-07-01 17:49:05.484 cruncher.set_seq2(bj)
2025-07-01 17:49:05.484 for i in range(alo, ahi):
2025-07-01 17:49:05.484 ai = a[i]
2025-07-01 17:49:05.484 if ai == bj:
2025-07-01 17:49:05.484 if eqi is None:
2025-07-01 17:49:05.484 eqi, eqj = i, j
2025-07-01 17:49:05.484 continue
2025-07-01 17:49:05.484 cruncher.set_seq1(ai)
2025-07-01 17:49:05.484 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.484 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.484 # compares by a factor of 3.
2025-07-01 17:49:05.484 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.484 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.484 # of the computation is cached by cruncher
2025-07-01 17:49:05.484 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.484 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.485 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.485 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.485 if best_ratio < cutoff:
2025-07-01 17:49:05.485 # no non-identical "pretty close" pair
2025-07-01 17:49:05.485 if eqi is None:
2025-07-01 17:49:05.485 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.485 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.485 return
2025-07-01 17:49:05.485 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.485 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.485 else:
2025-07-01 17:49:05.485 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.485 eqi = None
2025-07-01 17:49:05.485
2025-07-01 17:49:05.485 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.486 # identical
2025-07-01 17:49:05.486
2025-07-01 17:49:05.486 # pump out diffs from before the synch point
2025-07-01 17:49:05.486 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.486
2025-07-01 17:49:05.486 # do intraline marking on the synch pair
2025-07-01 17:49:05.486 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.486 if eqi is None:
2025-07-01 17:49:05.486 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.486 atags = btags = ""
2025-07-01 17:49:05.486 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.486 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.486 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.486 if tag == 'replace':
2025-07-01 17:49:05.486 atags += '^' * la
2025-07-01 17:49:05.486 btags += '^' * lb
2025-07-01 17:49:05.487 elif tag == 'delete':
2025-07-01 17:49:05.487 atags += '-' * la
2025-07-01 17:49:05.487 elif tag == 'insert':
2025-07-01 17:49:05.487 btags += '+' * lb
2025-07-01 17:49:05.487 elif tag == 'equal':
2025-07-01 17:49:05.487 atags += ' ' * la
2025-07-01 17:49:05.487 btags += ' ' * lb
2025-07-01 17:49:05.487 else:
2025-07-01 17:49:05.487 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.487 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.487 else:
2025-07-01 17:49:05.487 # the synch pair is identical
2025-07-01 17:49:05.487 yield ' ' + aelt
2025-07-01 17:49:05.487
2025-07-01 17:49:05.487 # pump out diffs from after the synch point
2025-07-01 17:49:05.487 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.488
2025-07-01 17:49:05.488 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.488 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.488
2025-07-01 17:49:05.488 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.488 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.488 alo = 21, ahi = 1101
2025-07-01 17:49:05.488 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.488 blo = 21, bhi = 1101
2025-07-01 17:49:05.488
2025-07-01 17:49:05.488 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.488 g = []
2025-07-01 17:49:05.488 if alo < ahi:
2025-07-01 17:49:05.488 if blo < bhi:
2025-07-01 17:49:05.488 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.488 else:
2025-07-01 17:49:05.488 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.489 elif blo < bhi:
2025-07-01 17:49:05.489 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.489
2025-07-01 17:49:05.489 > yield from g
2025-07-01 17:49:05.489
2025-07-01 17:49:05.489 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.489 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.489
2025-07-01 17:49:05.489 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.489 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.489 alo = 21, ahi = 1101
2025-07-01 17:49:05.489 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.489 blo = 21, bhi = 1101
2025-07-01 17:49:05.489
2025-07-01 17:49:05.489 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.489 r"""
2025-07-01 17:49:05.489 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.490 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.493 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.493 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.493
2025-07-01 17:49:05.493 Example:
2025-07-01 17:49:05.493
2025-07-01 17:49:05.493 >>> d = Differ()
2025-07-01 17:49:05.493 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.493 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.493 >>> print(''.join(results), end="")
2025-07-01 17:49:05.493 - abcDefghiJkl
2025-07-01 17:49:05.493 + abcdefGhijkl
2025-07-01 17:49:05.494 """
2025-07-01 17:49:05.494
2025-07-01 17:49:05.494 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.494 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.494 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.494 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.494 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.494
2025-07-01 17:49:05.494 # search for the pair that matches best without being identical
2025-07-01 17:49:05.494 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.494 # on junk -- unless we have to)
2025-07-01 17:49:05.494 for j in range(blo, bhi):
2025-07-01 17:49:05.494 bj = b[j]
2025-07-01 17:49:05.494 cruncher.set_seq2(bj)
2025-07-01 17:49:05.494 for i in range(alo, ahi):
2025-07-01 17:49:05.494 ai = a[i]
2025-07-01 17:49:05.495 if ai == bj:
2025-07-01 17:49:05.495 if eqi is None:
2025-07-01 17:49:05.495 eqi, eqj = i, j
2025-07-01 17:49:05.495 continue
2025-07-01 17:49:05.495 cruncher.set_seq1(ai)
2025-07-01 17:49:05.495 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.495 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.495 # compares by a factor of 3.
2025-07-01 17:49:05.495 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.495 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.495 # of the computation is cached by cruncher
2025-07-01 17:49:05.495 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.495 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.495 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.495 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.496 if best_ratio < cutoff:
2025-07-01 17:49:05.496 # no non-identical "pretty close" pair
2025-07-01 17:49:05.496 if eqi is None:
2025-07-01 17:49:05.496 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.496 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.496 return
2025-07-01 17:49:05.496 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.496 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.496 else:
2025-07-01 17:49:05.496 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.496 eqi = None
2025-07-01 17:49:05.496
2025-07-01 17:49:05.496 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.496 # identical
2025-07-01 17:49:05.496
2025-07-01 17:49:05.496 # pump out diffs from before the synch point
2025-07-01 17:49:05.496 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.497
2025-07-01 17:49:05.497 # do intraline marking on the synch pair
2025-07-01 17:49:05.497 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.497 if eqi is None:
2025-07-01 17:49:05.497 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.497 atags = btags = ""
2025-07-01 17:49:05.497 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.497 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.497 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.497 if tag == 'replace':
2025-07-01 17:49:05.497 atags += '^' * la
2025-07-01 17:49:05.497 btags += '^' * lb
2025-07-01 17:49:05.497 elif tag == 'delete':
2025-07-01 17:49:05.497 atags += '-' * la
2025-07-01 17:49:05.497 elif tag == 'insert':
2025-07-01 17:49:05.497 btags += '+' * lb
2025-07-01 17:49:05.498 elif tag == 'equal':
2025-07-01 17:49:05.498 atags += ' ' * la
2025-07-01 17:49:05.498 btags += ' ' * lb
2025-07-01 17:49:05.498 else:
2025-07-01 17:49:05.498 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.498 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.498 else:
2025-07-01 17:49:05.498 # the synch pair is identical
2025-07-01 17:49:05.498 yield ' ' + aelt
2025-07-01 17:49:05.498
2025-07-01 17:49:05.498 # pump out diffs from after the synch point
2025-07-01 17:49:05.498 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.498
2025-07-01 17:49:05.498 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.498 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.498
2025-07-01 17:49:05.499 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.499 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.499 alo = 22, ahi = 1101
2025-07-01 17:49:05.499 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.499 blo = 22, bhi = 1101
2025-07-01 17:49:05.499
2025-07-01 17:49:05.499 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.499 g = []
2025-07-01 17:49:05.499 if alo < ahi:
2025-07-01 17:49:05.499 if blo < bhi:
2025-07-01 17:49:05.499 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.499 else:
2025-07-01 17:49:05.499 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.499 elif blo < bhi:
2025-07-01 17:49:05.499 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.499
2025-07-01 17:49:05.500 > yield from g
2025-07-01 17:49:05.500
2025-07-01 17:49:05.500 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.500 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.500
2025-07-01 17:49:05.500 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.500 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.500 alo = 22, ahi = 1101
2025-07-01 17:49:05.500 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.500 blo = 22, bhi = 1101
2025-07-01 17:49:05.500
2025-07-01 17:49:05.500 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.500 r"""
2025-07-01 17:49:05.500 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.500 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.500 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.501 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.501
2025-07-01 17:49:05.501 Example:
2025-07-01 17:49:05.501
2025-07-01 17:49:05.501 >>> d = Differ()
2025-07-01 17:49:05.501 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.501 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.501 >>> print(''.join(results), end="")
2025-07-01 17:49:05.501 - abcDefghiJkl
2025-07-01 17:49:05.501 + abcdefGhijkl
2025-07-01 17:49:05.501 """
2025-07-01 17:49:05.501
2025-07-01 17:49:05.501 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.501 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.502 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.502 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.502 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.502
2025-07-01 17:49:05.502 # search for the pair that matches best without being identical
2025-07-01 17:49:05.502 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.502 # on junk -- unless we have to)
2025-07-01 17:49:05.502 for j in range(blo, bhi):
2025-07-01 17:49:05.502 bj = b[j]
2025-07-01 17:49:05.502 cruncher.set_seq2(bj)
2025-07-01 17:49:05.502 for i in range(alo, ahi):
2025-07-01 17:49:05.502 ai = a[i]
2025-07-01 17:49:05.502 if ai == bj:
2025-07-01 17:49:05.502 if eqi is None:
2025-07-01 17:49:05.502 eqi, eqj = i, j
2025-07-01 17:49:05.502 continue
2025-07-01 17:49:05.502 cruncher.set_seq1(ai)
2025-07-01 17:49:05.503 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.503 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.503 # compares by a factor of 3.
2025-07-01 17:49:05.503 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.503 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.503 # of the computation is cached by cruncher
2025-07-01 17:49:05.503 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.503 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.503 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.503 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.503 if best_ratio < cutoff:
2025-07-01 17:49:05.503 # no non-identical "pretty close" pair
2025-07-01 17:49:05.503 if eqi is None:
2025-07-01 17:49:05.503 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.503 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.503 return
2025-07-01 17:49:05.503 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.504 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.504 else:
2025-07-01 17:49:05.504 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.504 eqi = None
2025-07-01 17:49:05.504
2025-07-01 17:49:05.504 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.504 # identical
2025-07-01 17:49:05.504
2025-07-01 17:49:05.504 # pump out diffs from before the synch point
2025-07-01 17:49:05.504 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.504
2025-07-01 17:49:05.504 # do intraline marking on the synch pair
2025-07-01 17:49:05.504 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.504 if eqi is None:
2025-07-01 17:49:05.504 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.504 atags = btags = ""
2025-07-01 17:49:05.504 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.505 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.505 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.505 if tag == 'replace':
2025-07-01 17:49:05.505 atags += '^' * la
2025-07-01 17:49:05.505 btags += '^' * lb
2025-07-01 17:49:05.505 elif tag == 'delete':
2025-07-01 17:49:05.505 atags += '-' * la
2025-07-01 17:49:05.505 elif tag == 'insert':
2025-07-01 17:49:05.505 btags += '+' * lb
2025-07-01 17:49:05.505 elif tag == 'equal':
2025-07-01 17:49:05.505 atags += ' ' * la
2025-07-01 17:49:05.505 btags += ' ' * lb
2025-07-01 17:49:05.505 else:
2025-07-01 17:49:05.505 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.505 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.506 else:
2025-07-01 17:49:05.509 # the synch pair is identical
2025-07-01 17:49:05.509 yield ' ' + aelt
2025-07-01 17:49:05.509
2025-07-01 17:49:05.509 # pump out diffs from after the synch point
2025-07-01 17:49:05.509 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.509
2025-07-01 17:49:05.509 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.509 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.509
2025-07-01 17:49:05.509 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.509 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.509 alo = 23, ahi = 1101
2025-07-01 17:49:05.509 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.509 blo = 23, bhi = 1101
2025-07-01 17:49:05.509
2025-07-01 17:49:05.510 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.510 g = []
2025-07-01 17:49:05.510 if alo < ahi:
2025-07-01 17:49:05.510 if blo < bhi:
2025-07-01 17:49:05.510 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.510 else:
2025-07-01 17:49:05.510 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.510 elif blo < bhi:
2025-07-01 17:49:05.510 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.510
2025-07-01 17:49:05.510 > yield from g
2025-07-01 17:49:05.510
2025-07-01 17:49:05.510 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.510 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.510
2025-07-01 17:49:05.510 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.511 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.511 alo = 23, ahi = 1101
2025-07-01 17:49:05.511 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.511 blo = 23, bhi = 1101
2025-07-01 17:49:05.511
2025-07-01 17:49:05.511 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.511 r"""
2025-07-01 17:49:05.511 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.511 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.511 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.511 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.511
2025-07-01 17:49:05.511 Example:
2025-07-01 17:49:05.511
2025-07-01 17:49:05.511 >>> d = Differ()
2025-07-01 17:49:05.511 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.512 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.512 >>> print(''.join(results), end="")
2025-07-01 17:49:05.512 - abcDefghiJkl
2025-07-01 17:49:05.512 + abcdefGhijkl
2025-07-01 17:49:05.512 """
2025-07-01 17:49:05.512
2025-07-01 17:49:05.512 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.512 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.512 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.512 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.512 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.512
2025-07-01 17:49:05.512 # search for the pair that matches best without being identical
2025-07-01 17:49:05.513 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.513 # on junk -- unless we have to)
2025-07-01 17:49:05.513 for j in range(blo, bhi):
2025-07-01 17:49:05.513 bj = b[j]
2025-07-01 17:49:05.513 cruncher.set_seq2(bj)
2025-07-01 17:49:05.513 for i in range(alo, ahi):
2025-07-01 17:49:05.513 ai = a[i]
2025-07-01 17:49:05.513 if ai == bj:
2025-07-01 17:49:05.513 if eqi is None:
2025-07-01 17:49:05.513 eqi, eqj = i, j
2025-07-01 17:49:05.513 continue
2025-07-01 17:49:05.513 cruncher.set_seq1(ai)
2025-07-01 17:49:05.513 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.513 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.513 # compares by a factor of 3.
2025-07-01 17:49:05.513 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.514 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.514 # of the computation is cached by cruncher
2025-07-01 17:49:05.514 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.514 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.514 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.514 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.514 if best_ratio < cutoff:
2025-07-01 17:49:05.514 # no non-identical "pretty close" pair
2025-07-01 17:49:05.514 if eqi is None:
2025-07-01 17:49:05.514 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.514 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.514 return
2025-07-01 17:49:05.514 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.514 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.514 else:
2025-07-01 17:49:05.514 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.515 eqi = None
2025-07-01 17:49:05.515
2025-07-01 17:49:05.515 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.515 # identical
2025-07-01 17:49:05.515
2025-07-01 17:49:05.515 # pump out diffs from before the synch point
2025-07-01 17:49:05.515 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.515
2025-07-01 17:49:05.515 # do intraline marking on the synch pair
2025-07-01 17:49:05.515 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.515 if eqi is None:
2025-07-01 17:49:05.515 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.515 atags = btags = ""
2025-07-01 17:49:05.515 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.515 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.515 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.516 if tag == 'replace':
2025-07-01 17:49:05.516 atags += '^' * la
2025-07-01 17:49:05.516 btags += '^' * lb
2025-07-01 17:49:05.516 elif tag == 'delete':
2025-07-01 17:49:05.516 atags += '-' * la
2025-07-01 17:49:05.516 elif tag == 'insert':
2025-07-01 17:49:05.516 btags += '+' * lb
2025-07-01 17:49:05.516 elif tag == 'equal':
2025-07-01 17:49:05.516 atags += ' ' * la
2025-07-01 17:49:05.516 btags += ' ' * lb
2025-07-01 17:49:05.516 else:
2025-07-01 17:49:05.516 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.516 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.516 else:
2025-07-01 17:49:05.516 # the synch pair is identical
2025-07-01 17:49:05.516 yield ' ' + aelt
2025-07-01 17:49:05.517
2025-07-01 17:49:05.517 # pump out diffs from after the synch point
2025-07-01 17:49:05.517 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.517
2025-07-01 17:49:05.517 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.517 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.517
2025-07-01 17:49:05.517 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.517 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.517 alo = 26, ahi = 1101
2025-07-01 17:49:05.517 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.517 blo = 26, bhi = 1101
2025-07-01 17:49:05.517
2025-07-01 17:49:05.517 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.517 g = []
2025-07-01 17:49:05.517 if alo < ahi:
2025-07-01 17:49:05.518 if blo < bhi:
2025-07-01 17:49:05.518 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.518 else:
2025-07-01 17:49:05.518 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.518 elif blo < bhi:
2025-07-01 17:49:05.518 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.518
2025-07-01 17:49:05.518 > yield from g
2025-07-01 17:49:05.518
2025-07-01 17:49:05.518 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.518 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.518
2025-07-01 17:49:05.518 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.518 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.518 alo = 26, ahi = 1101
2025-07-01 17:49:05.518 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.518 blo = 26, bhi = 1101
2025-07-01 17:49:05.519
2025-07-01 17:49:05.519 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.519 r"""
2025-07-01 17:49:05.519 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.519 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.519 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.519 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.519
2025-07-01 17:49:05.519 Example:
2025-07-01 17:49:05.519
2025-07-01 17:49:05.519 >>> d = Differ()
2025-07-01 17:49:05.519 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.519 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.519 >>> print(''.join(results), end="")
2025-07-01 17:49:05.519 - abcDefghiJkl
2025-07-01 17:49:05.520 + abcdefGhijkl
2025-07-01 17:49:05.520 """
2025-07-01 17:49:05.520
2025-07-01 17:49:05.520 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.520 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.520 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.520 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.520 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.520
2025-07-01 17:49:05.520 # search for the pair that matches best without being identical
2025-07-01 17:49:05.520 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.520 # on junk -- unless we have to)
2025-07-01 17:49:05.520 for j in range(blo, bhi):
2025-07-01 17:49:05.520 bj = b[j]
2025-07-01 17:49:05.520 cruncher.set_seq2(bj)
2025-07-01 17:49:05.520 for i in range(alo, ahi):
2025-07-01 17:49:05.521 ai = a[i]
2025-07-01 17:49:05.521 if ai == bj:
2025-07-01 17:49:05.521 if eqi is None:
2025-07-01 17:49:05.521 eqi, eqj = i, j
2025-07-01 17:49:05.521 continue
2025-07-01 17:49:05.521 cruncher.set_seq1(ai)
2025-07-01 17:49:05.521 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.521 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.521 # compares by a factor of 3.
2025-07-01 17:49:05.521 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.521 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.521 # of the computation is cached by cruncher
2025-07-01 17:49:05.521 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.521 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.521 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.521 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.521 if best_ratio < cutoff:
2025-07-01 17:49:05.525 # no non-identical "pretty close" pair
2025-07-01 17:49:05.525 if eqi is None:
2025-07-01 17:49:05.525 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.525 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.525 return
2025-07-01 17:49:05.525 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.525 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.525 else:
2025-07-01 17:49:05.525 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.525 eqi = None
2025-07-01 17:49:05.525
2025-07-01 17:49:05.525 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.525 # identical
2025-07-01 17:49:05.525
2025-07-01 17:49:05.526 # pump out diffs from before the synch point
2025-07-01 17:49:05.526 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.526
2025-07-01 17:49:05.526 # do intraline marking on the synch pair
2025-07-01 17:49:05.526 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.526 if eqi is None:
2025-07-01 17:49:05.526 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.526 atags = btags = ""
2025-07-01 17:49:05.526 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.526 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.526 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.526 if tag == 'replace':
2025-07-01 17:49:05.526 atags += '^' * la
2025-07-01 17:49:05.526 btags += '^' * lb
2025-07-01 17:49:05.526 elif tag == 'delete':
2025-07-01 17:49:05.526 atags += '-' * la
2025-07-01 17:49:05.527 elif tag == 'insert':
2025-07-01 17:49:05.527 btags += '+' * lb
2025-07-01 17:49:05.527 elif tag == 'equal':
2025-07-01 17:49:05.527 atags += ' ' * la
2025-07-01 17:49:05.527 btags += ' ' * lb
2025-07-01 17:49:05.527 else:
2025-07-01 17:49:05.527 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.527 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.527 else:
2025-07-01 17:49:05.527 # the synch pair is identical
2025-07-01 17:49:05.527 yield ' ' + aelt
2025-07-01 17:49:05.527
2025-07-01 17:49:05.527 # pump out diffs from after the synch point
2025-07-01 17:49:05.527 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.527
2025-07-01 17:49:05.527 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.528 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.528
2025-07-01 17:49:05.528 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.528 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.528 alo = 27, ahi = 1101
2025-07-01 17:49:05.528 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.528 blo = 27, bhi = 1101
2025-07-01 17:49:05.528
2025-07-01 17:49:05.528 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.528 g = []
2025-07-01 17:49:05.528 if alo < ahi:
2025-07-01 17:49:05.528 if blo < bhi:
2025-07-01 17:49:05.528 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.528 else:
2025-07-01 17:49:05.528 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.528 elif blo < bhi:
2025-07-01 17:49:05.528 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.529
2025-07-01 17:49:05.529 > yield from g
2025-07-01 17:49:05.529
2025-07-01 17:49:05.529 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.529 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.529
2025-07-01 17:49:05.529 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.529 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.529 alo = 27, ahi = 1101
2025-07-01 17:49:05.529 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.529 blo = 27, bhi = 1101
2025-07-01 17:49:05.529
2025-07-01 17:49:05.529 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.529 r"""
2025-07-01 17:49:05.529 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.529 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.530 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.530 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.530
2025-07-01 17:49:05.530 Example:
2025-07-01 17:49:05.530
2025-07-01 17:49:05.530 >>> d = Differ()
2025-07-01 17:49:05.530 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.530 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.530 >>> print(''.join(results), end="")
2025-07-01 17:49:05.530 - abcDefghiJkl
2025-07-01 17:49:05.530 + abcdefGhijkl
2025-07-01 17:49:05.530 """
2025-07-01 17:49:05.530
2025-07-01 17:49:05.530 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.530 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.530 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.531 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.531 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.531
2025-07-01 17:49:05.531 # search for the pair that matches best without being identical
2025-07-01 17:49:05.531 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.531 # on junk -- unless we have to)
2025-07-01 17:49:05.531 for j in range(blo, bhi):
2025-07-01 17:49:05.531 bj = b[j]
2025-07-01 17:49:05.531 cruncher.set_seq2(bj)
2025-07-01 17:49:05.531 for i in range(alo, ahi):
2025-07-01 17:49:05.531 ai = a[i]
2025-07-01 17:49:05.531 if ai == bj:
2025-07-01 17:49:05.531 if eqi is None:
2025-07-01 17:49:05.531 eqi, eqj = i, j
2025-07-01 17:49:05.531 continue
2025-07-01 17:49:05.531 cruncher.set_seq1(ai)
2025-07-01 17:49:05.531 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.532 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.532 # compares by a factor of 3.
2025-07-01 17:49:05.532 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.532 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.532 # of the computation is cached by cruncher
2025-07-01 17:49:05.532 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.532 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.532 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.532 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.532 if best_ratio < cutoff:
2025-07-01 17:49:05.532 # no non-identical "pretty close" pair
2025-07-01 17:49:05.532 if eqi is None:
2025-07-01 17:49:05.532 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.532 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.532 return
2025-07-01 17:49:05.532 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.532 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.533 else:
2025-07-01 17:49:05.533 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.533 eqi = None
2025-07-01 17:49:05.533
2025-07-01 17:49:05.533 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.533 # identical
2025-07-01 17:49:05.533
2025-07-01 17:49:05.533 # pump out diffs from before the synch point
2025-07-01 17:49:05.533 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.533
2025-07-01 17:49:05.533 # do intraline marking on the synch pair
2025-07-01 17:49:05.533 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.533 if eqi is None:
2025-07-01 17:49:05.533 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.533 atags = btags = ""
2025-07-01 17:49:05.533 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.534 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.534 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.534 if tag == 'replace':
2025-07-01 17:49:05.534 atags += '^' * la
2025-07-01 17:49:05.534 btags += '^' * lb
2025-07-01 17:49:05.534 elif tag == 'delete':
2025-07-01 17:49:05.534 atags += '-' * la
2025-07-01 17:49:05.534 elif tag == 'insert':
2025-07-01 17:49:05.534 btags += '+' * lb
2025-07-01 17:49:05.534 elif tag == 'equal':
2025-07-01 17:49:05.534 atags += ' ' * la
2025-07-01 17:49:05.534 btags += ' ' * lb
2025-07-01 17:49:05.534 else:
2025-07-01 17:49:05.534 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.534 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.534 else:
2025-07-01 17:49:05.534 # the synch pair is identical
2025-07-01 17:49:05.534 yield ' ' + aelt
2025-07-01 17:49:05.535
2025-07-01 17:49:05.535 # pump out diffs from after the synch point
2025-07-01 17:49:05.535 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.535
2025-07-01 17:49:05.535 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.535 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.535
2025-07-01 17:49:05.535 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.535 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.535 alo = 28, ahi = 1101
2025-07-01 17:49:05.535 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.535 blo = 28, bhi = 1101
2025-07-01 17:49:05.535
2025-07-01 17:49:05.535 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.535 g = []
2025-07-01 17:49:05.535 if alo < ahi:
2025-07-01 17:49:05.536 if blo < bhi:
2025-07-01 17:49:05.536 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.536 else:
2025-07-01 17:49:05.536 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.536 elif blo < bhi:
2025-07-01 17:49:05.536 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.536
2025-07-01 17:49:05.536 > yield from g
2025-07-01 17:49:05.536
2025-07-01 17:49:05.536 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.536 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.536
2025-07-01 17:49:05.536 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.536 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.536 alo = 28, ahi = 1101
2025-07-01 17:49:05.536 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.537 blo = 28, bhi = 1101
2025-07-01 17:49:05.537
2025-07-01 17:49:05.537 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.537 r"""
2025-07-01 17:49:05.537 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.537 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.537 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.537 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.537
2025-07-01 17:49:05.537 Example:
2025-07-01 17:49:05.537
2025-07-01 17:49:05.537 >>> d = Differ()
2025-07-01 17:49:05.537 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.537 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.537 >>> print(''.join(results), end="")
2025-07-01 17:49:05.537 - abcDefghiJkl
2025-07-01 17:49:05.541 + abcdefGhijkl
2025-07-01 17:49:05.541 """
2025-07-01 17:49:05.541
2025-07-01 17:49:05.541 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.541 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.541 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.541 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.541 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.541
2025-07-01 17:49:05.541 # search for the pair that matches best without being identical
2025-07-01 17:49:05.541 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.541 # on junk -- unless we have to)
2025-07-01 17:49:05.542 for j in range(blo, bhi):
2025-07-01 17:49:05.542 bj = b[j]
2025-07-01 17:49:05.542 cruncher.set_seq2(bj)
2025-07-01 17:49:05.542 for i in range(alo, ahi):
2025-07-01 17:49:05.542 ai = a[i]
2025-07-01 17:49:05.542 if ai == bj:
2025-07-01 17:49:05.542 if eqi is None:
2025-07-01 17:49:05.542 eqi, eqj = i, j
2025-07-01 17:49:05.542 continue
2025-07-01 17:49:05.542 cruncher.set_seq1(ai)
2025-07-01 17:49:05.542 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.542 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.542 # compares by a factor of 3.
2025-07-01 17:49:05.542 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.542 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.542 # of the computation is cached by cruncher
2025-07-01 17:49:05.542 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.543 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.543 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.543 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.543 if best_ratio < cutoff:
2025-07-01 17:49:05.543 # no non-identical "pretty close" pair
2025-07-01 17:49:05.543 if eqi is None:
2025-07-01 17:49:05.543 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.543 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.543 return
2025-07-01 17:49:05.543 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.543 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.543 else:
2025-07-01 17:49:05.543 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.543 eqi = None
2025-07-01 17:49:05.543
2025-07-01 17:49:05.543 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.544 # identical
2025-07-01 17:49:05.544
2025-07-01 17:49:05.544 # pump out diffs from before the synch point
2025-07-01 17:49:05.544 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.544
2025-07-01 17:49:05.544 # do intraline marking on the synch pair
2025-07-01 17:49:05.544 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.544 if eqi is None:
2025-07-01 17:49:05.544 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.544 atags = btags = ""
2025-07-01 17:49:05.544 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.544 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.544 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.544 if tag == 'replace':
2025-07-01 17:49:05.544 atags += '^' * la
2025-07-01 17:49:05.544 btags += '^' * lb
2025-07-01 17:49:05.544 elif tag == 'delete':
2025-07-01 17:49:05.545 atags += '-' * la
2025-07-01 17:49:05.545 elif tag == 'insert':
2025-07-01 17:49:05.545 btags += '+' * lb
2025-07-01 17:49:05.545 elif tag == 'equal':
2025-07-01 17:49:05.545 atags += ' ' * la
2025-07-01 17:49:05.545 btags += ' ' * lb
2025-07-01 17:49:05.545 else:
2025-07-01 17:49:05.545 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.545 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.545 else:
2025-07-01 17:49:05.545 # the synch pair is identical
2025-07-01 17:49:05.545 yield ' ' + aelt
2025-07-01 17:49:05.545
2025-07-01 17:49:05.545 # pump out diffs from after the synch point
2025-07-01 17:49:05.545 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.545
2025-07-01 17:49:05.545 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.546 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.546
2025-07-01 17:49:05.546 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.546 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.546 alo = 29, ahi = 1101
2025-07-01 17:49:05.546 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.546 blo = 29, bhi = 1101
2025-07-01 17:49:05.546
2025-07-01 17:49:05.546 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.546 g = []
2025-07-01 17:49:05.546 if alo < ahi:
2025-07-01 17:49:05.546 if blo < bhi:
2025-07-01 17:49:05.546 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.546 else:
2025-07-01 17:49:05.546 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.546 elif blo < bhi:
2025-07-01 17:49:05.546 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.547
2025-07-01 17:49:05.547 > yield from g
2025-07-01 17:49:05.547
2025-07-01 17:49:05.547 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.547 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.547
2025-07-01 17:49:05.547 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.547 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.547 alo = 29, ahi = 1101
2025-07-01 17:49:05.547 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.547 blo = 29, bhi = 1101
2025-07-01 17:49:05.547
2025-07-01 17:49:05.547 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.547 r"""
2025-07-01 17:49:05.547 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.547 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.548 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.548 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.548
2025-07-01 17:49:05.548 Example:
2025-07-01 17:49:05.548
2025-07-01 17:49:05.548 >>> d = Differ()
2025-07-01 17:49:05.548 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.548 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.548 >>> print(''.join(results), end="")
2025-07-01 17:49:05.548 - abcDefghiJkl
2025-07-01 17:49:05.548 + abcdefGhijkl
2025-07-01 17:49:05.548 """
2025-07-01 17:49:05.548
2025-07-01 17:49:05.548 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.549 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.549 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.549 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.549 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.549
2025-07-01 17:49:05.549 # search for the pair that matches best without being identical
2025-07-01 17:49:05.549 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.549 # on junk -- unless we have to)
2025-07-01 17:49:05.549 for j in range(blo, bhi):
2025-07-01 17:49:05.549 bj = b[j]
2025-07-01 17:49:05.549 cruncher.set_seq2(bj)
2025-07-01 17:49:05.549 for i in range(alo, ahi):
2025-07-01 17:49:05.549 ai = a[i]
2025-07-01 17:49:05.549 if ai == bj:
2025-07-01 17:49:05.549 if eqi is None:
2025-07-01 17:49:05.549 eqi, eqj = i, j
2025-07-01 17:49:05.549 continue
2025-07-01 17:49:05.550 cruncher.set_seq1(ai)
2025-07-01 17:49:05.550 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.550 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.550 # compares by a factor of 3.
2025-07-01 17:49:05.550 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.550 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.550 # of the computation is cached by cruncher
2025-07-01 17:49:05.550 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.550 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.550 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.550 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.550 if best_ratio < cutoff:
2025-07-01 17:49:05.550 # no non-identical "pretty close" pair
2025-07-01 17:49:05.550 if eqi is None:
2025-07-01 17:49:05.550 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.551 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.551 return
2025-07-01 17:49:05.551 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.551 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.551 else:
2025-07-01 17:49:05.551 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.551 eqi = None
2025-07-01 17:49:05.551
2025-07-01 17:49:05.551 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.551 # identical
2025-07-01 17:49:05.551
2025-07-01 17:49:05.551 # pump out diffs from before the synch point
2025-07-01 17:49:05.551 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.551
2025-07-01 17:49:05.551 # do intraline marking on the synch pair
2025-07-01 17:49:05.551 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.551 if eqi is None:
2025-07-01 17:49:05.552 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.552 atags = btags = ""
2025-07-01 17:49:05.552 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.552 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.552 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.552 if tag == 'replace':
2025-07-01 17:49:05.552 atags += '^' * la
2025-07-01 17:49:05.552 btags += '^' * lb
2025-07-01 17:49:05.552 elif tag == 'delete':
2025-07-01 17:49:05.552 atags += '-' * la
2025-07-01 17:49:05.552 elif tag == 'insert':
2025-07-01 17:49:05.552 btags += '+' * lb
2025-07-01 17:49:05.552 elif tag == 'equal':
2025-07-01 17:49:05.552 atags += ' ' * la
2025-07-01 17:49:05.552 btags += ' ' * lb
2025-07-01 17:49:05.552 else:
2025-07-01 17:49:05.552 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.556 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.556 else:
2025-07-01 17:49:05.556 # the synch pair is identical
2025-07-01 17:49:05.556 yield ' ' + aelt
2025-07-01 17:49:05.556
2025-07-01 17:49:05.556 # pump out diffs from after the synch point
2025-07-01 17:49:05.556 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.556
2025-07-01 17:49:05.556 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.556 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.556
2025-07-01 17:49:05.556 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.556 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.556 alo = 30, ahi = 1101
2025-07-01 17:49:05.556 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.557 blo = 30, bhi = 1101
2025-07-01 17:49:05.557
2025-07-01 17:49:05.557 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.557 g = []
2025-07-01 17:49:05.557 if alo < ahi:
2025-07-01 17:49:05.557 if blo < bhi:
2025-07-01 17:49:05.557 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.557 else:
2025-07-01 17:49:05.557 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.557 elif blo < bhi:
2025-07-01 17:49:05.557 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.557
2025-07-01 17:49:05.557 > yield from g
2025-07-01 17:49:05.557
2025-07-01 17:49:05.557 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.557 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.557
2025-07-01 17:49:05.557 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.558 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.558 alo = 30, ahi = 1101
2025-07-01 17:49:05.558 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.558 blo = 30, bhi = 1101
2025-07-01 17:49:05.558
2025-07-01 17:49:05.558 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.558 r"""
2025-07-01 17:49:05.558 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.558 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.558 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.558 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.558
2025-07-01 17:49:05.558 Example:
2025-07-01 17:49:05.558
2025-07-01 17:49:05.558 >>> d = Differ()
2025-07-01 17:49:05.558 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.558 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.559 >>> print(''.join(results), end="")
2025-07-01 17:49:05.559 - abcDefghiJkl
2025-07-01 17:49:05.559 + abcdefGhijkl
2025-07-01 17:49:05.559 """
2025-07-01 17:49:05.559
2025-07-01 17:49:05.559 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.559 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.559 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.559 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.559 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.559
2025-07-01 17:49:05.559 # search for the pair that matches best without being identical
2025-07-01 17:49:05.559 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.559 # on junk -- unless we have to)
2025-07-01 17:49:05.560 for j in range(blo, bhi):
2025-07-01 17:49:05.560 bj = b[j]
2025-07-01 17:49:05.560 cruncher.set_seq2(bj)
2025-07-01 17:49:05.560 for i in range(alo, ahi):
2025-07-01 17:49:05.560 ai = a[i]
2025-07-01 17:49:05.560 if ai == bj:
2025-07-01 17:49:05.560 if eqi is None:
2025-07-01 17:49:05.560 eqi, eqj = i, j
2025-07-01 17:49:05.560 continue
2025-07-01 17:49:05.560 cruncher.set_seq1(ai)
2025-07-01 17:49:05.560 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.560 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.560 # compares by a factor of 3.
2025-07-01 17:49:05.560 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.560 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.560 # of the computation is cached by cruncher
2025-07-01 17:49:05.560 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.561 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.561 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.561 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.561 if best_ratio < cutoff:
2025-07-01 17:49:05.561 # no non-identical "pretty close" pair
2025-07-01 17:49:05.561 if eqi is None:
2025-07-01 17:49:05.561 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.561 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.561 return
2025-07-01 17:49:05.561 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.561 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.561 else:
2025-07-01 17:49:05.561 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.561 eqi = None
2025-07-01 17:49:05.561
2025-07-01 17:49:05.561 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.562 # identical
2025-07-01 17:49:05.562
2025-07-01 17:49:05.562 # pump out diffs from before the synch point
2025-07-01 17:49:05.562 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.562
2025-07-01 17:49:05.562 # do intraline marking on the synch pair
2025-07-01 17:49:05.562 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.562 if eqi is None:
2025-07-01 17:49:05.562 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.562 atags = btags = ""
2025-07-01 17:49:05.562 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.562 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.562 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.562 if tag == 'replace':
2025-07-01 17:49:05.562 atags += '^' * la
2025-07-01 17:49:05.562 btags += '^' * lb
2025-07-01 17:49:05.562 elif tag == 'delete':
2025-07-01 17:49:05.563 atags += '-' * la
2025-07-01 17:49:05.563 elif tag == 'insert':
2025-07-01 17:49:05.563 btags += '+' * lb
2025-07-01 17:49:05.563 elif tag == 'equal':
2025-07-01 17:49:05.563 atags += ' ' * la
2025-07-01 17:49:05.563 btags += ' ' * lb
2025-07-01 17:49:05.563 else:
2025-07-01 17:49:05.563 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.563 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.563 else:
2025-07-01 17:49:05.563 # the synch pair is identical
2025-07-01 17:49:05.563 yield ' ' + aelt
2025-07-01 17:49:05.563
2025-07-01 17:49:05.563 # pump out diffs from after the synch point
2025-07-01 17:49:05.563 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.564
2025-07-01 17:49:05.564 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.564 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.564
2025-07-01 17:49:05.564 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.564 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.564 alo = 31, ahi = 1101
2025-07-01 17:49:05.564 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.564 blo = 31, bhi = 1101
2025-07-01 17:49:05.564
2025-07-01 17:49:05.564 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.564 g = []
2025-07-01 17:49:05.564 if alo < ahi:
2025-07-01 17:49:05.564 if blo < bhi:
2025-07-01 17:49:05.564 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.565 else:
2025-07-01 17:49:05.565 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.565 elif blo < bhi:
2025-07-01 17:49:05.565 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.565
2025-07-01 17:49:05.565 > yield from g
2025-07-01 17:49:05.565
2025-07-01 17:49:05.565 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.565 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.565
2025-07-01 17:49:05.565 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.565 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.565 alo = 31, ahi = 1101
2025-07-01 17:49:05.565 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.565 blo = 31, bhi = 1101
2025-07-01 17:49:05.565
2025-07-01 17:49:05.565 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.566 r"""
2025-07-01 17:49:05.566 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.566 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.566 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.566 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.566
2025-07-01 17:49:05.566 Example:
2025-07-01 17:49:05.566
2025-07-01 17:49:05.566 >>> d = Differ()
2025-07-01 17:49:05.566 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.566 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.566 >>> print(''.join(results), end="")
2025-07-01 17:49:05.566 - abcDefghiJkl
2025-07-01 17:49:05.566 + abcdefGhijkl
2025-07-01 17:49:05.566 """
2025-07-01 17:49:05.567
2025-07-01 17:49:05.567 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.567 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.567 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.567 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.567 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.567
2025-07-01 17:49:05.567 # search for the pair that matches best without being identical
2025-07-01 17:49:05.567 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.567 # on junk -- unless we have to)
2025-07-01 17:49:05.567 for j in range(blo, bhi):
2025-07-01 17:49:05.567 bj = b[j]
2025-07-01 17:49:05.567 cruncher.set_seq2(bj)
2025-07-01 17:49:05.567 for i in range(alo, ahi):
2025-07-01 17:49:05.567 ai = a[i]
2025-07-01 17:49:05.567 if ai == bj:
2025-07-01 17:49:05.567 if eqi is None:
2025-07-01 17:49:05.571 eqi, eqj = i, j
2025-07-01 17:49:05.571 continue
2025-07-01 17:49:05.571 cruncher.set_seq1(ai)
2025-07-01 17:49:05.571 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.571 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.571 # compares by a factor of 3.
2025-07-01 17:49:05.571 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.571 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.571 # of the computation is cached by cruncher
2025-07-01 17:49:05.571 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.571 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.571 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.572 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.572 if best_ratio < cutoff:
2025-07-01 17:49:05.572 # no non-identical "pretty close" pair
2025-07-01 17:49:05.572 if eqi is None:
2025-07-01 17:49:05.572 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.572 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.572 return
2025-07-01 17:49:05.572 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.572 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.572 else:
2025-07-01 17:49:05.572 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.572 eqi = None
2025-07-01 17:49:05.572
2025-07-01 17:49:05.572 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.572 # identical
2025-07-01 17:49:05.573
2025-07-01 17:49:05.573 # pump out diffs from before the synch point
2025-07-01 17:49:05.573 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.573
2025-07-01 17:49:05.573 # do intraline marking on the synch pair
2025-07-01 17:49:05.573 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.573 if eqi is None:
2025-07-01 17:49:05.573 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.573 atags = btags = ""
2025-07-01 17:49:05.573 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.573 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.573 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.573 if tag == 'replace':
2025-07-01 17:49:05.573 atags += '^' * la
2025-07-01 17:49:05.573 btags += '^' * lb
2025-07-01 17:49:05.573 elif tag == 'delete':
2025-07-01 17:49:05.573 atags += '-' * la
2025-07-01 17:49:05.574 elif tag == 'insert':
2025-07-01 17:49:05.574 btags += '+' * lb
2025-07-01 17:49:05.574 elif tag == 'equal':
2025-07-01 17:49:05.574 atags += ' ' * la
2025-07-01 17:49:05.574 btags += ' ' * lb
2025-07-01 17:49:05.574 else:
2025-07-01 17:49:05.574 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.574 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.574 else:
2025-07-01 17:49:05.574 # the synch pair is identical
2025-07-01 17:49:05.574 yield ' ' + aelt
2025-07-01 17:49:05.574
2025-07-01 17:49:05.574 # pump out diffs from after the synch point
2025-07-01 17:49:05.574 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.574
2025-07-01 17:49:05.574 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.575 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.575
2025-07-01 17:49:05.575 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.575 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.575 alo = 32, ahi = 1101
2025-07-01 17:49:05.575 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.575 blo = 32, bhi = 1101
2025-07-01 17:49:05.575
2025-07-01 17:49:05.575 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.575 g = []
2025-07-01 17:49:05.575 if alo < ahi:
2025-07-01 17:49:05.575 if blo < bhi:
2025-07-01 17:49:05.575 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.575 else:
2025-07-01 17:49:05.575 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.575 elif blo < bhi:
2025-07-01 17:49:05.575 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.576
2025-07-01 17:49:05.576 > yield from g
2025-07-01 17:49:05.576
2025-07-01 17:49:05.576 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.576 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.576
2025-07-01 17:49:05.576 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.576 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.576 alo = 32, ahi = 1101
2025-07-01 17:49:05.576 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.576 blo = 32, bhi = 1101
2025-07-01 17:49:05.576
2025-07-01 17:49:05.576 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.576 r"""
2025-07-01 17:49:05.576 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.576 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.577 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.577 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.577
2025-07-01 17:49:05.577 Example:
2025-07-01 17:49:05.577
2025-07-01 17:49:05.577 >>> d = Differ()
2025-07-01 17:49:05.577 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.577 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.577 >>> print(''.join(results), end="")
2025-07-01 17:49:05.577 - abcDefghiJkl
2025-07-01 17:49:05.577 + abcdefGhijkl
2025-07-01 17:49:05.577 """
2025-07-01 17:49:05.577
2025-07-01 17:49:05.578 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.578 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.578 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.578 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.578 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.578
2025-07-01 17:49:05.578 # search for the pair that matches best without being identical
2025-07-01 17:49:05.578 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.578 # on junk -- unless we have to)
2025-07-01 17:49:05.578 for j in range(blo, bhi):
2025-07-01 17:49:05.578 bj = b[j]
2025-07-01 17:49:05.578 cruncher.set_seq2(bj)
2025-07-01 17:49:05.578 for i in range(alo, ahi):
2025-07-01 17:49:05.578 ai = a[i]
2025-07-01 17:49:05.578 if ai == bj:
2025-07-01 17:49:05.578 if eqi is None:
2025-07-01 17:49:05.578 eqi, eqj = i, j
2025-07-01 17:49:05.579 continue
2025-07-01 17:49:05.579 cruncher.set_seq1(ai)
2025-07-01 17:49:05.579 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.579 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.579 # compares by a factor of 3.
2025-07-01 17:49:05.579 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.579 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.579 # of the computation is cached by cruncher
2025-07-01 17:49:05.579 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.579 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.579 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.579 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.579 if best_ratio < cutoff:
2025-07-01 17:49:05.579 # no non-identical "pretty close" pair
2025-07-01 17:49:05.579 if eqi is None:
2025-07-01 17:49:05.579 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.579 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.580 return
2025-07-01 17:49:05.580 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.580 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.580 else:
2025-07-01 17:49:05.580 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.580 eqi = None
2025-07-01 17:49:05.580
2025-07-01 17:49:05.580 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.580 # identical
2025-07-01 17:49:05.580
2025-07-01 17:49:05.580 # pump out diffs from before the synch point
2025-07-01 17:49:05.580 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.580
2025-07-01 17:49:05.580 # do intraline marking on the synch pair
2025-07-01 17:49:05.580 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.580 if eqi is None:
2025-07-01 17:49:05.581 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.581 atags = btags = ""
2025-07-01 17:49:05.581 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.581 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.581 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.581 if tag == 'replace':
2025-07-01 17:49:05.581 atags += '^' * la
2025-07-01 17:49:05.581 btags += '^' * lb
2025-07-01 17:49:05.581 elif tag == 'delete':
2025-07-01 17:49:05.581 atags += '-' * la
2025-07-01 17:49:05.581 elif tag == 'insert':
2025-07-01 17:49:05.581 btags += '+' * lb
2025-07-01 17:49:05.581 elif tag == 'equal':
2025-07-01 17:49:05.581 atags += ' ' * la
2025-07-01 17:49:05.581 btags += ' ' * lb
2025-07-01 17:49:05.581 else:
2025-07-01 17:49:05.581 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.582 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.582 else:
2025-07-01 17:49:05.582 # the synch pair is identical
2025-07-01 17:49:05.582 yield ' ' + aelt
2025-07-01 17:49:05.582
2025-07-01 17:49:05.582 # pump out diffs from after the synch point
2025-07-01 17:49:05.582 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.582
2025-07-01 17:49:05.582 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.582 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.582
2025-07-01 17:49:05.582 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.582 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.582 alo = 33, ahi = 1101
2025-07-01 17:49:05.582 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.582 blo = 33, bhi = 1101
2025-07-01 17:49:05.583
2025-07-01 17:49:05.583 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.583 g = []
2025-07-01 17:49:05.583 if alo < ahi:
2025-07-01 17:49:05.583 if blo < bhi:
2025-07-01 17:49:05.583 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.583 else:
2025-07-01 17:49:05.583 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.583 elif blo < bhi:
2025-07-01 17:49:05.583 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.583
2025-07-01 17:49:05.583 > yield from g
2025-07-01 17:49:05.583
2025-07-01 17:49:05.583 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.583 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.583
2025-07-01 17:49:05.583 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.587 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.587 alo = 33, ahi = 1101
2025-07-01 17:49:05.587 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.587 blo = 33, bhi = 1101
2025-07-01 17:49:05.587
2025-07-01 17:49:05.587 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.587 r"""
2025-07-01 17:49:05.587 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.587 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.587 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.587 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.587
2025-07-01 17:49:05.588 Example:
2025-07-01 17:49:05.588
2025-07-01 17:49:05.588 >>> d = Differ()
2025-07-01 17:49:05.588 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.588 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.588 >>> print(''.join(results), end="")
2025-07-01 17:49:05.588 - abcDefghiJkl
2025-07-01 17:49:05.588 + abcdefGhijkl
2025-07-01 17:49:05.588 """
2025-07-01 17:49:05.588
2025-07-01 17:49:05.588 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.588 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.588 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.588 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.588 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.589
2025-07-01 17:49:05.589 # search for the pair that matches best without being identical
2025-07-01 17:49:05.589 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.589 # on junk -- unless we have to)
2025-07-01 17:49:05.589 for j in range(blo, bhi):
2025-07-01 17:49:05.589 bj = b[j]
2025-07-01 17:49:05.589 cruncher.set_seq2(bj)
2025-07-01 17:49:05.589 for i in range(alo, ahi):
2025-07-01 17:49:05.589 ai = a[i]
2025-07-01 17:49:05.589 if ai == bj:
2025-07-01 17:49:05.589 if eqi is None:
2025-07-01 17:49:05.589 eqi, eqj = i, j
2025-07-01 17:49:05.589 continue
2025-07-01 17:49:05.589 cruncher.set_seq1(ai)
2025-07-01 17:49:05.589 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.589 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.589 # compares by a factor of 3.
2025-07-01 17:49:05.590 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.590 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.590 # of the computation is cached by cruncher
2025-07-01 17:49:05.590 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.590 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.590 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.590 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.590 if best_ratio < cutoff:
2025-07-01 17:49:05.590 # no non-identical "pretty close" pair
2025-07-01 17:49:05.590 if eqi is None:
2025-07-01 17:49:05.590 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.590 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.590 return
2025-07-01 17:49:05.590 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.590 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.590 else:
2025-07-01 17:49:05.591 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.591 eqi = None
2025-07-01 17:49:05.591
2025-07-01 17:49:05.591 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.591 # identical
2025-07-01 17:49:05.591
2025-07-01 17:49:05.591 # pump out diffs from before the synch point
2025-07-01 17:49:05.591 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.591
2025-07-01 17:49:05.591 # do intraline marking on the synch pair
2025-07-01 17:49:05.591 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.591 if eqi is None:
2025-07-01 17:49:05.591 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.591 atags = btags = ""
2025-07-01 17:49:05.591 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.592 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.592 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.592 if tag == 'replace':
2025-07-01 17:49:05.592 atags += '^' * la
2025-07-01 17:49:05.592 btags += '^' * lb
2025-07-01 17:49:05.592 elif tag == 'delete':
2025-07-01 17:49:05.592 atags += '-' * la
2025-07-01 17:49:05.592 elif tag == 'insert':
2025-07-01 17:49:05.592 btags += '+' * lb
2025-07-01 17:49:05.592 elif tag == 'equal':
2025-07-01 17:49:05.592 atags += ' ' * la
2025-07-01 17:49:05.592 btags += ' ' * lb
2025-07-01 17:49:05.592 else:
2025-07-01 17:49:05.592 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.592 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.592 else:
2025-07-01 17:49:05.593 # the synch pair is identical
2025-07-01 17:49:05.593 yield ' ' + aelt
2025-07-01 17:49:05.593
2025-07-01 17:49:05.593 # pump out diffs from after the synch point
2025-07-01 17:49:05.593 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.593
2025-07-01 17:49:05.593 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.593 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.593
2025-07-01 17:49:05.593 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.593 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.593 alo = 34, ahi = 1101
2025-07-01 17:49:05.593 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.593 blo = 34, bhi = 1101
2025-07-01 17:49:05.593
2025-07-01 17:49:05.593 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.593 g = []
2025-07-01 17:49:05.594 if alo < ahi:
2025-07-01 17:49:05.594 if blo < bhi:
2025-07-01 17:49:05.594 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.594 else:
2025-07-01 17:49:05.594 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.594 elif blo < bhi:
2025-07-01 17:49:05.594 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.594
2025-07-01 17:49:05.594 > yield from g
2025-07-01 17:49:05.594
2025-07-01 17:49:05.594 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.594 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.594
2025-07-01 17:49:05.594 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.594 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.594 alo = 34, ahi = 1101
2025-07-01 17:49:05.595 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.595 blo = 34, bhi = 1101
2025-07-01 17:49:05.595
2025-07-01 17:49:05.595 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.595 r"""
2025-07-01 17:49:05.595 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.595 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.595 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.595 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.595
2025-07-01 17:49:05.595 Example:
2025-07-01 17:49:05.595
2025-07-01 17:49:05.595 >>> d = Differ()
2025-07-01 17:49:05.595 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.595 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.595 >>> print(''.join(results), end="")
2025-07-01 17:49:05.596 - abcDefghiJkl
2025-07-01 17:49:05.596 + abcdefGhijkl
2025-07-01 17:49:05.596 """
2025-07-01 17:49:05.596
2025-07-01 17:49:05.596 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.596 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.596 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.596 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.596 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.596
2025-07-01 17:49:05.596 # search for the pair that matches best without being identical
2025-07-01 17:49:05.596 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.596 # on junk -- unless we have to)
2025-07-01 17:49:05.596 for j in range(blo, bhi):
2025-07-01 17:49:05.596 bj = b[j]
2025-07-01 17:49:05.596 cruncher.set_seq2(bj)
2025-07-01 17:49:05.597 for i in range(alo, ahi):
2025-07-01 17:49:05.597 ai = a[i]
2025-07-01 17:49:05.597 if ai == bj:
2025-07-01 17:49:05.597 if eqi is None:
2025-07-01 17:49:05.597 eqi, eqj = i, j
2025-07-01 17:49:05.597 continue
2025-07-01 17:49:05.597 cruncher.set_seq1(ai)
2025-07-01 17:49:05.597 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.597 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.597 # compares by a factor of 3.
2025-07-01 17:49:05.597 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.597 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.597 # of the computation is cached by cruncher
2025-07-01 17:49:05.597 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.597 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.597 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.598 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.598 if best_ratio < cutoff:
2025-07-01 17:49:05.598 # no non-identical "pretty close" pair
2025-07-01 17:49:05.598 if eqi is None:
2025-07-01 17:49:05.598 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.598 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.598 return
2025-07-01 17:49:05.598 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.598 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.598 else:
2025-07-01 17:49:05.598 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.598 eqi = None
2025-07-01 17:49:05.598
2025-07-01 17:49:05.598 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.598 # identical
2025-07-01 17:49:05.598
2025-07-01 17:49:05.598 # pump out diffs from before the synch point
2025-07-01 17:49:05.599 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.599
2025-07-01 17:49:05.599 # do intraline marking on the synch pair
2025-07-01 17:49:05.599 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.599 if eqi is None:
2025-07-01 17:49:05.599 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.599 atags = btags = ""
2025-07-01 17:49:05.599 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.599 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.599 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.599 if tag == 'replace':
2025-07-01 17:49:05.599 atags += '^' * la
2025-07-01 17:49:05.599 btags += '^' * lb
2025-07-01 17:49:05.599 elif tag == 'delete':
2025-07-01 17:49:05.599 atags += '-' * la
2025-07-01 17:49:05.599 elif tag == 'insert':
2025-07-01 17:49:05.599 btags += '+' * lb
2025-07-01 17:49:05.600 elif tag == 'equal':
2025-07-01 17:49:05.603 atags += ' ' * la
2025-07-01 17:49:05.603 btags += ' ' * lb
2025-07-01 17:49:05.603 else:
2025-07-01 17:49:05.603 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.603 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.603 else:
2025-07-01 17:49:05.603 # the synch pair is identical
2025-07-01 17:49:05.603 yield ' ' + aelt
2025-07-01 17:49:05.603
2025-07-01 17:49:05.603 # pump out diffs from after the synch point
2025-07-01 17:49:05.603 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.604
2025-07-01 17:49:05.604 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.604 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.604
2025-07-01 17:49:05.604 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.604 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.604 alo = 35, ahi = 1101
2025-07-01 17:49:05.604 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.604 blo = 35, bhi = 1101
2025-07-01 17:49:05.604
2025-07-01 17:49:05.604 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.604 g = []
2025-07-01 17:49:05.604 if alo < ahi:
2025-07-01 17:49:05.604 if blo < bhi:
2025-07-01 17:49:05.604 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.605 else:
2025-07-01 17:49:05.605 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.605 elif blo < bhi:
2025-07-01 17:49:05.605 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.605
2025-07-01 17:49:05.605 > yield from g
2025-07-01 17:49:05.605
2025-07-01 17:49:05.605 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.605 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.605
2025-07-01 17:49:05.605 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.605 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.605 alo = 35, ahi = 1101
2025-07-01 17:49:05.605 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.605 blo = 35, bhi = 1101
2025-07-01 17:49:05.605
2025-07-01 17:49:05.606 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.606 r"""
2025-07-01 17:49:05.606 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.606 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.606 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.606 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.606
2025-07-01 17:49:05.606 Example:
2025-07-01 17:49:05.606
2025-07-01 17:49:05.606 >>> d = Differ()
2025-07-01 17:49:05.606 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.606 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.606 >>> print(''.join(results), end="")
2025-07-01 17:49:05.606 - abcDefghiJkl
2025-07-01 17:49:05.606 + abcdefGhijkl
2025-07-01 17:49:05.607 """
2025-07-01 17:49:05.607
2025-07-01 17:49:05.607 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.607 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.607 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.607 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.607 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.607
2025-07-01 17:49:05.607 # search for the pair that matches best without being identical
2025-07-01 17:49:05.607 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.607 # on junk -- unless we have to)
2025-07-01 17:49:05.607 for j in range(blo, bhi):
2025-07-01 17:49:05.607 bj = b[j]
2025-07-01 17:49:05.607 cruncher.set_seq2(bj)
2025-07-01 17:49:05.607 for i in range(alo, ahi):
2025-07-01 17:49:05.607 ai = a[i]
2025-07-01 17:49:05.608 if ai == bj:
2025-07-01 17:49:05.608 if eqi is None:
2025-07-01 17:49:05.608 eqi, eqj = i, j
2025-07-01 17:49:05.608 continue
2025-07-01 17:49:05.608 cruncher.set_seq1(ai)
2025-07-01 17:49:05.608 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.608 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.608 # compares by a factor of 3.
2025-07-01 17:49:05.608 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.608 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.608 # of the computation is cached by cruncher
2025-07-01 17:49:05.608 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.608 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.608 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.608 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.609 if best_ratio < cutoff:
2025-07-01 17:49:05.609 # no non-identical "pretty close" pair
2025-07-01 17:49:05.609 if eqi is None:
2025-07-01 17:49:05.609 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.609 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.609 return
2025-07-01 17:49:05.609 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.609 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.609 else:
2025-07-01 17:49:05.609 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.609 eqi = None
2025-07-01 17:49:05.609
2025-07-01 17:49:05.609 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.609 # identical
2025-07-01 17:49:05.609
2025-07-01 17:49:05.609 # pump out diffs from before the synch point
2025-07-01 17:49:05.609 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.610
2025-07-01 17:49:05.610 # do intraline marking on the synch pair
2025-07-01 17:49:05.610 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.610 if eqi is None:
2025-07-01 17:49:05.610 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.610 atags = btags = ""
2025-07-01 17:49:05.610 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.610 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.610 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.610 if tag == 'replace':
2025-07-01 17:49:05.610 atags += '^' * la
2025-07-01 17:49:05.610 btags += '^' * lb
2025-07-01 17:49:05.610 elif tag == 'delete':
2025-07-01 17:49:05.610 atags += '-' * la
2025-07-01 17:49:05.610 elif tag == 'insert':
2025-07-01 17:49:05.610 btags += '+' * lb
2025-07-01 17:49:05.610 elif tag == 'equal':
2025-07-01 17:49:05.611 atags += ' ' * la
2025-07-01 17:49:05.611 btags += ' ' * lb
2025-07-01 17:49:05.611 else:
2025-07-01 17:49:05.611 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.611 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.611 else:
2025-07-01 17:49:05.611 # the synch pair is identical
2025-07-01 17:49:05.611 yield ' ' + aelt
2025-07-01 17:49:05.611
2025-07-01 17:49:05.611 # pump out diffs from after the synch point
2025-07-01 17:49:05.611 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.611
2025-07-01 17:49:05.611 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.611 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.611
2025-07-01 17:49:05.611 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.611 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.611 alo = 36, ahi = 1101
2025-07-01 17:49:05.612 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.612 blo = 36, bhi = 1101
2025-07-01 17:49:05.612
2025-07-01 17:49:05.612 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.612 g = []
2025-07-01 17:49:05.612 if alo < ahi:
2025-07-01 17:49:05.612 if blo < bhi:
2025-07-01 17:49:05.612 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.612 else:
2025-07-01 17:49:05.612 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.612 elif blo < bhi:
2025-07-01 17:49:05.612 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.612
2025-07-01 17:49:05.612 > yield from g
2025-07-01 17:49:05.612
2025-07-01 17:49:05.612 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.612 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.612
2025-07-01 17:49:05.613 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.613 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.613 alo = 36, ahi = 1101
2025-07-01 17:49:05.613 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.613 blo = 36, bhi = 1101
2025-07-01 17:49:05.613
2025-07-01 17:49:05.613 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.613 r"""
2025-07-01 17:49:05.613 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.613 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.613 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.613 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.613
2025-07-01 17:49:05.613 Example:
2025-07-01 17:49:05.613
2025-07-01 17:49:05.613 >>> d = Differ()
2025-07-01 17:49:05.614 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.614 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.614 >>> print(''.join(results), end="")
2025-07-01 17:49:05.614 - abcDefghiJkl
2025-07-01 17:49:05.614 + abcdefGhijkl
2025-07-01 17:49:05.614 """
2025-07-01 17:49:05.614
2025-07-01 17:49:05.614 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.614 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.614 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.614 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.614 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.614
2025-07-01 17:49:05.614 # search for the pair that matches best without being identical
2025-07-01 17:49:05.614 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.614 # on junk -- unless we have to)
2025-07-01 17:49:05.615 for j in range(blo, bhi):
2025-07-01 17:49:05.615 bj = b[j]
2025-07-01 17:49:05.615 cruncher.set_seq2(bj)
2025-07-01 17:49:05.615 for i in range(alo, ahi):
2025-07-01 17:49:05.615 ai = a[i]
2025-07-01 17:49:05.615 if ai == bj:
2025-07-01 17:49:05.615 if eqi is None:
2025-07-01 17:49:05.615 eqi, eqj = i, j
2025-07-01 17:49:05.615 continue
2025-07-01 17:49:05.615 cruncher.set_seq1(ai)
2025-07-01 17:49:05.615 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.615 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.615 # compares by a factor of 3.
2025-07-01 17:49:05.615 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.615 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.615 # of the computation is cached by cruncher
2025-07-01 17:49:05.615 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.615 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.615 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.616 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.619 if best_ratio < cutoff:
2025-07-01 17:49:05.619 # no non-identical "pretty close" pair
2025-07-01 17:49:05.619 if eqi is None:
2025-07-01 17:49:05.619 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.619 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.619 return
2025-07-01 17:49:05.619 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.619 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.619 else:
2025-07-01 17:49:05.619 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.619 eqi = None
2025-07-01 17:49:05.619
2025-07-01 17:49:05.619 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.619 # identical
2025-07-01 17:49:05.619
2025-07-01 17:49:05.619 # pump out diffs from before the synch point
2025-07-01 17:49:05.619 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.619
2025-07-01 17:49:05.619 # do intraline marking on the synch pair
2025-07-01 17:49:05.619 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.619 if eqi is None:
2025-07-01 17:49:05.620 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.620 atags = btags = ""
2025-07-01 17:49:05.620 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.620 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.620 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.620 if tag == 'replace':
2025-07-01 17:49:05.620 atags += '^' * la
2025-07-01 17:49:05.620 btags += '^' * lb
2025-07-01 17:49:05.620 elif tag == 'delete':
2025-07-01 17:49:05.620 atags += '-' * la
2025-07-01 17:49:05.620 elif tag == 'insert':
2025-07-01 17:49:05.620 btags += '+' * lb
2025-07-01 17:49:05.620 elif tag == 'equal':
2025-07-01 17:49:05.620 atags += ' ' * la
2025-07-01 17:49:05.620 btags += ' ' * lb
2025-07-01 17:49:05.620 else:
2025-07-01 17:49:05.620 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.620 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.620 else:
2025-07-01 17:49:05.620 # the synch pair is identical
2025-07-01 17:49:05.621 yield ' ' + aelt
2025-07-01 17:49:05.621
2025-07-01 17:49:05.621 # pump out diffs from after the synch point
2025-07-01 17:49:05.621 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.621
2025-07-01 17:49:05.621 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.621 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.621
2025-07-01 17:49:05.621 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.621 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.621 alo = 37, ahi = 1101
2025-07-01 17:49:05.621 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.621 blo = 37, bhi = 1101
2025-07-01 17:49:05.621
2025-07-01 17:49:05.621 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.621 g = []
2025-07-01 17:49:05.621 if alo < ahi:
2025-07-01 17:49:05.621 if blo < bhi:
2025-07-01 17:49:05.621 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.621 else:
2025-07-01 17:49:05.622 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.622 elif blo < bhi:
2025-07-01 17:49:05.622 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.622
2025-07-01 17:49:05.622 > yield from g
2025-07-01 17:49:05.622
2025-07-01 17:49:05.622 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.622 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.622
2025-07-01 17:49:05.622 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.622 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.622 alo = 37, ahi = 1101
2025-07-01 17:49:05.622 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.622 blo = 37, bhi = 1101
2025-07-01 17:49:05.622
2025-07-01 17:49:05.622 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.622 r"""
2025-07-01 17:49:05.622 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.622 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.622 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.623 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.623
2025-07-01 17:49:05.623 Example:
2025-07-01 17:49:05.623
2025-07-01 17:49:05.623 >>> d = Differ()
2025-07-01 17:49:05.623 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.623 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.623 >>> print(''.join(results), end="")
2025-07-01 17:49:05.623 - abcDefghiJkl
2025-07-01 17:49:05.623 + abcdefGhijkl
2025-07-01 17:49:05.623 """
2025-07-01 17:49:05.623
2025-07-01 17:49:05.623 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.623 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.623 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.623 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.623 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.623
2025-07-01 17:49:05.624 # search for the pair that matches best without being identical
2025-07-01 17:49:05.624 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.624 # on junk -- unless we have to)
2025-07-01 17:49:05.624 for j in range(blo, bhi):
2025-07-01 17:49:05.624 bj = b[j]
2025-07-01 17:49:05.624 cruncher.set_seq2(bj)
2025-07-01 17:49:05.624 for i in range(alo, ahi):
2025-07-01 17:49:05.624 ai = a[i]
2025-07-01 17:49:05.624 if ai == bj:
2025-07-01 17:49:05.624 if eqi is None:
2025-07-01 17:49:05.624 eqi, eqj = i, j
2025-07-01 17:49:05.624 continue
2025-07-01 17:49:05.624 cruncher.set_seq1(ai)
2025-07-01 17:49:05.624 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.624 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.624 # compares by a factor of 3.
2025-07-01 17:49:05.624 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.624 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.624 # of the computation is cached by cruncher
2025-07-01 17:49:05.624 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.624 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.625 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.625 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.625 if best_ratio < cutoff:
2025-07-01 17:49:05.625 # no non-identical "pretty close" pair
2025-07-01 17:49:05.625 if eqi is None:
2025-07-01 17:49:05.625 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.625 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.625 return
2025-07-01 17:49:05.625 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.625 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.625 else:
2025-07-01 17:49:05.625 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.625 eqi = None
2025-07-01 17:49:05.625
2025-07-01 17:49:05.625 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.625 # identical
2025-07-01 17:49:05.625
2025-07-01 17:49:05.625 # pump out diffs from before the synch point
2025-07-01 17:49:05.625 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.625
2025-07-01 17:49:05.625 # do intraline marking on the synch pair
2025-07-01 17:49:05.626 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.626 if eqi is None:
2025-07-01 17:49:05.626 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.626 atags = btags = ""
2025-07-01 17:49:05.626 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.626 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.626 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.626 if tag == 'replace':
2025-07-01 17:49:05.626 atags += '^' * la
2025-07-01 17:49:05.626 btags += '^' * lb
2025-07-01 17:49:05.626 elif tag == 'delete':
2025-07-01 17:49:05.626 atags += '-' * la
2025-07-01 17:49:05.626 elif tag == 'insert':
2025-07-01 17:49:05.626 btags += '+' * lb
2025-07-01 17:49:05.626 elif tag == 'equal':
2025-07-01 17:49:05.626 atags += ' ' * la
2025-07-01 17:49:05.626 btags += ' ' * lb
2025-07-01 17:49:05.626 else:
2025-07-01 17:49:05.626 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.626 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.627 else:
2025-07-01 17:49:05.627 # the synch pair is identical
2025-07-01 17:49:05.627 yield ' ' + aelt
2025-07-01 17:49:05.627
2025-07-01 17:49:05.627 # pump out diffs from after the synch point
2025-07-01 17:49:05.627 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.627
2025-07-01 17:49:05.627 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.627 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.627
2025-07-01 17:49:05.627 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.627 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.627 alo = 38, ahi = 1101
2025-07-01 17:49:05.627 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.627 blo = 38, bhi = 1101
2025-07-01 17:49:05.627
2025-07-01 17:49:05.627 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.627 g = []
2025-07-01 17:49:05.627 if alo < ahi:
2025-07-01 17:49:05.627 if blo < bhi:
2025-07-01 17:49:05.628 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.628 else:
2025-07-01 17:49:05.628 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.628 elif blo < bhi:
2025-07-01 17:49:05.628 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.628
2025-07-01 17:49:05.628 > yield from g
2025-07-01 17:49:05.628
2025-07-01 17:49:05.628 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.628 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.628
2025-07-01 17:49:05.628 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.628 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.628 alo = 38, ahi = 1101
2025-07-01 17:49:05.628 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.628 blo = 38, bhi = 1101
2025-07-01 17:49:05.628
2025-07-01 17:49:05.628 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.628 r"""
2025-07-01 17:49:05.628 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.629 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.629 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.629 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.629
2025-07-01 17:49:05.629 Example:
2025-07-01 17:49:05.629
2025-07-01 17:49:05.629 >>> d = Differ()
2025-07-01 17:49:05.629 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.629 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.629 >>> print(''.join(results), end="")
2025-07-01 17:49:05.629 - abcDefghiJkl
2025-07-01 17:49:05.629 + abcdefGhijkl
2025-07-01 17:49:05.629 """
2025-07-01 17:49:05.629
2025-07-01 17:49:05.629 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.629 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.629 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.630 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.630 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.630
2025-07-01 17:49:05.630 # search for the pair that matches best without being identical
2025-07-01 17:49:05.630 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.630 # on junk -- unless we have to)
2025-07-01 17:49:05.630 for j in range(blo, bhi):
2025-07-01 17:49:05.630 bj = b[j]
2025-07-01 17:49:05.630 cruncher.set_seq2(bj)
2025-07-01 17:49:05.630 for i in range(alo, ahi):
2025-07-01 17:49:05.630 ai = a[i]
2025-07-01 17:49:05.630 if ai == bj:
2025-07-01 17:49:05.630 if eqi is None:
2025-07-01 17:49:05.630 eqi, eqj = i, j
2025-07-01 17:49:05.630 continue
2025-07-01 17:49:05.630 cruncher.set_seq1(ai)
2025-07-01 17:49:05.630 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.630 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.630 # compares by a factor of 3.
2025-07-01 17:49:05.630 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.630 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.631 # of the computation is cached by cruncher
2025-07-01 17:49:05.631 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.631 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.631 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.631 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.631 if best_ratio < cutoff:
2025-07-01 17:49:05.631 # no non-identical "pretty close" pair
2025-07-01 17:49:05.631 if eqi is None:
2025-07-01 17:49:05.631 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.631 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.631 return
2025-07-01 17:49:05.631 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.631 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.631 else:
2025-07-01 17:49:05.631 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.631 eqi = None
2025-07-01 17:49:05.631
2025-07-01 17:49:05.631 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.631 # identical
2025-07-01 17:49:05.631
2025-07-01 17:49:05.631 # pump out diffs from before the synch point
2025-07-01 17:49:05.637 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.637
2025-07-01 17:49:05.637 # do intraline marking on the synch pair
2025-07-01 17:49:05.637 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.637 if eqi is None:
2025-07-01 17:49:05.637 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.637 atags = btags = ""
2025-07-01 17:49:05.637 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.637 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.637 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.637 if tag == 'replace':
2025-07-01 17:49:05.637 atags += '^' * la
2025-07-01 17:49:05.637 btags += '^' * lb
2025-07-01 17:49:05.637 elif tag == 'delete':
2025-07-01 17:49:05.637 atags += '-' * la
2025-07-01 17:49:05.637 elif tag == 'insert':
2025-07-01 17:49:05.637 btags += '+' * lb
2025-07-01 17:49:05.637 elif tag == 'equal':
2025-07-01 17:49:05.637 atags += ' ' * la
2025-07-01 17:49:05.638 btags += ' ' * lb
2025-07-01 17:49:05.638 else:
2025-07-01 17:49:05.638 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.638 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.638 else:
2025-07-01 17:49:05.638 # the synch pair is identical
2025-07-01 17:49:05.638 yield ' ' + aelt
2025-07-01 17:49:05.638
2025-07-01 17:49:05.638 # pump out diffs from after the synch point
2025-07-01 17:49:05.638 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.638
2025-07-01 17:49:05.638 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.638 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.638
2025-07-01 17:49:05.638 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.638 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.638 alo = 39, ahi = 1101
2025-07-01 17:49:05.638 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.638 blo = 39, bhi = 1101
2025-07-01 17:49:05.638
2025-07-01 17:49:05.638 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.639 g = []
2025-07-01 17:49:05.639 if alo < ahi:
2025-07-01 17:49:05.639 if blo < bhi:
2025-07-01 17:49:05.639 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.639 else:
2025-07-01 17:49:05.639 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.639 elif blo < bhi:
2025-07-01 17:49:05.639 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.639
2025-07-01 17:49:05.639 > yield from g
2025-07-01 17:49:05.639
2025-07-01 17:49:05.639 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.639 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.639
2025-07-01 17:49:05.639 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.639 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.639 alo = 39, ahi = 1101
2025-07-01 17:49:05.639 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.639 blo = 39, bhi = 1101
2025-07-01 17:49:05.639
2025-07-01 17:49:05.640 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.640 r"""
2025-07-01 17:49:05.640 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.640 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.640 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.640 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.640
2025-07-01 17:49:05.640 Example:
2025-07-01 17:49:05.640
2025-07-01 17:49:05.640 >>> d = Differ()
2025-07-01 17:49:05.640 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.640 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.640 >>> print(''.join(results), end="")
2025-07-01 17:49:05.640 - abcDefghiJkl
2025-07-01 17:49:05.640 + abcdefGhijkl
2025-07-01 17:49:05.640 """
2025-07-01 17:49:05.640
2025-07-01 17:49:05.641 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.641 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.641 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.641 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.641 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.641
2025-07-01 17:49:05.641 # search for the pair that matches best without being identical
2025-07-01 17:49:05.641 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.641 # on junk -- unless we have to)
2025-07-01 17:49:05.641 for j in range(blo, bhi):
2025-07-01 17:49:05.641 bj = b[j]
2025-07-01 17:49:05.641 cruncher.set_seq2(bj)
2025-07-01 17:49:05.641 for i in range(alo, ahi):
2025-07-01 17:49:05.641 ai = a[i]
2025-07-01 17:49:05.641 if ai == bj:
2025-07-01 17:49:05.641 if eqi is None:
2025-07-01 17:49:05.641 eqi, eqj = i, j
2025-07-01 17:49:05.641 continue
2025-07-01 17:49:05.641 cruncher.set_seq1(ai)
2025-07-01 17:49:05.641 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.641 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.642 # compares by a factor of 3.
2025-07-01 17:49:05.642 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.642 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.642 # of the computation is cached by cruncher
2025-07-01 17:49:05.642 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.642 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.642 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.642 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.642 if best_ratio < cutoff:
2025-07-01 17:49:05.642 # no non-identical "pretty close" pair
2025-07-01 17:49:05.642 if eqi is None:
2025-07-01 17:49:05.642 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.642 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.642 return
2025-07-01 17:49:05.642 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.642 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.642 else:
2025-07-01 17:49:05.642 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.642 eqi = None
2025-07-01 17:49:05.643
2025-07-01 17:49:05.643 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.643 # identical
2025-07-01 17:49:05.643
2025-07-01 17:49:05.643 # pump out diffs from before the synch point
2025-07-01 17:49:05.643 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.643
2025-07-01 17:49:05.643 # do intraline marking on the synch pair
2025-07-01 17:49:05.643 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.643 if eqi is None:
2025-07-01 17:49:05.643 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.643 atags = btags = ""
2025-07-01 17:49:05.643 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.643 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.643 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.643 if tag == 'replace':
2025-07-01 17:49:05.643 atags += '^' * la
2025-07-01 17:49:05.643 btags += '^' * lb
2025-07-01 17:49:05.643 elif tag == 'delete':
2025-07-01 17:49:05.643 atags += '-' * la
2025-07-01 17:49:05.643 elif tag == 'insert':
2025-07-01 17:49:05.644 btags += '+' * lb
2025-07-01 17:49:05.644 elif tag == 'equal':
2025-07-01 17:49:05.644 atags += ' ' * la
2025-07-01 17:49:05.644 btags += ' ' * lb
2025-07-01 17:49:05.644 else:
2025-07-01 17:49:05.644 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.644 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.644 else:
2025-07-01 17:49:05.644 # the synch pair is identical
2025-07-01 17:49:05.644 yield ' ' + aelt
2025-07-01 17:49:05.644
2025-07-01 17:49:05.644 # pump out diffs from after the synch point
2025-07-01 17:49:05.644 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.644
2025-07-01 17:49:05.644 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.644 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.644
2025-07-01 17:49:05.644 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.644 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.644 alo = 40, ahi = 1101
2025-07-01 17:49:05.644 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.645 blo = 40, bhi = 1101
2025-07-01 17:49:05.645
2025-07-01 17:49:05.645 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.645 g = []
2025-07-01 17:49:05.645 if alo < ahi:
2025-07-01 17:49:05.645 if blo < bhi:
2025-07-01 17:49:05.645 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.645 else:
2025-07-01 17:49:05.645 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.645 elif blo < bhi:
2025-07-01 17:49:05.645 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.645
2025-07-01 17:49:05.645 > yield from g
2025-07-01 17:49:05.645
2025-07-01 17:49:05.645 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.645 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.645
2025-07-01 17:49:05.645 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.645 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.645 alo = 40, ahi = 1101
2025-07-01 17:49:05.646 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.646 blo = 40, bhi = 1101
2025-07-01 17:49:05.646
2025-07-01 17:49:05.646 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.646 r"""
2025-07-01 17:49:05.646 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.646 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.646 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.646 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.646
2025-07-01 17:49:05.646 Example:
2025-07-01 17:49:05.646
2025-07-01 17:49:05.646 >>> d = Differ()
2025-07-01 17:49:05.646 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.646 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.646 >>> print(''.join(results), end="")
2025-07-01 17:49:05.646 - abcDefghiJkl
2025-07-01 17:49:05.646 + abcdefGhijkl
2025-07-01 17:49:05.647 """
2025-07-01 17:49:05.647
2025-07-01 17:49:05.647 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.647 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.647 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.647 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.647 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.647
2025-07-01 17:49:05.647 # search for the pair that matches best without being identical
2025-07-01 17:49:05.647 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.647 # on junk -- unless we have to)
2025-07-01 17:49:05.647 for j in range(blo, bhi):
2025-07-01 17:49:05.647 bj = b[j]
2025-07-01 17:49:05.647 cruncher.set_seq2(bj)
2025-07-01 17:49:05.647 for i in range(alo, ahi):
2025-07-01 17:49:05.647 ai = a[i]
2025-07-01 17:49:05.647 if ai == bj:
2025-07-01 17:49:05.647 if eqi is None:
2025-07-01 17:49:05.647 eqi, eqj = i, j
2025-07-01 17:49:05.647 continue
2025-07-01 17:49:05.647 cruncher.set_seq1(ai)
2025-07-01 17:49:05.650 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.650 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.650 # compares by a factor of 3.
2025-07-01 17:49:05.651 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.651 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.651 # of the computation is cached by cruncher
2025-07-01 17:49:05.651 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.651 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.651 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.651 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.651 if best_ratio < cutoff:
2025-07-01 17:49:05.651 # no non-identical "pretty close" pair
2025-07-01 17:49:05.651 if eqi is None:
2025-07-01 17:49:05.651 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.651 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.651 return
2025-07-01 17:49:05.651 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.651 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.651 else:
2025-07-01 17:49:05.651 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.651 eqi = None
2025-07-01 17:49:05.652
2025-07-01 17:49:05.652 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.652 # identical
2025-07-01 17:49:05.652
2025-07-01 17:49:05.652 # pump out diffs from before the synch point
2025-07-01 17:49:05.652 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.652
2025-07-01 17:49:05.652 # do intraline marking on the synch pair
2025-07-01 17:49:05.652 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.652 if eqi is None:
2025-07-01 17:49:05.652 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.652 atags = btags = ""
2025-07-01 17:49:05.652 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.652 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.652 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.652 if tag == 'replace':
2025-07-01 17:49:05.652 atags += '^' * la
2025-07-01 17:49:05.652 btags += '^' * lb
2025-07-01 17:49:05.652 elif tag == 'delete':
2025-07-01 17:49:05.652 atags += '-' * la
2025-07-01 17:49:05.653 elif tag == 'insert':
2025-07-01 17:49:05.653 btags += '+' * lb
2025-07-01 17:49:05.653 elif tag == 'equal':
2025-07-01 17:49:05.653 atags += ' ' * la
2025-07-01 17:49:05.653 btags += ' ' * lb
2025-07-01 17:49:05.653 else:
2025-07-01 17:49:05.653 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.653 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.653 else:
2025-07-01 17:49:05.653 # the synch pair is identical
2025-07-01 17:49:05.653 yield ' ' + aelt
2025-07-01 17:49:05.653
2025-07-01 17:49:05.653 # pump out diffs from after the synch point
2025-07-01 17:49:05.653 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.653
2025-07-01 17:49:05.653 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.653 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.653
2025-07-01 17:49:05.653 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.653 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.654 alo = 41, ahi = 1101
2025-07-01 17:49:05.654 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.654 blo = 41, bhi = 1101
2025-07-01 17:49:05.654
2025-07-01 17:49:05.654 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.654 g = []
2025-07-01 17:49:05.654 if alo < ahi:
2025-07-01 17:49:05.654 if blo < bhi:
2025-07-01 17:49:05.654 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.654 else:
2025-07-01 17:49:05.654 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.654 elif blo < bhi:
2025-07-01 17:49:05.654 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.654
2025-07-01 17:49:05.654 > yield from g
2025-07-01 17:49:05.654
2025-07-01 17:49:05.654 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.654 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.654
2025-07-01 17:49:05.654 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.654 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.655 alo = 41, ahi = 1101
2025-07-01 17:49:05.655 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.655 blo = 41, bhi = 1101
2025-07-01 17:49:05.655
2025-07-01 17:49:05.655 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.655 r"""
2025-07-01 17:49:05.655 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.655 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.655 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.655 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.655
2025-07-01 17:49:05.655 Example:
2025-07-01 17:49:05.655
2025-07-01 17:49:05.655 >>> d = Differ()
2025-07-01 17:49:05.655 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.655 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.655 >>> print(''.join(results), end="")
2025-07-01 17:49:05.655 - abcDefghiJkl
2025-07-01 17:49:05.655 + abcdefGhijkl
2025-07-01 17:49:05.656 """
2025-07-01 17:49:05.656
2025-07-01 17:49:05.656 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.656 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.656 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.656 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.656 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.656
2025-07-01 17:49:05.656 # search for the pair that matches best without being identical
2025-07-01 17:49:05.656 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.656 # on junk -- unless we have to)
2025-07-01 17:49:05.656 for j in range(blo, bhi):
2025-07-01 17:49:05.656 bj = b[j]
2025-07-01 17:49:05.656 cruncher.set_seq2(bj)
2025-07-01 17:49:05.656 for i in range(alo, ahi):
2025-07-01 17:49:05.656 ai = a[i]
2025-07-01 17:49:05.656 if ai == bj:
2025-07-01 17:49:05.656 if eqi is None:
2025-07-01 17:49:05.656 eqi, eqj = i, j
2025-07-01 17:49:05.656 continue
2025-07-01 17:49:05.657 cruncher.set_seq1(ai)
2025-07-01 17:49:05.657 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.657 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.657 # compares by a factor of 3.
2025-07-01 17:49:05.657 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.657 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.657 # of the computation is cached by cruncher
2025-07-01 17:49:05.657 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.657 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.657 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.657 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.657 if best_ratio < cutoff:
2025-07-01 17:49:05.657 # no non-identical "pretty close" pair
2025-07-01 17:49:05.657 if eqi is None:
2025-07-01 17:49:05.657 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.657 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.657 return
2025-07-01 17:49:05.657 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.657 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.657 else:
2025-07-01 17:49:05.658 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.658 eqi = None
2025-07-01 17:49:05.658
2025-07-01 17:49:05.658 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.658 # identical
2025-07-01 17:49:05.658
2025-07-01 17:49:05.658 # pump out diffs from before the synch point
2025-07-01 17:49:05.658 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.658
2025-07-01 17:49:05.658 # do intraline marking on the synch pair
2025-07-01 17:49:05.658 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.658 if eqi is None:
2025-07-01 17:49:05.658 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.658 atags = btags = ""
2025-07-01 17:49:05.658 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.658 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.658 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.658 if tag == 'replace':
2025-07-01 17:49:05.658 atags += '^' * la
2025-07-01 17:49:05.658 btags += '^' * lb
2025-07-01 17:49:05.658 elif tag == 'delete':
2025-07-01 17:49:05.659 atags += '-' * la
2025-07-01 17:49:05.659 elif tag == 'insert':
2025-07-01 17:49:05.659 btags += '+' * lb
2025-07-01 17:49:05.659 elif tag == 'equal':
2025-07-01 17:49:05.659 atags += ' ' * la
2025-07-01 17:49:05.659 btags += ' ' * lb
2025-07-01 17:49:05.659 else:
2025-07-01 17:49:05.659 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.659 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.659 else:
2025-07-01 17:49:05.659 # the synch pair is identical
2025-07-01 17:49:05.659 yield ' ' + aelt
2025-07-01 17:49:05.659
2025-07-01 17:49:05.659 # pump out diffs from after the synch point
2025-07-01 17:49:05.659 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.659
2025-07-01 17:49:05.659 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.659 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.659
2025-07-01 17:49:05.659 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.659 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.660 alo = 42, ahi = 1101
2025-07-01 17:49:05.660 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.660 blo = 42, bhi = 1101
2025-07-01 17:49:05.660
2025-07-01 17:49:05.660 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.660 g = []
2025-07-01 17:49:05.660 if alo < ahi:
2025-07-01 17:49:05.660 if blo < bhi:
2025-07-01 17:49:05.660 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.660 else:
2025-07-01 17:49:05.660 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.660 elif blo < bhi:
2025-07-01 17:49:05.660 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.660
2025-07-01 17:49:05.660 > yield from g
2025-07-01 17:49:05.660
2025-07-01 17:49:05.660 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.660 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.660
2025-07-01 17:49:05.661 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.661 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.661 alo = 42, ahi = 1101
2025-07-01 17:49:05.661 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.661 blo = 42, bhi = 1101
2025-07-01 17:49:05.661
2025-07-01 17:49:05.661 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.661 r"""
2025-07-01 17:49:05.661 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.661 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.661 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.661 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.661
2025-07-01 17:49:05.661 Example:
2025-07-01 17:49:05.661
2025-07-01 17:49:05.661 >>> d = Differ()
2025-07-01 17:49:05.661 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.661 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.661 >>> print(''.join(results), end="")
2025-07-01 17:49:05.661 - abcDefghiJkl
2025-07-01 17:49:05.662 + abcdefGhijkl
2025-07-01 17:49:05.662 """
2025-07-01 17:49:05.662
2025-07-01 17:49:05.662 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.662 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.662 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.662 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.662 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.662
2025-07-01 17:49:05.662 # search for the pair that matches best without being identical
2025-07-01 17:49:05.662 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.662 # on junk -- unless we have to)
2025-07-01 17:49:05.662 for j in range(blo, bhi):
2025-07-01 17:49:05.662 bj = b[j]
2025-07-01 17:49:05.662 cruncher.set_seq2(bj)
2025-07-01 17:49:05.662 for i in range(alo, ahi):
2025-07-01 17:49:05.662 ai = a[i]
2025-07-01 17:49:05.662 if ai == bj:
2025-07-01 17:49:05.662 if eqi is None:
2025-07-01 17:49:05.663 eqi, eqj = i, j
2025-07-01 17:49:05.663 continue
2025-07-01 17:49:05.663 cruncher.set_seq1(ai)
2025-07-01 17:49:05.663 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.663 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.663 # compares by a factor of 3.
2025-07-01 17:49:05.663 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.663 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.663 # of the computation is cached by cruncher
2025-07-01 17:49:05.663 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.663 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.663 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.663 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.663 if best_ratio < cutoff:
2025-07-01 17:49:05.663 # no non-identical "pretty close" pair
2025-07-01 17:49:05.663 if eqi is None:
2025-07-01 17:49:05.663 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.663 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.663 return
2025-07-01 17:49:05.663 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.664 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.669 else:
2025-07-01 17:49:05.669 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.669 eqi = None
2025-07-01 17:49:05.669
2025-07-01 17:49:05.669 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.669 # identical
2025-07-01 17:49:05.669
2025-07-01 17:49:05.669 # pump out diffs from before the synch point
2025-07-01 17:49:05.669 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.669
2025-07-01 17:49:05.669 # do intraline marking on the synch pair
2025-07-01 17:49:05.669 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.669 if eqi is None:
2025-07-01 17:49:05.669 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.669 atags = btags = ""
2025-07-01 17:49:05.669 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.669 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.669 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.669 if tag == 'replace':
2025-07-01 17:49:05.669 atags += '^' * la
2025-07-01 17:49:05.669 btags += '^' * lb
2025-07-01 17:49:05.670 elif tag == 'delete':
2025-07-01 17:49:05.670 atags += '-' * la
2025-07-01 17:49:05.670 elif tag == 'insert':
2025-07-01 17:49:05.670 btags += '+' * lb
2025-07-01 17:49:05.670 elif tag == 'equal':
2025-07-01 17:49:05.670 atags += ' ' * la
2025-07-01 17:49:05.670 btags += ' ' * lb
2025-07-01 17:49:05.670 else:
2025-07-01 17:49:05.670 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.670 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.670 else:
2025-07-01 17:49:05.670 # the synch pair is identical
2025-07-01 17:49:05.670 yield ' ' + aelt
2025-07-01 17:49:05.670
2025-07-01 17:49:05.670 # pump out diffs from after the synch point
2025-07-01 17:49:05.670 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.670
2025-07-01 17:49:05.670 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.670 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.670
2025-07-01 17:49:05.671 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.671 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.671 alo = 43, ahi = 1101
2025-07-01 17:49:05.671 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.671 blo = 43, bhi = 1101
2025-07-01 17:49:05.671
2025-07-01 17:49:05.671 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.671 g = []
2025-07-01 17:49:05.671 if alo < ahi:
2025-07-01 17:49:05.671 if blo < bhi:
2025-07-01 17:49:05.671 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.671 else:
2025-07-01 17:49:05.671 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.671 elif blo < bhi:
2025-07-01 17:49:05.671 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.671
2025-07-01 17:49:05.671 > yield from g
2025-07-01 17:49:05.671
2025-07-01 17:49:05.671 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.671 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.671
2025-07-01 17:49:05.672 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.672 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.672 alo = 43, ahi = 1101
2025-07-01 17:49:05.672 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.672 blo = 43, bhi = 1101
2025-07-01 17:49:05.672
2025-07-01 17:49:05.672 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.672 r"""
2025-07-01 17:49:05.672 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.672 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.672 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.672 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.672
2025-07-01 17:49:05.672 Example:
2025-07-01 17:49:05.672
2025-07-01 17:49:05.672 >>> d = Differ()
2025-07-01 17:49:05.672 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.672 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.672 >>> print(''.join(results), end="")
2025-07-01 17:49:05.672 - abcDefghiJkl
2025-07-01 17:49:05.673 + abcdefGhijkl
2025-07-01 17:49:05.673 """
2025-07-01 17:49:05.673
2025-07-01 17:49:05.673 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.673 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.673 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.673 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.673 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.673
2025-07-01 17:49:05.673 # search for the pair that matches best without being identical
2025-07-01 17:49:05.673 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.673 # on junk -- unless we have to)
2025-07-01 17:49:05.673 for j in range(blo, bhi):
2025-07-01 17:49:05.673 bj = b[j]
2025-07-01 17:49:05.673 cruncher.set_seq2(bj)
2025-07-01 17:49:05.673 for i in range(alo, ahi):
2025-07-01 17:49:05.673 ai = a[i]
2025-07-01 17:49:05.673 if ai == bj:
2025-07-01 17:49:05.674 if eqi is None:
2025-07-01 17:49:05.674 eqi, eqj = i, j
2025-07-01 17:49:05.674 continue
2025-07-01 17:49:05.674 cruncher.set_seq1(ai)
2025-07-01 17:49:05.674 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.674 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.674 # compares by a factor of 3.
2025-07-01 17:49:05.674 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.674 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.674 # of the computation is cached by cruncher
2025-07-01 17:49:05.674 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.674 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.674 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.674 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.674 if best_ratio < cutoff:
2025-07-01 17:49:05.674 # no non-identical "pretty close" pair
2025-07-01 17:49:05.674 if eqi is None:
2025-07-01 17:49:05.674 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.674 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.674 return
2025-07-01 17:49:05.674 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.674 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.674 else:
2025-07-01 17:49:05.674 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.675 eqi = None
2025-07-01 17:49:05.675
2025-07-01 17:49:05.675 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.675 # identical
2025-07-01 17:49:05.675
2025-07-01 17:49:05.675 # pump out diffs from before the synch point
2025-07-01 17:49:05.675 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.675
2025-07-01 17:49:05.675 # do intraline marking on the synch pair
2025-07-01 17:49:05.675 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.675 if eqi is None:
2025-07-01 17:49:05.675 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.675 atags = btags = ""
2025-07-01 17:49:05.675 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.675 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.675 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.675 if tag == 'replace':
2025-07-01 17:49:05.675 atags += '^' * la
2025-07-01 17:49:05.675 btags += '^' * lb
2025-07-01 17:49:05.675 elif tag == 'delete':
2025-07-01 17:49:05.675 atags += '-' * la
2025-07-01 17:49:05.676 elif tag == 'insert':
2025-07-01 17:49:05.676 btags += '+' * lb
2025-07-01 17:49:05.676 elif tag == 'equal':
2025-07-01 17:49:05.676 atags += ' ' * la
2025-07-01 17:49:05.676 btags += ' ' * lb
2025-07-01 17:49:05.676 else:
2025-07-01 17:49:05.676 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.676 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.676 else:
2025-07-01 17:49:05.676 # the synch pair is identical
2025-07-01 17:49:05.676 yield ' ' + aelt
2025-07-01 17:49:05.676
2025-07-01 17:49:05.676 # pump out diffs from after the synch point
2025-07-01 17:49:05.676 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.676
2025-07-01 17:49:05.676 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.676 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.676
2025-07-01 17:49:05.676 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.676 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.677 alo = 44, ahi = 1101
2025-07-01 17:49:05.677 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.677 blo = 44, bhi = 1101
2025-07-01 17:49:05.677
2025-07-01 17:49:05.677 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.677 g = []
2025-07-01 17:49:05.677 if alo < ahi:
2025-07-01 17:49:05.677 if blo < bhi:
2025-07-01 17:49:05.677 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.677 else:
2025-07-01 17:49:05.677 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.677 elif blo < bhi:
2025-07-01 17:49:05.677 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.677
2025-07-01 17:49:05.677 > yield from g
2025-07-01 17:49:05.677
2025-07-01 17:49:05.677 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.677 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.677
2025-07-01 17:49:05.677 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.677 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.678 alo = 44, ahi = 1101
2025-07-01 17:49:05.678 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.678 blo = 44, bhi = 1101
2025-07-01 17:49:05.678
2025-07-01 17:49:05.678 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.678 r"""
2025-07-01 17:49:05.678 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.678 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.678 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.678 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.678
2025-07-01 17:49:05.678 Example:
2025-07-01 17:49:05.678
2025-07-01 17:49:05.678 >>> d = Differ()
2025-07-01 17:49:05.678 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.678 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.678 >>> print(''.join(results), end="")
2025-07-01 17:49:05.678 - abcDefghiJkl
2025-07-01 17:49:05.678 + abcdefGhijkl
2025-07-01 17:49:05.679 """
2025-07-01 17:49:05.682
2025-07-01 17:49:05.682 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.682 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.682 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.682 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.682 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.682
2025-07-01 17:49:05.682 # search for the pair that matches best without being identical
2025-07-01 17:49:05.682 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.682 # on junk -- unless we have to)
2025-07-01 17:49:05.682 for j in range(blo, bhi):
2025-07-01 17:49:05.682 bj = b[j]
2025-07-01 17:49:05.682 cruncher.set_seq2(bj)
2025-07-01 17:49:05.682 for i in range(alo, ahi):
2025-07-01 17:49:05.682 ai = a[i]
2025-07-01 17:49:05.682 if ai == bj:
2025-07-01 17:49:05.682 if eqi is None:
2025-07-01 17:49:05.682 eqi, eqj = i, j
2025-07-01 17:49:05.682 continue
2025-07-01 17:49:05.682 cruncher.set_seq1(ai)
2025-07-01 17:49:05.683 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.683 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.683 # compares by a factor of 3.
2025-07-01 17:49:05.683 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.683 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.683 # of the computation is cached by cruncher
2025-07-01 17:49:05.683 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.683 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.683 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.683 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.683 if best_ratio < cutoff:
2025-07-01 17:49:05.683 # no non-identical "pretty close" pair
2025-07-01 17:49:05.683 if eqi is None:
2025-07-01 17:49:05.683 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.683 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.683 return
2025-07-01 17:49:05.683 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.683 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.683 else:
2025-07-01 17:49:05.683 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.683 eqi = None
2025-07-01 17:49:05.684
2025-07-01 17:49:05.684 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.684 # identical
2025-07-01 17:49:05.684
2025-07-01 17:49:05.684 # pump out diffs from before the synch point
2025-07-01 17:49:05.684 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.684
2025-07-01 17:49:05.684 # do intraline marking on the synch pair
2025-07-01 17:49:05.684 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.684 if eqi is None:
2025-07-01 17:49:05.684 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.684 atags = btags = ""
2025-07-01 17:49:05.684 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.684 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.684 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.684 if tag == 'replace':
2025-07-01 17:49:05.684 atags += '^' * la
2025-07-01 17:49:05.684 btags += '^' * lb
2025-07-01 17:49:05.684 elif tag == 'delete':
2025-07-01 17:49:05.685 atags += '-' * la
2025-07-01 17:49:05.685 elif tag == 'insert':
2025-07-01 17:49:05.685 btags += '+' * lb
2025-07-01 17:49:05.685 elif tag == 'equal':
2025-07-01 17:49:05.685 atags += ' ' * la
2025-07-01 17:49:05.685 btags += ' ' * lb
2025-07-01 17:49:05.685 else:
2025-07-01 17:49:05.685 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.685 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.685 else:
2025-07-01 17:49:05.685 # the synch pair is identical
2025-07-01 17:49:05.685 yield ' ' + aelt
2025-07-01 17:49:05.685
2025-07-01 17:49:05.685 # pump out diffs from after the synch point
2025-07-01 17:49:05.685 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.685
2025-07-01 17:49:05.685 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.685 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.685
2025-07-01 17:49:05.685 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.685 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.686 alo = 45, ahi = 1101
2025-07-01 17:49:05.686 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.686 blo = 45, bhi = 1101
2025-07-01 17:49:05.686
2025-07-01 17:49:05.686 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.686 g = []
2025-07-01 17:49:05.686 if alo < ahi:
2025-07-01 17:49:05.686 if blo < bhi:
2025-07-01 17:49:05.686 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.686 else:
2025-07-01 17:49:05.686 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.686 elif blo < bhi:
2025-07-01 17:49:05.686 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.686
2025-07-01 17:49:05.686 > yield from g
2025-07-01 17:49:05.686
2025-07-01 17:49:05.686 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.686 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.686
2025-07-01 17:49:05.686 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.687 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.687 alo = 45, ahi = 1101
2025-07-01 17:49:05.687 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.687 blo = 45, bhi = 1101
2025-07-01 17:49:05.687
2025-07-01 17:49:05.687 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.687 r"""
2025-07-01 17:49:05.687 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.687 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.687 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.687 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.687
2025-07-01 17:49:05.687 Example:
2025-07-01 17:49:05.687
2025-07-01 17:49:05.687 >>> d = Differ()
2025-07-01 17:49:05.687 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.687 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.687 >>> print(''.join(results), end="")
2025-07-01 17:49:05.687 - abcDefghiJkl
2025-07-01 17:49:05.687 + abcdefGhijkl
2025-07-01 17:49:05.688 """
2025-07-01 17:49:05.688
2025-07-01 17:49:05.688 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.688 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.688 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.688 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.688 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.688
2025-07-01 17:49:05.688 # search for the pair that matches best without being identical
2025-07-01 17:49:05.688 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.688 # on junk -- unless we have to)
2025-07-01 17:49:05.688 for j in range(blo, bhi):
2025-07-01 17:49:05.688 bj = b[j]
2025-07-01 17:49:05.688 cruncher.set_seq2(bj)
2025-07-01 17:49:05.688 for i in range(alo, ahi):
2025-07-01 17:49:05.688 ai = a[i]
2025-07-01 17:49:05.688 if ai == bj:
2025-07-01 17:49:05.688 if eqi is None:
2025-07-01 17:49:05.688 eqi, eqj = i, j
2025-07-01 17:49:05.688 continue
2025-07-01 17:49:05.688 cruncher.set_seq1(ai)
2025-07-01 17:49:05.689 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.689 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.689 # compares by a factor of 3.
2025-07-01 17:49:05.689 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.689 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.689 # of the computation is cached by cruncher
2025-07-01 17:49:05.689 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.689 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.689 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.689 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.689 if best_ratio < cutoff:
2025-07-01 17:49:05.689 # no non-identical "pretty close" pair
2025-07-01 17:49:05.689 if eqi is None:
2025-07-01 17:49:05.689 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.689 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.689 return
2025-07-01 17:49:05.689 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.689 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.689 else:
2025-07-01 17:49:05.689 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.689 eqi = None
2025-07-01 17:49:05.689
2025-07-01 17:49:05.690 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.690 # identical
2025-07-01 17:49:05.690
2025-07-01 17:49:05.690 # pump out diffs from before the synch point
2025-07-01 17:49:05.690 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.690
2025-07-01 17:49:05.690 # do intraline marking on the synch pair
2025-07-01 17:49:05.690 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.690 if eqi is None:
2025-07-01 17:49:05.690 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.690 atags = btags = ""
2025-07-01 17:49:05.690 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.690 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.690 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.690 if tag == 'replace':
2025-07-01 17:49:05.690 atags += '^' * la
2025-07-01 17:49:05.690 btags += '^' * lb
2025-07-01 17:49:05.690 elif tag == 'delete':
2025-07-01 17:49:05.690 atags += '-' * la
2025-07-01 17:49:05.690 elif tag == 'insert':
2025-07-01 17:49:05.690 btags += '+' * lb
2025-07-01 17:49:05.690 elif tag == 'equal':
2025-07-01 17:49:05.691 atags += ' ' * la
2025-07-01 17:49:05.691 btags += ' ' * lb
2025-07-01 17:49:05.691 else:
2025-07-01 17:49:05.691 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.691 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.691 else:
2025-07-01 17:49:05.691 # the synch pair is identical
2025-07-01 17:49:05.691 yield ' ' + aelt
2025-07-01 17:49:05.691
2025-07-01 17:49:05.691 # pump out diffs from after the synch point
2025-07-01 17:49:05.691 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.691
2025-07-01 17:49:05.691 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.691 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.691
2025-07-01 17:49:05.691 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.691 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.691 alo = 48, ahi = 1101
2025-07-01 17:49:05.691 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.691 blo = 48, bhi = 1101
2025-07-01 17:49:05.691
2025-07-01 17:49:05.692 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.692 g = []
2025-07-01 17:49:05.692 if alo < ahi:
2025-07-01 17:49:05.692 if blo < bhi:
2025-07-01 17:49:05.692 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.692 else:
2025-07-01 17:49:05.692 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.692 elif blo < bhi:
2025-07-01 17:49:05.692 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.692
2025-07-01 17:49:05.692 > yield from g
2025-07-01 17:49:05.692
2025-07-01 17:49:05.692 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.692 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.692
2025-07-01 17:49:05.692 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.692 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.692 alo = 48, ahi = 1101
2025-07-01 17:49:05.692 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.692 blo = 48, bhi = 1101
2025-07-01 17:49:05.693
2025-07-01 17:49:05.693 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.693 r"""
2025-07-01 17:49:05.693 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.693 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.693 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.693 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.693
2025-07-01 17:49:05.693 Example:
2025-07-01 17:49:05.693
2025-07-01 17:49:05.693 >>> d = Differ()
2025-07-01 17:49:05.693 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.693 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.693 >>> print(''.join(results), end="")
2025-07-01 17:49:05.693 - abcDefghiJkl
2025-07-01 17:49:05.693 + abcdefGhijkl
2025-07-01 17:49:05.693 """
2025-07-01 17:49:05.693
2025-07-01 17:49:05.693 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.694 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.694 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.694 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.694 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.694
2025-07-01 17:49:05.694 # search for the pair that matches best without being identical
2025-07-01 17:49:05.694 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.694 # on junk -- unless we have to)
2025-07-01 17:49:05.694 for j in range(blo, bhi):
2025-07-01 17:49:05.694 bj = b[j]
2025-07-01 17:49:05.694 cruncher.set_seq2(bj)
2025-07-01 17:49:05.694 for i in range(alo, ahi):
2025-07-01 17:49:05.694 ai = a[i]
2025-07-01 17:49:05.694 if ai == bj:
2025-07-01 17:49:05.694 if eqi is None:
2025-07-01 17:49:05.694 eqi, eqj = i, j
2025-07-01 17:49:05.694 continue
2025-07-01 17:49:05.694 cruncher.set_seq1(ai)
2025-07-01 17:49:05.694 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.694 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.694 # compares by a factor of 3.
2025-07-01 17:49:05.695 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.701 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.701 # of the computation is cached by cruncher
2025-07-01 17:49:05.701 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.701 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.701 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.701 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.701 if best_ratio < cutoff:
2025-07-01 17:49:05.701 # no non-identical "pretty close" pair
2025-07-01 17:49:05.701 if eqi is None:
2025-07-01 17:49:05.701 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.701 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.701 return
2025-07-01 17:49:05.701 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.701 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.701 else:
2025-07-01 17:49:05.701 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.701 eqi = None
2025-07-01 17:49:05.701
2025-07-01 17:49:05.702 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.702 # identical
2025-07-01 17:49:05.702
2025-07-01 17:49:05.702 # pump out diffs from before the synch point
2025-07-01 17:49:05.702 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.702
2025-07-01 17:49:05.702 # do intraline marking on the synch pair
2025-07-01 17:49:05.702 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.702 if eqi is None:
2025-07-01 17:49:05.702 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.702 atags = btags = ""
2025-07-01 17:49:05.702 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.702 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.702 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.702 if tag == 'replace':
2025-07-01 17:49:05.702 atags += '^' * la
2025-07-01 17:49:05.702 btags += '^' * lb
2025-07-01 17:49:05.702 elif tag == 'delete':
2025-07-01 17:49:05.702 atags += '-' * la
2025-07-01 17:49:05.702 elif tag == 'insert':
2025-07-01 17:49:05.702 btags += '+' * lb
2025-07-01 17:49:05.703 elif tag == 'equal':
2025-07-01 17:49:05.703 atags += ' ' * la
2025-07-01 17:49:05.703 btags += ' ' * lb
2025-07-01 17:49:05.703 else:
2025-07-01 17:49:05.703 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.703 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.703 else:
2025-07-01 17:49:05.703 # the synch pair is identical
2025-07-01 17:49:05.703 yield ' ' + aelt
2025-07-01 17:49:05.703
2025-07-01 17:49:05.703 # pump out diffs from after the synch point
2025-07-01 17:49:05.703 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.703
2025-07-01 17:49:05.703 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.703 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.703
2025-07-01 17:49:05.703 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.703 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.703 alo = 49, ahi = 1101
2025-07-01 17:49:05.703 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.703 blo = 49, bhi = 1101
2025-07-01 17:49:05.704
2025-07-01 17:49:05.704 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.704 g = []
2025-07-01 17:49:05.704 if alo < ahi:
2025-07-01 17:49:05.704 if blo < bhi:
2025-07-01 17:49:05.704 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.704 else:
2025-07-01 17:49:05.704 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.704 elif blo < bhi:
2025-07-01 17:49:05.704 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.704
2025-07-01 17:49:05.704 > yield from g
2025-07-01 17:49:05.704
2025-07-01 17:49:05.704 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.704 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.704
2025-07-01 17:49:05.704 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.704 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.704 alo = 49, ahi = 1101
2025-07-01 17:49:05.704 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.704 blo = 49, bhi = 1101
2025-07-01 17:49:05.704
2025-07-01 17:49:05.705 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.705 r"""
2025-07-01 17:49:05.705 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.705 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.705 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.705 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.705
2025-07-01 17:49:05.705 Example:
2025-07-01 17:49:05.705
2025-07-01 17:49:05.705 >>> d = Differ()
2025-07-01 17:49:05.705 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.705 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.705 >>> print(''.join(results), end="")
2025-07-01 17:49:05.705 - abcDefghiJkl
2025-07-01 17:49:05.705 + abcdefGhijkl
2025-07-01 17:49:05.705 """
2025-07-01 17:49:05.705
2025-07-01 17:49:05.705 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.706 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.706 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.706 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.706 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.706
2025-07-01 17:49:05.706 # search for the pair that matches best without being identical
2025-07-01 17:49:05.706 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.706 # on junk -- unless we have to)
2025-07-01 17:49:05.706 for j in range(blo, bhi):
2025-07-01 17:49:05.706 bj = b[j]
2025-07-01 17:49:05.706 cruncher.set_seq2(bj)
2025-07-01 17:49:05.706 for i in range(alo, ahi):
2025-07-01 17:49:05.706 ai = a[i]
2025-07-01 17:49:05.706 if ai == bj:
2025-07-01 17:49:05.706 if eqi is None:
2025-07-01 17:49:05.706 eqi, eqj = i, j
2025-07-01 17:49:05.706 continue
2025-07-01 17:49:05.706 cruncher.set_seq1(ai)
2025-07-01 17:49:05.706 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.706 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.706 # compares by a factor of 3.
2025-07-01 17:49:05.707 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.707 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.707 # of the computation is cached by cruncher
2025-07-01 17:49:05.707 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.707 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.707 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.707 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.707 if best_ratio < cutoff:
2025-07-01 17:49:05.707 # no non-identical "pretty close" pair
2025-07-01 17:49:05.707 if eqi is None:
2025-07-01 17:49:05.707 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.707 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.707 return
2025-07-01 17:49:05.707 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.707 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.707 else:
2025-07-01 17:49:05.707 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.707 eqi = None
2025-07-01 17:49:05.708
2025-07-01 17:49:05.708 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.708 # identical
2025-07-01 17:49:05.708
2025-07-01 17:49:05.708 # pump out diffs from before the synch point
2025-07-01 17:49:05.708 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.708
2025-07-01 17:49:05.708 # do intraline marking on the synch pair
2025-07-01 17:49:05.708 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.708 if eqi is None:
2025-07-01 17:49:05.708 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.708 atags = btags = ""
2025-07-01 17:49:05.708 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.708 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.708 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.708 if tag == 'replace':
2025-07-01 17:49:05.708 atags += '^' * la
2025-07-01 17:49:05.708 btags += '^' * lb
2025-07-01 17:49:05.708 elif tag == 'delete':
2025-07-01 17:49:05.708 atags += '-' * la
2025-07-01 17:49:05.708 elif tag == 'insert':
2025-07-01 17:49:05.709 btags += '+' * lb
2025-07-01 17:49:05.709 elif tag == 'equal':
2025-07-01 17:49:05.709 atags += ' ' * la
2025-07-01 17:49:05.709 btags += ' ' * lb
2025-07-01 17:49:05.709 else:
2025-07-01 17:49:05.709 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.709 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.709 else:
2025-07-01 17:49:05.709 # the synch pair is identical
2025-07-01 17:49:05.709 yield ' ' + aelt
2025-07-01 17:49:05.709
2025-07-01 17:49:05.709 # pump out diffs from after the synch point
2025-07-01 17:49:05.709 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.709
2025-07-01 17:49:05.709 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.709 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.709
2025-07-01 17:49:05.709 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.709 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.709 alo = 50, ahi = 1101
2025-07-01 17:49:05.710 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.710 blo = 50, bhi = 1101
2025-07-01 17:49:05.710
2025-07-01 17:49:05.710 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.710 g = []
2025-07-01 17:49:05.710 if alo < ahi:
2025-07-01 17:49:05.710 if blo < bhi:
2025-07-01 17:49:05.710 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.710 else:
2025-07-01 17:49:05.710 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.710 elif blo < bhi:
2025-07-01 17:49:05.710 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.710
2025-07-01 17:49:05.710 > yield from g
2025-07-01 17:49:05.710
2025-07-01 17:49:05.710 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.710 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.710
2025-07-01 17:49:05.710 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.710 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.710 alo = 50, ahi = 1101
2025-07-01 17:49:05.714 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.714 blo = 50, bhi = 1101
2025-07-01 17:49:05.714
2025-07-01 17:49:05.714 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.714 r"""
2025-07-01 17:49:05.714 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.714 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.714 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.714 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.714
2025-07-01 17:49:05.714 Example:
2025-07-01 17:49:05.714
2025-07-01 17:49:05.714 >>> d = Differ()
2025-07-01 17:49:05.714 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.714 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.714 >>> print(''.join(results), end="")
2025-07-01 17:49:05.714 - abcDefghiJkl
2025-07-01 17:49:05.714 + abcdefGhijkl
2025-07-01 17:49:05.715 """
2025-07-01 17:49:05.715
2025-07-01 17:49:05.715 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.715 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.715 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.715 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.715 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.715
2025-07-01 17:49:05.715 # search for the pair that matches best without being identical
2025-07-01 17:49:05.715 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.715 # on junk -- unless we have to)
2025-07-01 17:49:05.715 for j in range(blo, bhi):
2025-07-01 17:49:05.715 bj = b[j]
2025-07-01 17:49:05.715 cruncher.set_seq2(bj)
2025-07-01 17:49:05.715 for i in range(alo, ahi):
2025-07-01 17:49:05.715 ai = a[i]
2025-07-01 17:49:05.715 if ai == bj:
2025-07-01 17:49:05.715 if eqi is None:
2025-07-01 17:49:05.715 eqi, eqj = i, j
2025-07-01 17:49:05.715 continue
2025-07-01 17:49:05.715 cruncher.set_seq1(ai)
2025-07-01 17:49:05.716 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.716 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.716 # compares by a factor of 3.
2025-07-01 17:49:05.716 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.716 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.716 # of the computation is cached by cruncher
2025-07-01 17:49:05.716 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.716 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.716 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.716 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.716 if best_ratio < cutoff:
2025-07-01 17:49:05.716 # no non-identical "pretty close" pair
2025-07-01 17:49:05.716 if eqi is None:
2025-07-01 17:49:05.716 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.716 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.716 return
2025-07-01 17:49:05.716 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.716 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.716 else:
2025-07-01 17:49:05.716 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.716 eqi = None
2025-07-01 17:49:05.716
2025-07-01 17:49:05.717 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.717 # identical
2025-07-01 17:49:05.717
2025-07-01 17:49:05.717 # pump out diffs from before the synch point
2025-07-01 17:49:05.717 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.717
2025-07-01 17:49:05.717 # do intraline marking on the synch pair
2025-07-01 17:49:05.717 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.717 if eqi is None:
2025-07-01 17:49:05.717 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.717 atags = btags = ""
2025-07-01 17:49:05.717 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.717 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.717 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.717 if tag == 'replace':
2025-07-01 17:49:05.717 atags += '^' * la
2025-07-01 17:49:05.717 btags += '^' * lb
2025-07-01 17:49:05.717 elif tag == 'delete':
2025-07-01 17:49:05.717 atags += '-' * la
2025-07-01 17:49:05.717 elif tag == 'insert':
2025-07-01 17:49:05.717 btags += '+' * lb
2025-07-01 17:49:05.717 elif tag == 'equal':
2025-07-01 17:49:05.717 atags += ' ' * la
2025-07-01 17:49:05.717 btags += ' ' * lb
2025-07-01 17:49:05.717 else:
2025-07-01 17:49:05.717 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.717 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.718 else:
2025-07-01 17:49:05.718 # the synch pair is identical
2025-07-01 17:49:05.718 yield ' ' + aelt
2025-07-01 17:49:05.718
2025-07-01 17:49:05.718 # pump out diffs from after the synch point
2025-07-01 17:49:05.718 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.718
2025-07-01 17:49:05.718 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.718 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.718
2025-07-01 17:49:05.718 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.718 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.718 alo = 51, ahi = 1101
2025-07-01 17:49:05.718 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.718 blo = 51, bhi = 1101
2025-07-01 17:49:05.718
2025-07-01 17:49:05.718 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.718 g = []
2025-07-01 17:49:05.718 if alo < ahi:
2025-07-01 17:49:05.719 if blo < bhi:
2025-07-01 17:49:05.719 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.719 else:
2025-07-01 17:49:05.719 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.719 elif blo < bhi:
2025-07-01 17:49:05.719 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.719
2025-07-01 17:49:05.719 > yield from g
2025-07-01 17:49:05.719
2025-07-01 17:49:05.719 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.719 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.719
2025-07-01 17:49:05.719 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.719 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.719 alo = 51, ahi = 1101
2025-07-01 17:49:05.719 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.719 blo = 51, bhi = 1101
2025-07-01 17:49:05.719
2025-07-01 17:49:05.719 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.719 r"""
2025-07-01 17:49:05.720 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.720 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.720 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.720 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.720
2025-07-01 17:49:05.720 Example:
2025-07-01 17:49:05.720
2025-07-01 17:49:05.720 >>> d = Differ()
2025-07-01 17:49:05.720 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.720 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.720 >>> print(''.join(results), end="")
2025-07-01 17:49:05.720 - abcDefghiJkl
2025-07-01 17:49:05.720 + abcdefGhijkl
2025-07-01 17:49:05.720 """
2025-07-01 17:49:05.720
2025-07-01 17:49:05.720 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.720 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.720 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.720 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.721 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.721
2025-07-01 17:49:05.721 # search for the pair that matches best without being identical
2025-07-01 17:49:05.721 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.721 # on junk -- unless we have to)
2025-07-01 17:49:05.721 for j in range(blo, bhi):
2025-07-01 17:49:05.721 bj = b[j]
2025-07-01 17:49:05.721 cruncher.set_seq2(bj)
2025-07-01 17:49:05.721 for i in range(alo, ahi):
2025-07-01 17:49:05.721 ai = a[i]
2025-07-01 17:49:05.721 if ai == bj:
2025-07-01 17:49:05.721 if eqi is None:
2025-07-01 17:49:05.721 eqi, eqj = i, j
2025-07-01 17:49:05.721 continue
2025-07-01 17:49:05.721 cruncher.set_seq1(ai)
2025-07-01 17:49:05.721 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.721 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.721 # compares by a factor of 3.
2025-07-01 17:49:05.721 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.721 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.722 # of the computation is cached by cruncher
2025-07-01 17:49:05.722 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.722 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.722 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.722 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.722 if best_ratio < cutoff:
2025-07-01 17:49:05.722 # no non-identical "pretty close" pair
2025-07-01 17:49:05.722 if eqi is None:
2025-07-01 17:49:05.722 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.722 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.722 return
2025-07-01 17:49:05.722 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.722 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.722 else:
2025-07-01 17:49:05.722 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.722 eqi = None
2025-07-01 17:49:05.722
2025-07-01 17:49:05.722 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.722 # identical
2025-07-01 17:49:05.722
2025-07-01 17:49:05.722 # pump out diffs from before the synch point
2025-07-01 17:49:05.723 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.723
2025-07-01 17:49:05.723 # do intraline marking on the synch pair
2025-07-01 17:49:05.723 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.723 if eqi is None:
2025-07-01 17:49:05.723 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.723 atags = btags = ""
2025-07-01 17:49:05.723 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.723 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.723 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.723 if tag == 'replace':
2025-07-01 17:49:05.723 atags += '^' * la
2025-07-01 17:49:05.723 btags += '^' * lb
2025-07-01 17:49:05.723 elif tag == 'delete':
2025-07-01 17:49:05.723 atags += '-' * la
2025-07-01 17:49:05.723 elif tag == 'insert':
2025-07-01 17:49:05.723 btags += '+' * lb
2025-07-01 17:49:05.723 elif tag == 'equal':
2025-07-01 17:49:05.723 atags += ' ' * la
2025-07-01 17:49:05.723 btags += ' ' * lb
2025-07-01 17:49:05.723 else:
2025-07-01 17:49:05.724 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.724 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.724 else:
2025-07-01 17:49:05.724 # the synch pair is identical
2025-07-01 17:49:05.724 yield ' ' + aelt
2025-07-01 17:49:05.724
2025-07-01 17:49:05.724 # pump out diffs from after the synch point
2025-07-01 17:49:05.724 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.724
2025-07-01 17:49:05.724 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.724 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.724
2025-07-01 17:49:05.724 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.724 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.724 alo = 52, ahi = 1101
2025-07-01 17:49:05.724 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.724 blo = 52, bhi = 1101
2025-07-01 17:49:05.724
2025-07-01 17:49:05.724 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.724 g = []
2025-07-01 17:49:05.724 if alo < ahi:
2025-07-01 17:49:05.725 if blo < bhi:
2025-07-01 17:49:05.725 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.725 else:
2025-07-01 17:49:05.725 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.725 elif blo < bhi:
2025-07-01 17:49:05.725 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.725
2025-07-01 17:49:05.725 > yield from g
2025-07-01 17:49:05.725
2025-07-01 17:49:05.725 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.725 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.725
2025-07-01 17:49:05.725 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.725 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.725 alo = 52, ahi = 1101
2025-07-01 17:49:05.725 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.725 blo = 52, bhi = 1101
2025-07-01 17:49:05.725
2025-07-01 17:49:05.725 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.725 r"""
2025-07-01 17:49:05.726 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.730 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.730 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.730 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.731
2025-07-01 17:49:05.731 Example:
2025-07-01 17:49:05.731
2025-07-01 17:49:05.731 >>> d = Differ()
2025-07-01 17:49:05.731 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.731 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.731 >>> print(''.join(results), end="")
2025-07-01 17:49:05.731 - abcDefghiJkl
2025-07-01 17:49:05.731 + abcdefGhijkl
2025-07-01 17:49:05.731 """
2025-07-01 17:49:05.731
2025-07-01 17:49:05.731 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.731 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.731 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.731 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.731 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.731
2025-07-01 17:49:05.732 # search for the pair that matches best without being identical
2025-07-01 17:49:05.732 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.732 # on junk -- unless we have to)
2025-07-01 17:49:05.732 for j in range(blo, bhi):
2025-07-01 17:49:05.732 bj = b[j]
2025-07-01 17:49:05.732 cruncher.set_seq2(bj)
2025-07-01 17:49:05.732 for i in range(alo, ahi):
2025-07-01 17:49:05.732 ai = a[i]
2025-07-01 17:49:05.732 if ai == bj:
2025-07-01 17:49:05.732 if eqi is None:
2025-07-01 17:49:05.732 eqi, eqj = i, j
2025-07-01 17:49:05.732 continue
2025-07-01 17:49:05.732 cruncher.set_seq1(ai)
2025-07-01 17:49:05.732 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.732 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.732 # compares by a factor of 3.
2025-07-01 17:49:05.732 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.732 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.732 # of the computation is cached by cruncher
2025-07-01 17:49:05.732 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.732 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.732 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.732 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.733 if best_ratio < cutoff:
2025-07-01 17:49:05.733 # no non-identical "pretty close" pair
2025-07-01 17:49:05.733 if eqi is None:
2025-07-01 17:49:05.733 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.733 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.733 return
2025-07-01 17:49:05.733 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.733 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.733 else:
2025-07-01 17:49:05.733 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.733 eqi = None
2025-07-01 17:49:05.733
2025-07-01 17:49:05.733 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.733 # identical
2025-07-01 17:49:05.733
2025-07-01 17:49:05.733 # pump out diffs from before the synch point
2025-07-01 17:49:05.733 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.733
2025-07-01 17:49:05.733 # do intraline marking on the synch pair
2025-07-01 17:49:05.733 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.733 if eqi is None:
2025-07-01 17:49:05.733 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.734 atags = btags = ""
2025-07-01 17:49:05.734 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.734 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.734 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.734 if tag == 'replace':
2025-07-01 17:49:05.734 atags += '^' * la
2025-07-01 17:49:05.734 btags += '^' * lb
2025-07-01 17:49:05.734 elif tag == 'delete':
2025-07-01 17:49:05.734 atags += '-' * la
2025-07-01 17:49:05.734 elif tag == 'insert':
2025-07-01 17:49:05.734 btags += '+' * lb
2025-07-01 17:49:05.734 elif tag == 'equal':
2025-07-01 17:49:05.734 atags += ' ' * la
2025-07-01 17:49:05.734 btags += ' ' * lb
2025-07-01 17:49:05.734 else:
2025-07-01 17:49:05.734 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.734 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.734 else:
2025-07-01 17:49:05.734 # the synch pair is identical
2025-07-01 17:49:05.734 yield ' ' + aelt
2025-07-01 17:49:05.734
2025-07-01 17:49:05.735 # pump out diffs from after the synch point
2025-07-01 17:49:05.735 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.735
2025-07-01 17:49:05.735 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.735 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.735
2025-07-01 17:49:05.735 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.735 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.735 alo = 53, ahi = 1101
2025-07-01 17:49:05.735 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.735 blo = 53, bhi = 1101
2025-07-01 17:49:05.735
2025-07-01 17:49:05.735 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.735 g = []
2025-07-01 17:49:05.735 if alo < ahi:
2025-07-01 17:49:05.735 if blo < bhi:
2025-07-01 17:49:05.735 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.735 else:
2025-07-01 17:49:05.735 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.735 elif blo < bhi:
2025-07-01 17:49:05.735 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.736
2025-07-01 17:49:05.736 > yield from g
2025-07-01 17:49:05.736
2025-07-01 17:49:05.736 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.736 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.736
2025-07-01 17:49:05.736 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.736 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.736 alo = 53, ahi = 1101
2025-07-01 17:49:05.736 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.736 blo = 53, bhi = 1101
2025-07-01 17:49:05.736
2025-07-01 17:49:05.736 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.736 r"""
2025-07-01 17:49:05.736 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.736 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.736 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.736 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.736
2025-07-01 17:49:05.736 Example:
2025-07-01 17:49:05.736
2025-07-01 17:49:05.736 >>> d = Differ()
2025-07-01 17:49:05.737 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.737 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.737 >>> print(''.join(results), end="")
2025-07-01 17:49:05.737 - abcDefghiJkl
2025-07-01 17:49:05.737 + abcdefGhijkl
2025-07-01 17:49:05.737 """
2025-07-01 17:49:05.737
2025-07-01 17:49:05.737 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.737 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.737 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.737 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.737 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.737
2025-07-01 17:49:05.737 # search for the pair that matches best without being identical
2025-07-01 17:49:05.737 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.737 # on junk -- unless we have to)
2025-07-01 17:49:05.737 for j in range(blo, bhi):
2025-07-01 17:49:05.737 bj = b[j]
2025-07-01 17:49:05.737 cruncher.set_seq2(bj)
2025-07-01 17:49:05.738 for i in range(alo, ahi):
2025-07-01 17:49:05.738 ai = a[i]
2025-07-01 17:49:05.738 if ai == bj:
2025-07-01 17:49:05.738 if eqi is None:
2025-07-01 17:49:05.738 eqi, eqj = i, j
2025-07-01 17:49:05.738 continue
2025-07-01 17:49:05.738 cruncher.set_seq1(ai)
2025-07-01 17:49:05.738 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.738 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.738 # compares by a factor of 3.
2025-07-01 17:49:05.738 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.738 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.738 # of the computation is cached by cruncher
2025-07-01 17:49:05.738 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.738 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.738 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.738 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.738 if best_ratio < cutoff:
2025-07-01 17:49:05.738 # no non-identical "pretty close" pair
2025-07-01 17:49:05.738 if eqi is None:
2025-07-01 17:49:05.738 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.738 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.739 return
2025-07-01 17:49:05.739 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.739 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.739 else:
2025-07-01 17:49:05.739 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.739 eqi = None
2025-07-01 17:49:05.739
2025-07-01 17:49:05.739 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.739 # identical
2025-07-01 17:49:05.739
2025-07-01 17:49:05.739 # pump out diffs from before the synch point
2025-07-01 17:49:05.739 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.739
2025-07-01 17:49:05.739 # do intraline marking on the synch pair
2025-07-01 17:49:05.739 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.739 if eqi is None:
2025-07-01 17:49:05.739 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.739 atags = btags = ""
2025-07-01 17:49:05.739 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.740 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.740 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.740 if tag == 'replace':
2025-07-01 17:49:05.740 atags += '^' * la
2025-07-01 17:49:05.740 btags += '^' * lb
2025-07-01 17:49:05.740 elif tag == 'delete':
2025-07-01 17:49:05.740 atags += '-' * la
2025-07-01 17:49:05.740 elif tag == 'insert':
2025-07-01 17:49:05.740 btags += '+' * lb
2025-07-01 17:49:05.740 elif tag == 'equal':
2025-07-01 17:49:05.740 atags += ' ' * la
2025-07-01 17:49:05.740 btags += ' ' * lb
2025-07-01 17:49:05.740 else:
2025-07-01 17:49:05.740 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.740 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.740 else:
2025-07-01 17:49:05.740 # the synch pair is identical
2025-07-01 17:49:05.740 yield ' ' + aelt
2025-07-01 17:49:05.740
2025-07-01 17:49:05.740 # pump out diffs from after the synch point
2025-07-01 17:49:05.741 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.743
2025-07-01 17:49:05.744 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.744 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.744
2025-07-01 17:49:05.744 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.744 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.744 alo = 54, ahi = 1101
2025-07-01 17:49:05.744 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.744 blo = 54, bhi = 1101
2025-07-01 17:49:05.744
2025-07-01 17:49:05.744 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.744 g = []
2025-07-01 17:49:05.744 if alo < ahi:
2025-07-01 17:49:05.744 if blo < bhi:
2025-07-01 17:49:05.744 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.744 else:
2025-07-01 17:49:05.744 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.744 elif blo < bhi:
2025-07-01 17:49:05.744 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.744
2025-07-01 17:49:05.744 > yield from g
2025-07-01 17:49:05.745
2025-07-01 17:49:05.745 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.745 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.745
2025-07-01 17:49:05.745 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.745 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.745 alo = 54, ahi = 1101
2025-07-01 17:49:05.745 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.745 blo = 54, bhi = 1101
2025-07-01 17:49:05.745
2025-07-01 17:49:05.745 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.745 r"""
2025-07-01 17:49:05.745 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.745 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.745 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.745 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.745
2025-07-01 17:49:05.745 Example:
2025-07-01 17:49:05.745
2025-07-01 17:49:05.745 >>> d = Differ()
2025-07-01 17:49:05.745 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.746 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.746 >>> print(''.join(results), end="")
2025-07-01 17:49:05.746 - abcDefghiJkl
2025-07-01 17:49:05.746 + abcdefGhijkl
2025-07-01 17:49:05.746 """
2025-07-01 17:49:05.746
2025-07-01 17:49:05.746 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.746 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.746 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.746 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.746 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.746
2025-07-01 17:49:05.746 # search for the pair that matches best without being identical
2025-07-01 17:49:05.746 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.746 # on junk -- unless we have to)
2025-07-01 17:49:05.746 for j in range(blo, bhi):
2025-07-01 17:49:05.746 bj = b[j]
2025-07-01 17:49:05.746 cruncher.set_seq2(bj)
2025-07-01 17:49:05.746 for i in range(alo, ahi):
2025-07-01 17:49:05.747 ai = a[i]
2025-07-01 17:49:05.747 if ai == bj:
2025-07-01 17:49:05.747 if eqi is None:
2025-07-01 17:49:05.747 eqi, eqj = i, j
2025-07-01 17:49:05.747 continue
2025-07-01 17:49:05.747 cruncher.set_seq1(ai)
2025-07-01 17:49:05.747 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.748 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.748 # compares by a factor of 3.
2025-07-01 17:49:05.748 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.748 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.748 # of the computation is cached by cruncher
2025-07-01 17:49:05.748 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.748 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.748 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.748 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.748 if best_ratio < cutoff:
2025-07-01 17:49:05.748 # no non-identical "pretty close" pair
2025-07-01 17:49:05.748 if eqi is None:
2025-07-01 17:49:05.748 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.748 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.748 return
2025-07-01 17:49:05.748 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.748 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.748 else:
2025-07-01 17:49:05.748 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.748 eqi = None
2025-07-01 17:49:05.749
2025-07-01 17:49:05.749 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.749 # identical
2025-07-01 17:49:05.749
2025-07-01 17:49:05.749 # pump out diffs from before the synch point
2025-07-01 17:49:05.749 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.749
2025-07-01 17:49:05.749 # do intraline marking on the synch pair
2025-07-01 17:49:05.749 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.749 if eqi is None:
2025-07-01 17:49:05.749 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.749 atags = btags = ""
2025-07-01 17:49:05.749 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.749 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.749 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.749 if tag == 'replace':
2025-07-01 17:49:05.749 atags += '^' * la
2025-07-01 17:49:05.749 btags += '^' * lb
2025-07-01 17:49:05.749 elif tag == 'delete':
2025-07-01 17:49:05.749 atags += '-' * la
2025-07-01 17:49:05.749 elif tag == 'insert':
2025-07-01 17:49:05.750 btags += '+' * lb
2025-07-01 17:49:05.750 elif tag == 'equal':
2025-07-01 17:49:05.750 atags += ' ' * la
2025-07-01 17:49:05.750 btags += ' ' * lb
2025-07-01 17:49:05.750 else:
2025-07-01 17:49:05.750 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.750 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.750 else:
2025-07-01 17:49:05.750 # the synch pair is identical
2025-07-01 17:49:05.750 yield ' ' + aelt
2025-07-01 17:49:05.750
2025-07-01 17:49:05.750 # pump out diffs from after the synch point
2025-07-01 17:49:05.750 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.750
2025-07-01 17:49:05.750 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.750 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.750
2025-07-01 17:49:05.750 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.750 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.750 alo = 55, ahi = 1101
2025-07-01 17:49:05.750 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.750 blo = 55, bhi = 1101
2025-07-01 17:49:05.751
2025-07-01 17:49:05.751 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.751 g = []
2025-07-01 17:49:05.751 if alo < ahi:
2025-07-01 17:49:05.751 if blo < bhi:
2025-07-01 17:49:05.751 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.751 else:
2025-07-01 17:49:05.751 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.751 elif blo < bhi:
2025-07-01 17:49:05.751 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.751
2025-07-01 17:49:05.751 > yield from g
2025-07-01 17:49:05.751
2025-07-01 17:49:05.751 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.751 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.751
2025-07-01 17:49:05.751 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.751 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.752 alo = 55, ahi = 1101
2025-07-01 17:49:05.752 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.752 blo = 55, bhi = 1101
2025-07-01 17:49:05.752
2025-07-01 17:49:05.752 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.752 r"""
2025-07-01 17:49:05.752 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.752 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.752 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.752 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.752
2025-07-01 17:49:05.752 Example:
2025-07-01 17:49:05.752
2025-07-01 17:49:05.752 >>> d = Differ()
2025-07-01 17:49:05.752 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.752 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.752 >>> print(''.join(results), end="")
2025-07-01 17:49:05.752 - abcDefghiJkl
2025-07-01 17:49:05.752 + abcdefGhijkl
2025-07-01 17:49:05.753 """
2025-07-01 17:49:05.753
2025-07-01 17:49:05.753 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.753 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.753 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.753 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.753 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.753
2025-07-01 17:49:05.753 # search for the pair that matches best without being identical
2025-07-01 17:49:05.753 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.753 # on junk -- unless we have to)
2025-07-01 17:49:05.753 for j in range(blo, bhi):
2025-07-01 17:49:05.753 bj = b[j]
2025-07-01 17:49:05.753 cruncher.set_seq2(bj)
2025-07-01 17:49:05.753 for i in range(alo, ahi):
2025-07-01 17:49:05.753 ai = a[i]
2025-07-01 17:49:05.753 if ai == bj:
2025-07-01 17:49:05.753 if eqi is None:
2025-07-01 17:49:05.753 eqi, eqj = i, j
2025-07-01 17:49:05.753 continue
2025-07-01 17:49:05.754 cruncher.set_seq1(ai)
2025-07-01 17:49:05.754 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.754 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.754 # compares by a factor of 3.
2025-07-01 17:49:05.754 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.754 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.754 # of the computation is cached by cruncher
2025-07-01 17:49:05.754 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.754 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.754 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.754 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.754 if best_ratio < cutoff:
2025-07-01 17:49:05.754 # no non-identical "pretty close" pair
2025-07-01 17:49:05.754 if eqi is None:
2025-07-01 17:49:05.754 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.754 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.754 return
2025-07-01 17:49:05.754 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.754 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.754 else:
2025-07-01 17:49:05.754 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.755 eqi = None
2025-07-01 17:49:05.755
2025-07-01 17:49:05.755 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.755 # identical
2025-07-01 17:49:05.755
2025-07-01 17:49:05.755 # pump out diffs from before the synch point
2025-07-01 17:49:05.755 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.755
2025-07-01 17:49:05.755 # do intraline marking on the synch pair
2025-07-01 17:49:05.755 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.755 if eqi is None:
2025-07-01 17:49:05.755 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.755 atags = btags = ""
2025-07-01 17:49:05.755 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.755 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.755 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.755 if tag == 'replace':
2025-07-01 17:49:05.755 atags += '^' * la
2025-07-01 17:49:05.755 btags += '^' * lb
2025-07-01 17:49:05.755 elif tag == 'delete':
2025-07-01 17:49:05.755 atags += '-' * la
2025-07-01 17:49:05.756 elif tag == 'insert':
2025-07-01 17:49:05.756 btags += '+' * lb
2025-07-01 17:49:05.756 elif tag == 'equal':
2025-07-01 17:49:05.756 atags += ' ' * la
2025-07-01 17:49:05.756 btags += ' ' * lb
2025-07-01 17:49:05.756 else:
2025-07-01 17:49:05.756 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.756 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.756 else:
2025-07-01 17:49:05.756 # the synch pair is identical
2025-07-01 17:49:05.756 yield ' ' + aelt
2025-07-01 17:49:05.756
2025-07-01 17:49:05.756 # pump out diffs from after the synch point
2025-07-01 17:49:05.756 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.756
2025-07-01 17:49:05.756 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.756 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.756
2025-07-01 17:49:05.756 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.756 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.756 alo = 56, ahi = 1101
2025-07-01 17:49:05.757 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.762 blo = 56, bhi = 1101
2025-07-01 17:49:05.762
2025-07-01 17:49:05.762 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.762 g = []
2025-07-01 17:49:05.762 if alo < ahi:
2025-07-01 17:49:05.762 if blo < bhi:
2025-07-01 17:49:05.762 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.762 else:
2025-07-01 17:49:05.762 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.762 elif blo < bhi:
2025-07-01 17:49:05.762 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.762
2025-07-01 17:49:05.762 > yield from g
2025-07-01 17:49:05.762
2025-07-01 17:49:05.762 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.762 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.763
2025-07-01 17:49:05.763 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.763 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.763 alo = 56, ahi = 1101
2025-07-01 17:49:05.763 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.763 blo = 56, bhi = 1101
2025-07-01 17:49:05.763
2025-07-01 17:49:05.763 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.763 r"""
2025-07-01 17:49:05.763 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.763 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.763 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.763 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.763
2025-07-01 17:49:05.763 Example:
2025-07-01 17:49:05.763
2025-07-01 17:49:05.763 >>> d = Differ()
2025-07-01 17:49:05.763 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.763 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.763 >>> print(''.join(results), end="")
2025-07-01 17:49:05.763 - abcDefghiJkl
2025-07-01 17:49:05.764 + abcdefGhijkl
2025-07-01 17:49:05.764 """
2025-07-01 17:49:05.764
2025-07-01 17:49:05.764 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.764 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.764 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.764 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.764 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.764
2025-07-01 17:49:05.764 # search for the pair that matches best without being identical
2025-07-01 17:49:05.764 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.764 # on junk -- unless we have to)
2025-07-01 17:49:05.764 for j in range(blo, bhi):
2025-07-01 17:49:05.764 bj = b[j]
2025-07-01 17:49:05.764 cruncher.set_seq2(bj)
2025-07-01 17:49:05.764 for i in range(alo, ahi):
2025-07-01 17:49:05.764 ai = a[i]
2025-07-01 17:49:05.764 if ai == bj:
2025-07-01 17:49:05.764 if eqi is None:
2025-07-01 17:49:05.765 eqi, eqj = i, j
2025-07-01 17:49:05.765 continue
2025-07-01 17:49:05.765 cruncher.set_seq1(ai)
2025-07-01 17:49:05.765 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.765 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.765 # compares by a factor of 3.
2025-07-01 17:49:05.765 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.765 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.765 # of the computation is cached by cruncher
2025-07-01 17:49:05.765 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.765 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.765 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.765 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.765 if best_ratio < cutoff:
2025-07-01 17:49:05.765 # no non-identical "pretty close" pair
2025-07-01 17:49:05.765 if eqi is None:
2025-07-01 17:49:05.765 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.765 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.766 return
2025-07-01 17:49:05.766 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.766 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.766 else:
2025-07-01 17:49:05.766 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.766 eqi = None
2025-07-01 17:49:05.766
2025-07-01 17:49:05.766 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.766 # identical
2025-07-01 17:49:05.766
2025-07-01 17:49:05.766 # pump out diffs from before the synch point
2025-07-01 17:49:05.766 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.766
2025-07-01 17:49:05.766 # do intraline marking on the synch pair
2025-07-01 17:49:05.766 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.766 if eqi is None:
2025-07-01 17:49:05.766 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.766 atags = btags = ""
2025-07-01 17:49:05.766 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.766 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.766 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.767 if tag == 'replace':
2025-07-01 17:49:05.767 atags += '^' * la
2025-07-01 17:49:05.767 btags += '^' * lb
2025-07-01 17:49:05.767 elif tag == 'delete':
2025-07-01 17:49:05.767 atags += '-' * la
2025-07-01 17:49:05.767 elif tag == 'insert':
2025-07-01 17:49:05.767 btags += '+' * lb
2025-07-01 17:49:05.767 elif tag == 'equal':
2025-07-01 17:49:05.767 atags += ' ' * la
2025-07-01 17:49:05.767 btags += ' ' * lb
2025-07-01 17:49:05.767 else:
2025-07-01 17:49:05.767 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.767 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.767 else:
2025-07-01 17:49:05.767 # the synch pair is identical
2025-07-01 17:49:05.767 yield ' ' + aelt
2025-07-01 17:49:05.767
2025-07-01 17:49:05.767 # pump out diffs from after the synch point
2025-07-01 17:49:05.767 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.768
2025-07-01 17:49:05.768 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.768 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.768
2025-07-01 17:49:05.768 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.768 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.768 alo = 57, ahi = 1101
2025-07-01 17:49:05.768 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.768 blo = 57, bhi = 1101
2025-07-01 17:49:05.768
2025-07-01 17:49:05.768 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.768 g = []
2025-07-01 17:49:05.768 if alo < ahi:
2025-07-01 17:49:05.768 if blo < bhi:
2025-07-01 17:49:05.768 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.768 else:
2025-07-01 17:49:05.768 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.768 elif blo < bhi:
2025-07-01 17:49:05.768 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.768
2025-07-01 17:49:05.768 > yield from g
2025-07-01 17:49:05.769
2025-07-01 17:49:05.769 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.769 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.769
2025-07-01 17:49:05.769 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.769 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.769 alo = 57, ahi = 1101
2025-07-01 17:49:05.769 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.769 blo = 57, bhi = 1101
2025-07-01 17:49:05.769
2025-07-01 17:49:05.769 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.769 r"""
2025-07-01 17:49:05.769 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.769 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.769 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.769 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.769
2025-07-01 17:49:05.769 Example:
2025-07-01 17:49:05.769
2025-07-01 17:49:05.769 >>> d = Differ()
2025-07-01 17:49:05.769 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.770 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.770 >>> print(''.join(results), end="")
2025-07-01 17:49:05.770 - abcDefghiJkl
2025-07-01 17:49:05.770 + abcdefGhijkl
2025-07-01 17:49:05.770 """
2025-07-01 17:49:05.770
2025-07-01 17:49:05.770 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.770 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.770 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.770 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.770 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.770
2025-07-01 17:49:05.770 # search for the pair that matches best without being identical
2025-07-01 17:49:05.770 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.770 # on junk -- unless we have to)
2025-07-01 17:49:05.770 for j in range(blo, bhi):
2025-07-01 17:49:05.770 bj = b[j]
2025-07-01 17:49:05.770 cruncher.set_seq2(bj)
2025-07-01 17:49:05.771 for i in range(alo, ahi):
2025-07-01 17:49:05.771 ai = a[i]
2025-07-01 17:49:05.771 if ai == bj:
2025-07-01 17:49:05.771 if eqi is None:
2025-07-01 17:49:05.771 eqi, eqj = i, j
2025-07-01 17:49:05.771 continue
2025-07-01 17:49:05.771 cruncher.set_seq1(ai)
2025-07-01 17:49:05.771 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.771 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.771 # compares by a factor of 3.
2025-07-01 17:49:05.771 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.771 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.771 # of the computation is cached by cruncher
2025-07-01 17:49:05.771 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.771 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.771 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.771 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.771 if best_ratio < cutoff:
2025-07-01 17:49:05.771 # no non-identical "pretty close" pair
2025-07-01 17:49:05.771 if eqi is None:
2025-07-01 17:49:05.771 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.772 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.772 return
2025-07-01 17:49:05.772 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.772 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.772 else:
2025-07-01 17:49:05.772 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.772 eqi = None
2025-07-01 17:49:05.772
2025-07-01 17:49:05.772 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.772 # identical
2025-07-01 17:49:05.772
2025-07-01 17:49:05.772 # pump out diffs from before the synch point
2025-07-01 17:49:05.772 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.772
2025-07-01 17:49:05.772 # do intraline marking on the synch pair
2025-07-01 17:49:05.772 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.772 if eqi is None:
2025-07-01 17:49:05.772 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.772 atags = btags = ""
2025-07-01 17:49:05.772 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.773 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.775 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.776 if tag == 'replace':
2025-07-01 17:49:05.776 atags += '^' * la
2025-07-01 17:49:05.776 btags += '^' * lb
2025-07-01 17:49:05.776 elif tag == 'delete':
2025-07-01 17:49:05.776 atags += '-' * la
2025-07-01 17:49:05.776 elif tag == 'insert':
2025-07-01 17:49:05.776 btags += '+' * lb
2025-07-01 17:49:05.776 elif tag == 'equal':
2025-07-01 17:49:05.776 atags += ' ' * la
2025-07-01 17:49:05.776 btags += ' ' * lb
2025-07-01 17:49:05.776 else:
2025-07-01 17:49:05.776 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.776 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.776 else:
2025-07-01 17:49:05.776 # the synch pair is identical
2025-07-01 17:49:05.776 yield ' ' + aelt
2025-07-01 17:49:05.776
2025-07-01 17:49:05.776 # pump out diffs from after the synch point
2025-07-01 17:49:05.777 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.777
2025-07-01 17:49:05.777 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.777 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.777
2025-07-01 17:49:05.777 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.777 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.777 alo = 58, ahi = 1101
2025-07-01 17:49:05.777 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.777 blo = 58, bhi = 1101
2025-07-01 17:49:05.777
2025-07-01 17:49:05.777 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.777 g = []
2025-07-01 17:49:05.777 if alo < ahi:
2025-07-01 17:49:05.777 if blo < bhi:
2025-07-01 17:49:05.777 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.777 else:
2025-07-01 17:49:05.777 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.777 elif blo < bhi:
2025-07-01 17:49:05.777 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.778
2025-07-01 17:49:05.778 > yield from g
2025-07-01 17:49:05.778
2025-07-01 17:49:05.778 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.778 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.778
2025-07-01 17:49:05.778 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.778 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.778 alo = 58, ahi = 1101
2025-07-01 17:49:05.778 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.778 blo = 58, bhi = 1101
2025-07-01 17:49:05.778
2025-07-01 17:49:05.778 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.778 r"""
2025-07-01 17:49:05.778 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.778 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.778 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.778 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.778
2025-07-01 17:49:05.778 Example:
2025-07-01 17:49:05.778
2025-07-01 17:49:05.779 >>> d = Differ()
2025-07-01 17:49:05.779 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.779 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.779 >>> print(''.join(results), end="")
2025-07-01 17:49:05.779 - abcDefghiJkl
2025-07-01 17:49:05.779 + abcdefGhijkl
2025-07-01 17:49:05.779 """
2025-07-01 17:49:05.779
2025-07-01 17:49:05.779 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.779 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.779 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.779 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.779 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.779
2025-07-01 17:49:05.779 # search for the pair that matches best without being identical
2025-07-01 17:49:05.779 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.779 # on junk -- unless we have to)
2025-07-01 17:49:05.779 for j in range(blo, bhi):
2025-07-01 17:49:05.779 bj = b[j]
2025-07-01 17:49:05.780 cruncher.set_seq2(bj)
2025-07-01 17:49:05.780 for i in range(alo, ahi):
2025-07-01 17:49:05.780 ai = a[i]
2025-07-01 17:49:05.780 if ai == bj:
2025-07-01 17:49:05.780 if eqi is None:
2025-07-01 17:49:05.780 eqi, eqj = i, j
2025-07-01 17:49:05.780 continue
2025-07-01 17:49:05.780 cruncher.set_seq1(ai)
2025-07-01 17:49:05.780 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.780 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.780 # compares by a factor of 3.
2025-07-01 17:49:05.780 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.780 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.780 # of the computation is cached by cruncher
2025-07-01 17:49:05.780 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.780 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.780 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.780 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.780 if best_ratio < cutoff:
2025-07-01 17:49:05.780 # no non-identical "pretty close" pair
2025-07-01 17:49:05.781 if eqi is None:
2025-07-01 17:49:05.781 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.781 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.781 return
2025-07-01 17:49:05.781 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.781 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.781 else:
2025-07-01 17:49:05.781 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.781 eqi = None
2025-07-01 17:49:05.781
2025-07-01 17:49:05.781 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.781 # identical
2025-07-01 17:49:05.781
2025-07-01 17:49:05.781 # pump out diffs from before the synch point
2025-07-01 17:49:05.781 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.781
2025-07-01 17:49:05.781 # do intraline marking on the synch pair
2025-07-01 17:49:05.781 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.781 if eqi is None:
2025-07-01 17:49:05.781 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.781 atags = btags = ""
2025-07-01 17:49:05.782 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.782 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.782 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.782 if tag == 'replace':
2025-07-01 17:49:05.782 atags += '^' * la
2025-07-01 17:49:05.782 btags += '^' * lb
2025-07-01 17:49:05.782 elif tag == 'delete':
2025-07-01 17:49:05.782 atags += '-' * la
2025-07-01 17:49:05.782 elif tag == 'insert':
2025-07-01 17:49:05.782 btags += '+' * lb
2025-07-01 17:49:05.782 elif tag == 'equal':
2025-07-01 17:49:05.782 atags += ' ' * la
2025-07-01 17:49:05.782 btags += ' ' * lb
2025-07-01 17:49:05.782 else:
2025-07-01 17:49:05.782 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.782 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.782 else:
2025-07-01 17:49:05.782 # the synch pair is identical
2025-07-01 17:49:05.782 yield ' ' + aelt
2025-07-01 17:49:05.782
2025-07-01 17:49:05.782 # pump out diffs from after the synch point
2025-07-01 17:49:05.783 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.783
2025-07-01 17:49:05.783 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.783 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.783
2025-07-01 17:49:05.783 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.783 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.783 alo = 59, ahi = 1101
2025-07-01 17:49:05.783 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.783 blo = 59, bhi = 1101
2025-07-01 17:49:05.783
2025-07-01 17:49:05.783 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.783 g = []
2025-07-01 17:49:05.783 if alo < ahi:
2025-07-01 17:49:05.783 if blo < bhi:
2025-07-01 17:49:05.783 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.783 else:
2025-07-01 17:49:05.783 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.783 elif blo < bhi:
2025-07-01 17:49:05.783 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.783
2025-07-01 17:49:05.784 > yield from g
2025-07-01 17:49:05.784
2025-07-01 17:49:05.784 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.784 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.784
2025-07-01 17:49:05.784 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.784 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.784 alo = 59, ahi = 1101
2025-07-01 17:49:05.784 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.784 blo = 59, bhi = 1101
2025-07-01 17:49:05.784
2025-07-01 17:49:05.784 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.784 r"""
2025-07-01 17:49:05.784 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.784 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.784 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.784 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.784
2025-07-01 17:49:05.784 Example:
2025-07-01 17:49:05.784
2025-07-01 17:49:05.785 >>> d = Differ()
2025-07-01 17:49:05.785 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.785 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.785 >>> print(''.join(results), end="")
2025-07-01 17:49:05.785 - abcDefghiJkl
2025-07-01 17:49:05.785 + abcdefGhijkl
2025-07-01 17:49:05.785 """
2025-07-01 17:49:05.785
2025-07-01 17:49:05.785 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.785 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.785 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.785 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.785 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.785
2025-07-01 17:49:05.785 # search for the pair that matches best without being identical
2025-07-01 17:49:05.785 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.785 # on junk -- unless we have to)
2025-07-01 17:49:05.785 for j in range(blo, bhi):
2025-07-01 17:49:05.786 bj = b[j]
2025-07-01 17:49:05.786 cruncher.set_seq2(bj)
2025-07-01 17:49:05.786 for i in range(alo, ahi):
2025-07-01 17:49:05.786 ai = a[i]
2025-07-01 17:49:05.786 if ai == bj:
2025-07-01 17:49:05.786 if eqi is None:
2025-07-01 17:49:05.786 eqi, eqj = i, j
2025-07-01 17:49:05.786 continue
2025-07-01 17:49:05.786 cruncher.set_seq1(ai)
2025-07-01 17:49:05.786 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.786 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.786 # compares by a factor of 3.
2025-07-01 17:49:05.786 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.786 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.786 # of the computation is cached by cruncher
2025-07-01 17:49:05.786 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.786 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.786 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.786 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.786 if best_ratio < cutoff:
2025-07-01 17:49:05.787 # no non-identical "pretty close" pair
2025-07-01 17:49:05.787 if eqi is None:
2025-07-01 17:49:05.787 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.787 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.787 return
2025-07-01 17:49:05.787 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.787 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.787 else:
2025-07-01 17:49:05.787 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.787 eqi = None
2025-07-01 17:49:05.787
2025-07-01 17:49:05.787 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.787 # identical
2025-07-01 17:49:05.787
2025-07-01 17:49:05.787 # pump out diffs from before the synch point
2025-07-01 17:49:05.787 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.787
2025-07-01 17:49:05.787 # do intraline marking on the synch pair
2025-07-01 17:49:05.787 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.788 if eqi is None:
2025-07-01 17:49:05.788 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.788 atags = btags = ""
2025-07-01 17:49:05.788 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.788 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.788 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.788 if tag == 'replace':
2025-07-01 17:49:05.788 atags += '^' * la
2025-07-01 17:49:05.788 btags += '^' * lb
2025-07-01 17:49:05.788 elif tag == 'delete':
2025-07-01 17:49:05.788 atags += '-' * la
2025-07-01 17:49:05.788 elif tag == 'insert':
2025-07-01 17:49:05.788 btags += '+' * lb
2025-07-01 17:49:05.788 elif tag == 'equal':
2025-07-01 17:49:05.788 atags += ' ' * la
2025-07-01 17:49:05.788 btags += ' ' * lb
2025-07-01 17:49:05.788 else:
2025-07-01 17:49:05.788 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.788 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.788 else:
2025-07-01 17:49:05.794 # the synch pair is identical
2025-07-01 17:49:05.794 yield ' ' + aelt
2025-07-01 17:49:05.794
2025-07-01 17:49:05.794 # pump out diffs from after the synch point
2025-07-01 17:49:05.794 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.794
2025-07-01 17:49:05.794 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.794 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.794
2025-07-01 17:49:05.794 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.794 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.794 alo = 60, ahi = 1101
2025-07-01 17:49:05.794 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.794 blo = 60, bhi = 1101
2025-07-01 17:49:05.794
2025-07-01 17:49:05.794 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.794 g = []
2025-07-01 17:49:05.794 if alo < ahi:
2025-07-01 17:49:05.794 if blo < bhi:
2025-07-01 17:49:05.794 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.794 else:
2025-07-01 17:49:05.795 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.795 elif blo < bhi:
2025-07-01 17:49:05.795 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.795
2025-07-01 17:49:05.795 > yield from g
2025-07-01 17:49:05.795
2025-07-01 17:49:05.795 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.795 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.795
2025-07-01 17:49:05.795 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.795 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.795 alo = 60, ahi = 1101
2025-07-01 17:49:05.795 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.795 blo = 60, bhi = 1101
2025-07-01 17:49:05.795
2025-07-01 17:49:05.795 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.795 r"""
2025-07-01 17:49:05.795 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.795 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.795 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.795 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.795
2025-07-01 17:49:05.796 Example:
2025-07-01 17:49:05.796
2025-07-01 17:49:05.796 >>> d = Differ()
2025-07-01 17:49:05.796 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.796 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.796 >>> print(''.join(results), end="")
2025-07-01 17:49:05.796 - abcDefghiJkl
2025-07-01 17:49:05.796 + abcdefGhijkl
2025-07-01 17:49:05.796 """
2025-07-01 17:49:05.796
2025-07-01 17:49:05.796 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.796 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.796 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.796 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.796 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.796
2025-07-01 17:49:05.796 # search for the pair that matches best without being identical
2025-07-01 17:49:05.796 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.797 # on junk -- unless we have to)
2025-07-01 17:49:05.797 for j in range(blo, bhi):
2025-07-01 17:49:05.797 bj = b[j]
2025-07-01 17:49:05.797 cruncher.set_seq2(bj)
2025-07-01 17:49:05.797 for i in range(alo, ahi):
2025-07-01 17:49:05.797 ai = a[i]
2025-07-01 17:49:05.797 if ai == bj:
2025-07-01 17:49:05.797 if eqi is None:
2025-07-01 17:49:05.797 eqi, eqj = i, j
2025-07-01 17:49:05.797 continue
2025-07-01 17:49:05.797 cruncher.set_seq1(ai)
2025-07-01 17:49:05.797 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.797 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.797 # compares by a factor of 3.
2025-07-01 17:49:05.797 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.797 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.797 # of the computation is cached by cruncher
2025-07-01 17:49:05.797 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.797 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.797 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.797 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.797 if best_ratio < cutoff:
2025-07-01 17:49:05.798 # no non-identical "pretty close" pair
2025-07-01 17:49:05.798 if eqi is None:
2025-07-01 17:49:05.798 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.798 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.798 return
2025-07-01 17:49:05.798 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.798 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.798 else:
2025-07-01 17:49:05.798 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.798 eqi = None
2025-07-01 17:49:05.798
2025-07-01 17:49:05.798 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.798 # identical
2025-07-01 17:49:05.798
2025-07-01 17:49:05.798 # pump out diffs from before the synch point
2025-07-01 17:49:05.798 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.798
2025-07-01 17:49:05.798 # do intraline marking on the synch pair
2025-07-01 17:49:05.798 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.798 if eqi is None:
2025-07-01 17:49:05.798 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.799 atags = btags = ""
2025-07-01 17:49:05.799 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.799 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.799 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.799 if tag == 'replace':
2025-07-01 17:49:05.799 atags += '^' * la
2025-07-01 17:49:05.799 btags += '^' * lb
2025-07-01 17:49:05.799 elif tag == 'delete':
2025-07-01 17:49:05.799 atags += '-' * la
2025-07-01 17:49:05.799 elif tag == 'insert':
2025-07-01 17:49:05.799 btags += '+' * lb
2025-07-01 17:49:05.799 elif tag == 'equal':
2025-07-01 17:49:05.799 atags += ' ' * la
2025-07-01 17:49:05.799 btags += ' ' * lb
2025-07-01 17:49:05.799 else:
2025-07-01 17:49:05.799 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.799 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.799 else:
2025-07-01 17:49:05.799 # the synch pair is identical
2025-07-01 17:49:05.799 yield ' ' + aelt
2025-07-01 17:49:05.800
2025-07-01 17:49:05.800 # pump out diffs from after the synch point
2025-07-01 17:49:05.800 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.800
2025-07-01 17:49:05.800 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.800 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.800
2025-07-01 17:49:05.800 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.800 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.800 alo = 61, ahi = 1101
2025-07-01 17:49:05.800 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.800 blo = 61, bhi = 1101
2025-07-01 17:49:05.800
2025-07-01 17:49:05.800 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.800 g = []
2025-07-01 17:49:05.800 if alo < ahi:
2025-07-01 17:49:05.800 if blo < bhi:
2025-07-01 17:49:05.800 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.800 else:
2025-07-01 17:49:05.800 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.801 elif blo < bhi:
2025-07-01 17:49:05.801 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.801
2025-07-01 17:49:05.801 > yield from g
2025-07-01 17:49:05.801
2025-07-01 17:49:05.801 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.801 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.801
2025-07-01 17:49:05.801 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.801 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.801 alo = 61, ahi = 1101
2025-07-01 17:49:05.801 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.801 blo = 61, bhi = 1101
2025-07-01 17:49:05.801
2025-07-01 17:49:05.801 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.801 r"""
2025-07-01 17:49:05.801 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.801 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.801 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.801 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.802
2025-07-01 17:49:05.802 Example:
2025-07-01 17:49:05.802
2025-07-01 17:49:05.802 >>> d = Differ()
2025-07-01 17:49:05.802 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.802 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.802 >>> print(''.join(results), end="")
2025-07-01 17:49:05.802 - abcDefghiJkl
2025-07-01 17:49:05.802 + abcdefGhijkl
2025-07-01 17:49:05.802 """
2025-07-01 17:49:05.802
2025-07-01 17:49:05.802 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.802 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.802 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.802 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.802 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.802
2025-07-01 17:49:05.802 # search for the pair that matches best without being identical
2025-07-01 17:49:05.802 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.803 # on junk -- unless we have to)
2025-07-01 17:49:05.803 for j in range(blo, bhi):
2025-07-01 17:49:05.803 bj = b[j]
2025-07-01 17:49:05.803 cruncher.set_seq2(bj)
2025-07-01 17:49:05.803 for i in range(alo, ahi):
2025-07-01 17:49:05.803 ai = a[i]
2025-07-01 17:49:05.803 if ai == bj:
2025-07-01 17:49:05.803 if eqi is None:
2025-07-01 17:49:05.803 eqi, eqj = i, j
2025-07-01 17:49:05.803 continue
2025-07-01 17:49:05.803 cruncher.set_seq1(ai)
2025-07-01 17:49:05.803 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.803 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.803 # compares by a factor of 3.
2025-07-01 17:49:05.803 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.803 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.803 # of the computation is cached by cruncher
2025-07-01 17:49:05.803 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.803 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.803 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.804 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.804 if best_ratio < cutoff:
2025-07-01 17:49:05.804 # no non-identical "pretty close" pair
2025-07-01 17:49:05.804 if eqi is None:
2025-07-01 17:49:05.804 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.804 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.804 return
2025-07-01 17:49:05.804 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.804 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.804 else:
2025-07-01 17:49:05.804 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.804 eqi = None
2025-07-01 17:49:05.804
2025-07-01 17:49:05.804 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.804 # identical
2025-07-01 17:49:05.804
2025-07-01 17:49:05.804 # pump out diffs from before the synch point
2025-07-01 17:49:05.804 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.804
2025-07-01 17:49:05.804 # do intraline marking on the synch pair
2025-07-01 17:49:05.804 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.808 if eqi is None:
2025-07-01 17:49:05.808 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.808 atags = btags = ""
2025-07-01 17:49:05.808 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.808 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.808 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.808 if tag == 'replace':
2025-07-01 17:49:05.808 atags += '^' * la
2025-07-01 17:49:05.808 btags += '^' * lb
2025-07-01 17:49:05.808 elif tag == 'delete':
2025-07-01 17:49:05.808 atags += '-' * la
2025-07-01 17:49:05.808 elif tag == 'insert':
2025-07-01 17:49:05.808 btags += '+' * lb
2025-07-01 17:49:05.808 elif tag == 'equal':
2025-07-01 17:49:05.808 atags += ' ' * la
2025-07-01 17:49:05.808 btags += ' ' * lb
2025-07-01 17:49:05.808 else:
2025-07-01 17:49:05.808 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.809 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.809 else:
2025-07-01 17:49:05.809 # the synch pair is identical
2025-07-01 17:49:05.809 yield ' ' + aelt
2025-07-01 17:49:05.809
2025-07-01 17:49:05.809 # pump out diffs from after the synch point
2025-07-01 17:49:05.809 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.809
2025-07-01 17:49:05.809 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.809 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.809
2025-07-01 17:49:05.809 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.809 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.809 alo = 62, ahi = 1101
2025-07-01 17:49:05.809 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.809 blo = 62, bhi = 1101
2025-07-01 17:49:05.809
2025-07-01 17:49:05.809 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.809 g = []
2025-07-01 17:49:05.809 if alo < ahi:
2025-07-01 17:49:05.810 if blo < bhi:
2025-07-01 17:49:05.810 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.810 else:
2025-07-01 17:49:05.810 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.810 elif blo < bhi:
2025-07-01 17:49:05.810 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.810
2025-07-01 17:49:05.810 > yield from g
2025-07-01 17:49:05.810
2025-07-01 17:49:05.810 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.810 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.810
2025-07-01 17:49:05.810 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.810 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.810 alo = 62, ahi = 1101
2025-07-01 17:49:05.810 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.810 blo = 62, bhi = 1101
2025-07-01 17:49:05.810
2025-07-01 17:49:05.810 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.810 r"""
2025-07-01 17:49:05.811 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.811 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.811 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.811 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.811
2025-07-01 17:49:05.811 Example:
2025-07-01 17:49:05.811
2025-07-01 17:49:05.811 >>> d = Differ()
2025-07-01 17:49:05.811 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.811 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.811 >>> print(''.join(results), end="")
2025-07-01 17:49:05.811 - abcDefghiJkl
2025-07-01 17:49:05.811 + abcdefGhijkl
2025-07-01 17:49:05.811 """
2025-07-01 17:49:05.811
2025-07-01 17:49:05.811 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.811 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.811 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.811 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.812 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.812
2025-07-01 17:49:05.812 # search for the pair that matches best without being identical
2025-07-01 17:49:05.812 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.812 # on junk -- unless we have to)
2025-07-01 17:49:05.812 for j in range(blo, bhi):
2025-07-01 17:49:05.812 bj = b[j]
2025-07-01 17:49:05.812 cruncher.set_seq2(bj)
2025-07-01 17:49:05.812 for i in range(alo, ahi):
2025-07-01 17:49:05.812 ai = a[i]
2025-07-01 17:49:05.812 if ai == bj:
2025-07-01 17:49:05.812 if eqi is None:
2025-07-01 17:49:05.812 eqi, eqj = i, j
2025-07-01 17:49:05.812 continue
2025-07-01 17:49:05.812 cruncher.set_seq1(ai)
2025-07-01 17:49:05.812 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.812 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.812 # compares by a factor of 3.
2025-07-01 17:49:05.812 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.812 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.812 # of the computation is cached by cruncher
2025-07-01 17:49:05.813 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.813 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.813 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.813 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.813 if best_ratio < cutoff:
2025-07-01 17:49:05.813 # no non-identical "pretty close" pair
2025-07-01 17:49:05.813 if eqi is None:
2025-07-01 17:49:05.813 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.813 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.813 return
2025-07-01 17:49:05.813 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.813 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.813 else:
2025-07-01 17:49:05.813 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.813 eqi = None
2025-07-01 17:49:05.813
2025-07-01 17:49:05.813 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.813 # identical
2025-07-01 17:49:05.813
2025-07-01 17:49:05.813 # pump out diffs from before the synch point
2025-07-01 17:49:05.813 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.814
2025-07-01 17:49:05.814 # do intraline marking on the synch pair
2025-07-01 17:49:05.814 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.814 if eqi is None:
2025-07-01 17:49:05.814 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.814 atags = btags = ""
2025-07-01 17:49:05.814 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.814 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.814 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.814 if tag == 'replace':
2025-07-01 17:49:05.814 atags += '^' * la
2025-07-01 17:49:05.814 btags += '^' * lb
2025-07-01 17:49:05.814 elif tag == 'delete':
2025-07-01 17:49:05.814 atags += '-' * la
2025-07-01 17:49:05.814 elif tag == 'insert':
2025-07-01 17:49:05.814 btags += '+' * lb
2025-07-01 17:49:05.814 elif tag == 'equal':
2025-07-01 17:49:05.814 atags += ' ' * la
2025-07-01 17:49:05.814 btags += ' ' * lb
2025-07-01 17:49:05.814 else:
2025-07-01 17:49:05.814 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.815 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.815 else:
2025-07-01 17:49:05.815 # the synch pair is identical
2025-07-01 17:49:05.815 yield ' ' + aelt
2025-07-01 17:49:05.815
2025-07-01 17:49:05.815 # pump out diffs from after the synch point
2025-07-01 17:49:05.815 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.815
2025-07-01 17:49:05.815 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.815 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.815
2025-07-01 17:49:05.815 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.815 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.815 alo = 63, ahi = 1101
2025-07-01 17:49:05.815 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.815 blo = 63, bhi = 1101
2025-07-01 17:49:05.815
2025-07-01 17:49:05.815 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.815 g = []
2025-07-01 17:49:05.815 if alo < ahi:
2025-07-01 17:49:05.816 if blo < bhi:
2025-07-01 17:49:05.816 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.816 else:
2025-07-01 17:49:05.816 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.816 elif blo < bhi:
2025-07-01 17:49:05.816 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.816
2025-07-01 17:49:05.816 > yield from g
2025-07-01 17:49:05.816
2025-07-01 17:49:05.816 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.816 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.816
2025-07-01 17:49:05.816 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.816 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.816 alo = 63, ahi = 1101
2025-07-01 17:49:05.816 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.816 blo = 63, bhi = 1101
2025-07-01 17:49:05.816
2025-07-01 17:49:05.816 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.816 r"""
2025-07-01 17:49:05.816 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.817 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.817 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.817 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.817
2025-07-01 17:49:05.817 Example:
2025-07-01 17:49:05.817
2025-07-01 17:49:05.817 >>> d = Differ()
2025-07-01 17:49:05.817 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.817 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.817 >>> print(''.join(results), end="")
2025-07-01 17:49:05.817 - abcDefghiJkl
2025-07-01 17:49:05.817 + abcdefGhijkl
2025-07-01 17:49:05.817 """
2025-07-01 17:49:05.817
2025-07-01 17:49:05.817 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.817 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.817 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.817 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.818 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.818
2025-07-01 17:49:05.818 # search for the pair that matches best without being identical
2025-07-01 17:49:05.818 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.818 # on junk -- unless we have to)
2025-07-01 17:49:05.818 for j in range(blo, bhi):
2025-07-01 17:49:05.818 bj = b[j]
2025-07-01 17:49:05.818 cruncher.set_seq2(bj)
2025-07-01 17:49:05.818 for i in range(alo, ahi):
2025-07-01 17:49:05.818 ai = a[i]
2025-07-01 17:49:05.818 if ai == bj:
2025-07-01 17:49:05.818 if eqi is None:
2025-07-01 17:49:05.818 eqi, eqj = i, j
2025-07-01 17:49:05.818 continue
2025-07-01 17:49:05.818 cruncher.set_seq1(ai)
2025-07-01 17:49:05.818 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.818 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.818 # compares by a factor of 3.
2025-07-01 17:49:05.818 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.818 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.818 # of the computation is cached by cruncher
2025-07-01 17:49:05.819 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.819 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.819 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.819 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.819 if best_ratio < cutoff:
2025-07-01 17:49:05.819 # no non-identical "pretty close" pair
2025-07-01 17:49:05.819 if eqi is None:
2025-07-01 17:49:05.819 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.819 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.819 return
2025-07-01 17:49:05.819 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.819 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.819 else:
2025-07-01 17:49:05.819 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.819 eqi = None
2025-07-01 17:49:05.819
2025-07-01 17:49:05.819 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.819 # identical
2025-07-01 17:49:05.819
2025-07-01 17:49:05.819 # pump out diffs from before the synch point
2025-07-01 17:49:05.819 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.820
2025-07-01 17:49:05.820 # do intraline marking on the synch pair
2025-07-01 17:49:05.820 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.820 if eqi is None:
2025-07-01 17:49:05.820 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.820 atags = btags = ""
2025-07-01 17:49:05.820 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.820 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.820 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.820 if tag == 'replace':
2025-07-01 17:49:05.820 atags += '^' * la
2025-07-01 17:49:05.820 btags += '^' * lb
2025-07-01 17:49:05.820 elif tag == 'delete':
2025-07-01 17:49:05.820 atags += '-' * la
2025-07-01 17:49:05.820 elif tag == 'insert':
2025-07-01 17:49:05.820 btags += '+' * lb
2025-07-01 17:49:05.820 elif tag == 'equal':
2025-07-01 17:49:05.820 atags += ' ' * la
2025-07-01 17:49:05.820 btags += ' ' * lb
2025-07-01 17:49:05.820 else:
2025-07-01 17:49:05.820 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.826 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.826 else:
2025-07-01 17:49:05.826 # the synch pair is identical
2025-07-01 17:49:05.826 yield ' ' + aelt
2025-07-01 17:49:05.826
2025-07-01 17:49:05.826 # pump out diffs from after the synch point
2025-07-01 17:49:05.826 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.826
2025-07-01 17:49:05.826 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.826 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.826
2025-07-01 17:49:05.826 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.826 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.826 alo = 64, ahi = 1101
2025-07-01 17:49:05.826 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.826 blo = 64, bhi = 1101
2025-07-01 17:49:05.826
2025-07-01 17:49:05.826 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.826 g = []
2025-07-01 17:49:05.826 if alo < ahi:
2025-07-01 17:49:05.827 if blo < bhi:
2025-07-01 17:49:05.827 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.827 else:
2025-07-01 17:49:05.827 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.827 elif blo < bhi:
2025-07-01 17:49:05.827 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.827
2025-07-01 17:49:05.827 > yield from g
2025-07-01 17:49:05.827
2025-07-01 17:49:05.827 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.827 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.827
2025-07-01 17:49:05.827 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.827 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.827 alo = 64, ahi = 1101
2025-07-01 17:49:05.827 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.827 blo = 64, bhi = 1101
2025-07-01 17:49:05.827
2025-07-01 17:49:05.827 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.827 r"""
2025-07-01 17:49:05.828 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.828 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.828 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.828 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.828
2025-07-01 17:49:05.828 Example:
2025-07-01 17:49:05.828
2025-07-01 17:49:05.828 >>> d = Differ()
2025-07-01 17:49:05.828 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.828 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.828 >>> print(''.join(results), end="")
2025-07-01 17:49:05.828 - abcDefghiJkl
2025-07-01 17:49:05.828 + abcdefGhijkl
2025-07-01 17:49:05.828 """
2025-07-01 17:49:05.828
2025-07-01 17:49:05.828 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.828 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.829 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.829 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.829 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.829
2025-07-01 17:49:05.829 # search for the pair that matches best without being identical
2025-07-01 17:49:05.829 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.829 # on junk -- unless we have to)
2025-07-01 17:49:05.829 for j in range(blo, bhi):
2025-07-01 17:49:05.829 bj = b[j]
2025-07-01 17:49:05.829 cruncher.set_seq2(bj)
2025-07-01 17:49:05.829 for i in range(alo, ahi):
2025-07-01 17:49:05.829 ai = a[i]
2025-07-01 17:49:05.829 if ai == bj:
2025-07-01 17:49:05.829 if eqi is None:
2025-07-01 17:49:05.829 eqi, eqj = i, j
2025-07-01 17:49:05.829 continue
2025-07-01 17:49:05.829 cruncher.set_seq1(ai)
2025-07-01 17:49:05.829 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.829 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.829 # compares by a factor of 3.
2025-07-01 17:49:05.830 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.830 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.830 # of the computation is cached by cruncher
2025-07-01 17:49:05.830 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.830 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.830 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.830 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.830 if best_ratio < cutoff:
2025-07-01 17:49:05.830 # no non-identical "pretty close" pair
2025-07-01 17:49:05.830 if eqi is None:
2025-07-01 17:49:05.830 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.830 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.830 return
2025-07-01 17:49:05.830 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.830 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.830 else:
2025-07-01 17:49:05.830 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.830 eqi = None
2025-07-01 17:49:05.831
2025-07-01 17:49:05.831 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.831 # identical
2025-07-01 17:49:05.831
2025-07-01 17:49:05.831 # pump out diffs from before the synch point
2025-07-01 17:49:05.831 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.831
2025-07-01 17:49:05.831 # do intraline marking on the synch pair
2025-07-01 17:49:05.831 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.831 if eqi is None:
2025-07-01 17:49:05.831 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.831 atags = btags = ""
2025-07-01 17:49:05.831 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.831 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.831 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.831 if tag == 'replace':
2025-07-01 17:49:05.831 atags += '^' * la
2025-07-01 17:49:05.831 btags += '^' * lb
2025-07-01 17:49:05.831 elif tag == 'delete':
2025-07-01 17:49:05.832 atags += '-' * la
2025-07-01 17:49:05.832 elif tag == 'insert':
2025-07-01 17:49:05.832 btags += '+' * lb
2025-07-01 17:49:05.832 elif tag == 'equal':
2025-07-01 17:49:05.832 atags += ' ' * la
2025-07-01 17:49:05.832 btags += ' ' * lb
2025-07-01 17:49:05.832 else:
2025-07-01 17:49:05.832 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.832 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.832 else:
2025-07-01 17:49:05.832 # the synch pair is identical
2025-07-01 17:49:05.832 yield ' ' + aelt
2025-07-01 17:49:05.832
2025-07-01 17:49:05.832 # pump out diffs from after the synch point
2025-07-01 17:49:05.832 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.832
2025-07-01 17:49:05.832 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.832 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.832
2025-07-01 17:49:05.832 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.833 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.833 alo = 65, ahi = 1101
2025-07-01 17:49:05.833 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.833 blo = 65, bhi = 1101
2025-07-01 17:49:05.833
2025-07-01 17:49:05.833 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.833 g = []
2025-07-01 17:49:05.833 if alo < ahi:
2025-07-01 17:49:05.833 if blo < bhi:
2025-07-01 17:49:05.833 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.833 else:
2025-07-01 17:49:05.833 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.833 elif blo < bhi:
2025-07-01 17:49:05.833 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.833
2025-07-01 17:49:05.833 > yield from g
2025-07-01 17:49:05.833
2025-07-01 17:49:05.833 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.833 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.833
2025-07-01 17:49:05.834 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.834 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.834 alo = 65, ahi = 1101
2025-07-01 17:49:05.834 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.834 blo = 65, bhi = 1101
2025-07-01 17:49:05.834
2025-07-01 17:49:05.834 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.834 r"""
2025-07-01 17:49:05.834 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.834 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.834 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.834 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.834
2025-07-01 17:49:05.834 Example:
2025-07-01 17:49:05.834
2025-07-01 17:49:05.834 >>> d = Differ()
2025-07-01 17:49:05.834 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.834 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.834 >>> print(''.join(results), end="")
2025-07-01 17:49:05.834 - abcDefghiJkl
2025-07-01 17:49:05.835 + abcdefGhijkl
2025-07-01 17:49:05.835 """
2025-07-01 17:49:05.835
2025-07-01 17:49:05.835 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.835 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.835 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.835 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.835 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.835
2025-07-01 17:49:05.835 # search for the pair that matches best without being identical
2025-07-01 17:49:05.835 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.835 # on junk -- unless we have to)
2025-07-01 17:49:05.835 for j in range(blo, bhi):
2025-07-01 17:49:05.835 bj = b[j]
2025-07-01 17:49:05.835 cruncher.set_seq2(bj)
2025-07-01 17:49:05.835 for i in range(alo, ahi):
2025-07-01 17:49:05.835 ai = a[i]
2025-07-01 17:49:05.835 if ai == bj:
2025-07-01 17:49:05.835 if eqi is None:
2025-07-01 17:49:05.836 eqi, eqj = i, j
2025-07-01 17:49:05.836 continue
2025-07-01 17:49:05.836 cruncher.set_seq1(ai)
2025-07-01 17:49:05.836 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.836 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.836 # compares by a factor of 3.
2025-07-01 17:49:05.836 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.836 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.836 # of the computation is cached by cruncher
2025-07-01 17:49:05.836 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.836 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.836 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.836 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.836 if best_ratio < cutoff:
2025-07-01 17:49:05.836 # no non-identical "pretty close" pair
2025-07-01 17:49:05.836 if eqi is None:
2025-07-01 17:49:05.836 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.836 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.836 return
2025-07-01 17:49:05.836 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.836 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.837 else:
2025-07-01 17:49:05.840 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.840 eqi = None
2025-07-01 17:49:05.840
2025-07-01 17:49:05.840 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.840 # identical
2025-07-01 17:49:05.840
2025-07-01 17:49:05.840 # pump out diffs from before the synch point
2025-07-01 17:49:05.840 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.840
2025-07-01 17:49:05.840 # do intraline marking on the synch pair
2025-07-01 17:49:05.840 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.840 if eqi is None:
2025-07-01 17:49:05.840 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.840 atags = btags = ""
2025-07-01 17:49:05.840 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.840 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.840 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.840 if tag == 'replace':
2025-07-01 17:49:05.840 atags += '^' * la
2025-07-01 17:49:05.840 btags += '^' * lb
2025-07-01 17:49:05.841 elif tag == 'delete':
2025-07-01 17:49:05.841 atags += '-' * la
2025-07-01 17:49:05.841 elif tag == 'insert':
2025-07-01 17:49:05.841 btags += '+' * lb
2025-07-01 17:49:05.841 elif tag == 'equal':
2025-07-01 17:49:05.841 atags += ' ' * la
2025-07-01 17:49:05.841 btags += ' ' * lb
2025-07-01 17:49:05.841 else:
2025-07-01 17:49:05.841 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.841 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.841 else:
2025-07-01 17:49:05.841 # the synch pair is identical
2025-07-01 17:49:05.841 yield ' ' + aelt
2025-07-01 17:49:05.841
2025-07-01 17:49:05.841 # pump out diffs from after the synch point
2025-07-01 17:49:05.841 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.841
2025-07-01 17:49:05.841 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.841 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.841
2025-07-01 17:49:05.842 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.842 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.842 alo = 66, ahi = 1101
2025-07-01 17:49:05.842 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.842 blo = 66, bhi = 1101
2025-07-01 17:49:05.842
2025-07-01 17:49:05.842 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.842 g = []
2025-07-01 17:49:05.842 if alo < ahi:
2025-07-01 17:49:05.842 if blo < bhi:
2025-07-01 17:49:05.842 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.842 else:
2025-07-01 17:49:05.842 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.842 elif blo < bhi:
2025-07-01 17:49:05.842 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.842
2025-07-01 17:49:05.842 > yield from g
2025-07-01 17:49:05.842
2025-07-01 17:49:05.842 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.842 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.842
2025-07-01 17:49:05.843 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.843 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.843 alo = 66, ahi = 1101
2025-07-01 17:49:05.843 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.843 blo = 66, bhi = 1101
2025-07-01 17:49:05.843
2025-07-01 17:49:05.843 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.843 r"""
2025-07-01 17:49:05.843 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.843 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.843 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.843 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.843
2025-07-01 17:49:05.843 Example:
2025-07-01 17:49:05.843
2025-07-01 17:49:05.843 >>> d = Differ()
2025-07-01 17:49:05.843 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.844 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.844 >>> print(''.join(results), end="")
2025-07-01 17:49:05.844 - abcDefghiJkl
2025-07-01 17:49:05.844 + abcdefGhijkl
2025-07-01 17:49:05.844 """
2025-07-01 17:49:05.844
2025-07-01 17:49:05.844 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.844 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.844 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.844 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.844 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.844
2025-07-01 17:49:05.844 # search for the pair that matches best without being identical
2025-07-01 17:49:05.844 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.844 # on junk -- unless we have to)
2025-07-01 17:49:05.844 for j in range(blo, bhi):
2025-07-01 17:49:05.844 bj = b[j]
2025-07-01 17:49:05.844 cruncher.set_seq2(bj)
2025-07-01 17:49:05.844 for i in range(alo, ahi):
2025-07-01 17:49:05.845 ai = a[i]
2025-07-01 17:49:05.845 if ai == bj:
2025-07-01 17:49:05.845 if eqi is None:
2025-07-01 17:49:05.845 eqi, eqj = i, j
2025-07-01 17:49:05.845 continue
2025-07-01 17:49:05.845 cruncher.set_seq1(ai)
2025-07-01 17:49:05.845 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.845 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.845 # compares by a factor of 3.
2025-07-01 17:49:05.845 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.845 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.845 # of the computation is cached by cruncher
2025-07-01 17:49:05.845 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.845 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.845 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.845 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.845 if best_ratio < cutoff:
2025-07-01 17:49:05.845 # no non-identical "pretty close" pair
2025-07-01 17:49:05.845 if eqi is None:
2025-07-01 17:49:05.845 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.846 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.846 return
2025-07-01 17:49:05.846 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.846 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.846 else:
2025-07-01 17:49:05.846 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.846 eqi = None
2025-07-01 17:49:05.846
2025-07-01 17:49:05.846 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.846 # identical
2025-07-01 17:49:05.846
2025-07-01 17:49:05.846 # pump out diffs from before the synch point
2025-07-01 17:49:05.846 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.846
2025-07-01 17:49:05.846 # do intraline marking on the synch pair
2025-07-01 17:49:05.846 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.846 if eqi is None:
2025-07-01 17:49:05.846 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.846 atags = btags = ""
2025-07-01 17:49:05.846 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.846 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.847 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.847 if tag == 'replace':
2025-07-01 17:49:05.847 atags += '^' * la
2025-07-01 17:49:05.847 btags += '^' * lb
2025-07-01 17:49:05.847 elif tag == 'delete':
2025-07-01 17:49:05.847 atags += '-' * la
2025-07-01 17:49:05.847 elif tag == 'insert':
2025-07-01 17:49:05.847 btags += '+' * lb
2025-07-01 17:49:05.847 elif tag == 'equal':
2025-07-01 17:49:05.847 atags += ' ' * la
2025-07-01 17:49:05.847 btags += ' ' * lb
2025-07-01 17:49:05.847 else:
2025-07-01 17:49:05.847 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.847 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.847 else:
2025-07-01 17:49:05.847 # the synch pair is identical
2025-07-01 17:49:05.847 yield ' ' + aelt
2025-07-01 17:49:05.847
2025-07-01 17:49:05.847 # pump out diffs from after the synch point
2025-07-01 17:49:05.847 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.847
2025-07-01 17:49:05.848 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.848 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.848
2025-07-01 17:49:05.848 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.848 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.848 alo = 67, ahi = 1101
2025-07-01 17:49:05.848 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.848 blo = 67, bhi = 1101
2025-07-01 17:49:05.848
2025-07-01 17:49:05.848 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.848 g = []
2025-07-01 17:49:05.848 if alo < ahi:
2025-07-01 17:49:05.848 if blo < bhi:
2025-07-01 17:49:05.848 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.848 else:
2025-07-01 17:49:05.848 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.848 elif blo < bhi:
2025-07-01 17:49:05.848 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.848
2025-07-01 17:49:05.848 > yield from g
2025-07-01 17:49:05.849
2025-07-01 17:49:05.849 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.849 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.849
2025-07-01 17:49:05.849 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.849 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.849 alo = 67, ahi = 1101
2025-07-01 17:49:05.849 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.849 blo = 67, bhi = 1101
2025-07-01 17:49:05.849
2025-07-01 17:49:05.849 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.849 r"""
2025-07-01 17:49:05.849 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.849 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.849 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.849 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.849
2025-07-01 17:49:05.849 Example:
2025-07-01 17:49:05.849
2025-07-01 17:49:05.849 >>> d = Differ()
2025-07-01 17:49:05.849 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.850 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.850 >>> print(''.join(results), end="")
2025-07-01 17:49:05.850 - abcDefghiJkl
2025-07-01 17:49:05.850 + abcdefGhijkl
2025-07-01 17:49:05.850 """
2025-07-01 17:49:05.850
2025-07-01 17:49:05.850 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.850 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.850 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.850 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.850 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.850
2025-07-01 17:49:05.850 # search for the pair that matches best without being identical
2025-07-01 17:49:05.850 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.850 # on junk -- unless we have to)
2025-07-01 17:49:05.850 for j in range(blo, bhi):
2025-07-01 17:49:05.850 bj = b[j]
2025-07-01 17:49:05.850 cruncher.set_seq2(bj)
2025-07-01 17:49:05.850 for i in range(alo, ahi):
2025-07-01 17:49:05.851 ai = a[i]
2025-07-01 17:49:05.851 if ai == bj:
2025-07-01 17:49:05.851 if eqi is None:
2025-07-01 17:49:05.851 eqi, eqj = i, j
2025-07-01 17:49:05.851 continue
2025-07-01 17:49:05.851 cruncher.set_seq1(ai)
2025-07-01 17:49:05.851 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.851 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.851 # compares by a factor of 3.
2025-07-01 17:49:05.851 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.851 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.851 # of the computation is cached by cruncher
2025-07-01 17:49:05.851 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.851 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.851 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.851 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.851 if best_ratio < cutoff:
2025-07-01 17:49:05.851 # no non-identical "pretty close" pair
2025-07-01 17:49:05.852 if eqi is None:
2025-07-01 17:49:05.852 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.852 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.852 return
2025-07-01 17:49:05.852 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.852 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.852 else:
2025-07-01 17:49:05.852 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.852 eqi = None
2025-07-01 17:49:05.852
2025-07-01 17:49:05.852 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.852 # identical
2025-07-01 17:49:05.852
2025-07-01 17:49:05.852 # pump out diffs from before the synch point
2025-07-01 17:49:05.852 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.852
2025-07-01 17:49:05.852 # do intraline marking on the synch pair
2025-07-01 17:49:05.852 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.852 if eqi is None:
2025-07-01 17:49:05.852 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.852 atags = btags = ""
2025-07-01 17:49:05.853 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.858 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.858 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.858 if tag == 'replace':
2025-07-01 17:49:05.858 atags += '^' * la
2025-07-01 17:49:05.858 btags += '^' * lb
2025-07-01 17:49:05.858 elif tag == 'delete':
2025-07-01 17:49:05.858 atags += '-' * la
2025-07-01 17:49:05.858 elif tag == 'insert':
2025-07-01 17:49:05.858 btags += '+' * lb
2025-07-01 17:49:05.858 elif tag == 'equal':
2025-07-01 17:49:05.858 atags += ' ' * la
2025-07-01 17:49:05.858 btags += ' ' * lb
2025-07-01 17:49:05.858 else:
2025-07-01 17:49:05.858 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.858 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.858 else:
2025-07-01 17:49:05.858 # the synch pair is identical
2025-07-01 17:49:05.858 yield ' ' + aelt
2025-07-01 17:49:05.858
2025-07-01 17:49:05.859 # pump out diffs from after the synch point
2025-07-01 17:49:05.859 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.859
2025-07-01 17:49:05.859 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.859 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.859
2025-07-01 17:49:05.859 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.859 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.859 alo = 70, ahi = 1101
2025-07-01 17:49:05.859 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.859 blo = 70, bhi = 1101
2025-07-01 17:49:05.859
2025-07-01 17:49:05.859 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.859 g = []
2025-07-01 17:49:05.859 if alo < ahi:
2025-07-01 17:49:05.859 if blo < bhi:
2025-07-01 17:49:05.859 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.859 else:
2025-07-01 17:49:05.859 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.859 elif blo < bhi:
2025-07-01 17:49:05.859 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.860
2025-07-01 17:49:05.860 > yield from g
2025-07-01 17:49:05.860
2025-07-01 17:49:05.860 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.860 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.860
2025-07-01 17:49:05.860 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.860 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.860 alo = 70, ahi = 1101
2025-07-01 17:49:05.860 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.860 blo = 70, bhi = 1101
2025-07-01 17:49:05.860
2025-07-01 17:49:05.860 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.860 r"""
2025-07-01 17:49:05.860 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.860 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.860 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.860 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.860
2025-07-01 17:49:05.860 Example:
2025-07-01 17:49:05.860
2025-07-01 17:49:05.861 >>> d = Differ()
2025-07-01 17:49:05.861 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.861 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.861 >>> print(''.join(results), end="")
2025-07-01 17:49:05.861 - abcDefghiJkl
2025-07-01 17:49:05.861 + abcdefGhijkl
2025-07-01 17:49:05.861 """
2025-07-01 17:49:05.861
2025-07-01 17:49:05.861 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.861 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.861 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.861 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.861 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.861
2025-07-01 17:49:05.861 # search for the pair that matches best without being identical
2025-07-01 17:49:05.861 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.861 # on junk -- unless we have to)
2025-07-01 17:49:05.861 for j in range(blo, bhi):
2025-07-01 17:49:05.861 bj = b[j]
2025-07-01 17:49:05.862 cruncher.set_seq2(bj)
2025-07-01 17:49:05.862 for i in range(alo, ahi):
2025-07-01 17:49:05.862 ai = a[i]
2025-07-01 17:49:05.862 if ai == bj:
2025-07-01 17:49:05.862 if eqi is None:
2025-07-01 17:49:05.862 eqi, eqj = i, j
2025-07-01 17:49:05.862 continue
2025-07-01 17:49:05.862 cruncher.set_seq1(ai)
2025-07-01 17:49:05.862 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.862 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.862 # compares by a factor of 3.
2025-07-01 17:49:05.862 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.862 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.862 # of the computation is cached by cruncher
2025-07-01 17:49:05.862 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.862 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.862 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.862 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.862 if best_ratio < cutoff:
2025-07-01 17:49:05.862 # no non-identical "pretty close" pair
2025-07-01 17:49:05.862 if eqi is None:
2025-07-01 17:49:05.863 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.863 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.863 return
2025-07-01 17:49:05.863 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.863 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.863 else:
2025-07-01 17:49:05.863 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.863 eqi = None
2025-07-01 17:49:05.863
2025-07-01 17:49:05.863 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.863 # identical
2025-07-01 17:49:05.863
2025-07-01 17:49:05.863 # pump out diffs from before the synch point
2025-07-01 17:49:05.863 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.863
2025-07-01 17:49:05.863 # do intraline marking on the synch pair
2025-07-01 17:49:05.863 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.863 if eqi is None:
2025-07-01 17:49:05.863 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.863 atags = btags = ""
2025-07-01 17:49:05.863 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.863 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.864 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.864 if tag == 'replace':
2025-07-01 17:49:05.864 atags += '^' * la
2025-07-01 17:49:05.864 btags += '^' * lb
2025-07-01 17:49:05.864 elif tag == 'delete':
2025-07-01 17:49:05.864 atags += '-' * la
2025-07-01 17:49:05.864 elif tag == 'insert':
2025-07-01 17:49:05.864 btags += '+' * lb
2025-07-01 17:49:05.864 elif tag == 'equal':
2025-07-01 17:49:05.864 atags += ' ' * la
2025-07-01 17:49:05.864 btags += ' ' * lb
2025-07-01 17:49:05.864 else:
2025-07-01 17:49:05.864 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.864 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.864 else:
2025-07-01 17:49:05.864 # the synch pair is identical
2025-07-01 17:49:05.864 yield ' ' + aelt
2025-07-01 17:49:05.864
2025-07-01 17:49:05.864 # pump out diffs from after the synch point
2025-07-01 17:49:05.864 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.865
2025-07-01 17:49:05.865 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.865 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.865
2025-07-01 17:49:05.865 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.865 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.865 alo = 71, ahi = 1101
2025-07-01 17:49:05.865 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.865 blo = 71, bhi = 1101
2025-07-01 17:49:05.865
2025-07-01 17:49:05.865 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.865 g = []
2025-07-01 17:49:05.865 if alo < ahi:
2025-07-01 17:49:05.865 if blo < bhi:
2025-07-01 17:49:05.865 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.865 else:
2025-07-01 17:49:05.865 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.865 elif blo < bhi:
2025-07-01 17:49:05.865 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.866
2025-07-01 17:49:05.866 > yield from g
2025-07-01 17:49:05.866
2025-07-01 17:49:05.866 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.866 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.866
2025-07-01 17:49:05.866 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.866 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.866 alo = 71, ahi = 1101
2025-07-01 17:49:05.866 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.866 blo = 71, bhi = 1101
2025-07-01 17:49:05.866
2025-07-01 17:49:05.866 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.866 r"""
2025-07-01 17:49:05.866 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.866 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.866 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.866 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.866
2025-07-01 17:49:05.866 Example:
2025-07-01 17:49:05.867
2025-07-01 17:49:05.867 >>> d = Differ()
2025-07-01 17:49:05.867 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.867 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.867 >>> print(''.join(results), end="")
2025-07-01 17:49:05.867 - abcDefghiJkl
2025-07-01 17:49:05.867 + abcdefGhijkl
2025-07-01 17:49:05.867 """
2025-07-01 17:49:05.867
2025-07-01 17:49:05.867 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.867 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.867 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.867 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.867 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.867
2025-07-01 17:49:05.867 # search for the pair that matches best without being identical
2025-07-01 17:49:05.867 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.867 # on junk -- unless we have to)
2025-07-01 17:49:05.867 for j in range(blo, bhi):
2025-07-01 17:49:05.867 bj = b[j]
2025-07-01 17:49:05.868 cruncher.set_seq2(bj)
2025-07-01 17:49:05.871 for i in range(alo, ahi):
2025-07-01 17:49:05.871 ai = a[i]
2025-07-01 17:49:05.871 if ai == bj:
2025-07-01 17:49:05.871 if eqi is None:
2025-07-01 17:49:05.871 eqi, eqj = i, j
2025-07-01 17:49:05.871 continue
2025-07-01 17:49:05.871 cruncher.set_seq1(ai)
2025-07-01 17:49:05.871 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.871 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.871 # compares by a factor of 3.
2025-07-01 17:49:05.871 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.871 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.871 # of the computation is cached by cruncher
2025-07-01 17:49:05.871 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.871 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.871 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.871 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.871 if best_ratio < cutoff:
2025-07-01 17:49:05.872 # no non-identical "pretty close" pair
2025-07-01 17:49:05.872 if eqi is None:
2025-07-01 17:49:05.872 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.872 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.872 return
2025-07-01 17:49:05.872 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.872 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.872 else:
2025-07-01 17:49:05.872 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.872 eqi = None
2025-07-01 17:49:05.872
2025-07-01 17:49:05.872 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.872 # identical
2025-07-01 17:49:05.872
2025-07-01 17:49:05.872 # pump out diffs from before the synch point
2025-07-01 17:49:05.872 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.872
2025-07-01 17:49:05.872 # do intraline marking on the synch pair
2025-07-01 17:49:05.872 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.872 if eqi is None:
2025-07-01 17:49:05.872 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.873 atags = btags = ""
2025-07-01 17:49:05.873 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.873 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.873 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.873 if tag == 'replace':
2025-07-01 17:49:05.873 atags += '^' * la
2025-07-01 17:49:05.873 btags += '^' * lb
2025-07-01 17:49:05.873 elif tag == 'delete':
2025-07-01 17:49:05.873 atags += '-' * la
2025-07-01 17:49:05.873 elif tag == 'insert':
2025-07-01 17:49:05.873 btags += '+' * lb
2025-07-01 17:49:05.873 elif tag == 'equal':
2025-07-01 17:49:05.873 atags += ' ' * la
2025-07-01 17:49:05.873 btags += ' ' * lb
2025-07-01 17:49:05.873 else:
2025-07-01 17:49:05.873 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.873 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.873 else:
2025-07-01 17:49:05.874 # the synch pair is identical
2025-07-01 17:49:05.874 yield ' ' + aelt
2025-07-01 17:49:05.874
2025-07-01 17:49:05.874 # pump out diffs from after the synch point
2025-07-01 17:49:05.874 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.874
2025-07-01 17:49:05.874 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.874 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.874
2025-07-01 17:49:05.874 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.874 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.874 alo = 72, ahi = 1101
2025-07-01 17:49:05.874 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.874 blo = 72, bhi = 1101
2025-07-01 17:49:05.874
2025-07-01 17:49:05.874 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.874 g = []
2025-07-01 17:49:05.874 if alo < ahi:
2025-07-01 17:49:05.874 if blo < bhi:
2025-07-01 17:49:05.874 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.874 else:
2025-07-01 17:49:05.875 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.875 elif blo < bhi:
2025-07-01 17:49:05.875 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.875
2025-07-01 17:49:05.875 > yield from g
2025-07-01 17:49:05.875
2025-07-01 17:49:05.875 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.875 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.875
2025-07-01 17:49:05.875 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.875 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.875 alo = 72, ahi = 1101
2025-07-01 17:49:05.875 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.875 blo = 72, bhi = 1101
2025-07-01 17:49:05.875
2025-07-01 17:49:05.875 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.875 r"""
2025-07-01 17:49:05.875 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.875 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.875 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.876 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.876
2025-07-01 17:49:05.876 Example:
2025-07-01 17:49:05.876
2025-07-01 17:49:05.876 >>> d = Differ()
2025-07-01 17:49:05.876 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.876 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.876 >>> print(''.join(results), end="")
2025-07-01 17:49:05.876 - abcDefghiJkl
2025-07-01 17:49:05.876 + abcdefGhijkl
2025-07-01 17:49:05.876 """
2025-07-01 17:49:05.876
2025-07-01 17:49:05.876 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.876 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.876 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.876 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.877 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.877
2025-07-01 17:49:05.877 # search for the pair that matches best without being identical
2025-07-01 17:49:05.877 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.877 # on junk -- unless we have to)
2025-07-01 17:49:05.877 for j in range(blo, bhi):
2025-07-01 17:49:05.877 bj = b[j]
2025-07-01 17:49:05.877 cruncher.set_seq2(bj)
2025-07-01 17:49:05.877 for i in range(alo, ahi):
2025-07-01 17:49:05.877 ai = a[i]
2025-07-01 17:49:05.877 if ai == bj:
2025-07-01 17:49:05.877 if eqi is None:
2025-07-01 17:49:05.877 eqi, eqj = i, j
2025-07-01 17:49:05.877 continue
2025-07-01 17:49:05.877 cruncher.set_seq1(ai)
2025-07-01 17:49:05.877 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.877 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.877 # compares by a factor of 3.
2025-07-01 17:49:05.877 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.877 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.878 # of the computation is cached by cruncher
2025-07-01 17:49:05.878 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.878 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.878 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.878 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.878 if best_ratio < cutoff:
2025-07-01 17:49:05.878 # no non-identical "pretty close" pair
2025-07-01 17:49:05.878 if eqi is None:
2025-07-01 17:49:05.878 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.878 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.878 return
2025-07-01 17:49:05.878 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.878 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.878 else:
2025-07-01 17:49:05.878 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.878 eqi = None
2025-07-01 17:49:05.878
2025-07-01 17:49:05.878 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.878 # identical
2025-07-01 17:49:05.878
2025-07-01 17:49:05.878 # pump out diffs from before the synch point
2025-07-01 17:49:05.879 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.879
2025-07-01 17:49:05.879 # do intraline marking on the synch pair
2025-07-01 17:49:05.879 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.879 if eqi is None:
2025-07-01 17:49:05.879 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.879 atags = btags = ""
2025-07-01 17:49:05.879 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.879 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.879 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.879 if tag == 'replace':
2025-07-01 17:49:05.879 atags += '^' * la
2025-07-01 17:49:05.879 btags += '^' * lb
2025-07-01 17:49:05.879 elif tag == 'delete':
2025-07-01 17:49:05.879 atags += '-' * la
2025-07-01 17:49:05.879 elif tag == 'insert':
2025-07-01 17:49:05.879 btags += '+' * lb
2025-07-01 17:49:05.879 elif tag == 'equal':
2025-07-01 17:49:05.879 atags += ' ' * la
2025-07-01 17:49:05.879 btags += ' ' * lb
2025-07-01 17:49:05.879 else:
2025-07-01 17:49:05.880 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.880 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.880 else:
2025-07-01 17:49:05.880 # the synch pair is identical
2025-07-01 17:49:05.880 yield ' ' + aelt
2025-07-01 17:49:05.880
2025-07-01 17:49:05.880 # pump out diffs from after the synch point
2025-07-01 17:49:05.880 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.880
2025-07-01 17:49:05.880 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.880 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.880
2025-07-01 17:49:05.880 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.880 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.880 alo = 73, ahi = 1101
2025-07-01 17:49:05.880 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.880 blo = 73, bhi = 1101
2025-07-01 17:49:05.880
2025-07-01 17:49:05.880 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.880 g = []
2025-07-01 17:49:05.880 if alo < ahi:
2025-07-01 17:49:05.881 if blo < bhi:
2025-07-01 17:49:05.881 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.881 else:
2025-07-01 17:49:05.881 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.881 elif blo < bhi:
2025-07-01 17:49:05.881 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.881
2025-07-01 17:49:05.881 > yield from g
2025-07-01 17:49:05.881
2025-07-01 17:49:05.881 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.881 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.881
2025-07-01 17:49:05.881 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.881 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.881 alo = 73, ahi = 1101
2025-07-01 17:49:05.881 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.881 blo = 73, bhi = 1101
2025-07-01 17:49:05.881
2025-07-01 17:49:05.881 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.881 r"""
2025-07-01 17:49:05.881 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.882 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.882 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.882 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.882
2025-07-01 17:49:05.882 Example:
2025-07-01 17:49:05.882
2025-07-01 17:49:05.882 >>> d = Differ()
2025-07-01 17:49:05.882 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.882 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.882 >>> print(''.join(results), end="")
2025-07-01 17:49:05.882 - abcDefghiJkl
2025-07-01 17:49:05.882 + abcdefGhijkl
2025-07-01 17:49:05.882 """
2025-07-01 17:49:05.882
2025-07-01 17:49:05.882 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.882 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.882 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.882 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.882 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.882
2025-07-01 17:49:05.883 # search for the pair that matches best without being identical
2025-07-01 17:49:05.883 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.883 # on junk -- unless we have to)
2025-07-01 17:49:05.883 for j in range(blo, bhi):
2025-07-01 17:49:05.883 bj = b[j]
2025-07-01 17:49:05.883 cruncher.set_seq2(bj)
2025-07-01 17:49:05.883 for i in range(alo, ahi):
2025-07-01 17:49:05.883 ai = a[i]
2025-07-01 17:49:05.883 if ai == bj:
2025-07-01 17:49:05.883 if eqi is None:
2025-07-01 17:49:05.883 eqi, eqj = i, j
2025-07-01 17:49:05.883 continue
2025-07-01 17:49:05.883 cruncher.set_seq1(ai)
2025-07-01 17:49:05.883 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.883 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.883 # compares by a factor of 3.
2025-07-01 17:49:05.883 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.883 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.883 # of the computation is cached by cruncher
2025-07-01 17:49:05.883 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.883 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.884 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.889 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.889 if best_ratio < cutoff:
2025-07-01 17:49:05.889 # no non-identical "pretty close" pair
2025-07-01 17:49:05.889 if eqi is None:
2025-07-01 17:49:05.889 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.889 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.889 return
2025-07-01 17:49:05.889 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.889 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.889 else:
2025-07-01 17:49:05.889 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.889 eqi = None
2025-07-01 17:49:05.889
2025-07-01 17:49:05.889 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.889 # identical
2025-07-01 17:49:05.889
2025-07-01 17:49:05.889 # pump out diffs from before the synch point
2025-07-01 17:49:05.890 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.890
2025-07-01 17:49:05.890 # do intraline marking on the synch pair
2025-07-01 17:49:05.890 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.890 if eqi is None:
2025-07-01 17:49:05.890 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.890 atags = btags = ""
2025-07-01 17:49:05.890 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.890 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.890 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.890 if tag == 'replace':
2025-07-01 17:49:05.890 atags += '^' * la
2025-07-01 17:49:05.890 btags += '^' * lb
2025-07-01 17:49:05.890 elif tag == 'delete':
2025-07-01 17:49:05.890 atags += '-' * la
2025-07-01 17:49:05.890 elif tag == 'insert':
2025-07-01 17:49:05.890 btags += '+' * lb
2025-07-01 17:49:05.890 elif tag == 'equal':
2025-07-01 17:49:05.890 atags += ' ' * la
2025-07-01 17:49:05.890 btags += ' ' * lb
2025-07-01 17:49:05.891 else:
2025-07-01 17:49:05.891 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.891 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.891 else:
2025-07-01 17:49:05.891 # the synch pair is identical
2025-07-01 17:49:05.891 yield ' ' + aelt
2025-07-01 17:49:05.891
2025-07-01 17:49:05.891 # pump out diffs from after the synch point
2025-07-01 17:49:05.891 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.891
2025-07-01 17:49:05.891 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.891 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.891
2025-07-01 17:49:05.891 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.891 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.891 alo = 74, ahi = 1101
2025-07-01 17:49:05.891 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.891 blo = 74, bhi = 1101
2025-07-01 17:49:05.891
2025-07-01 17:49:05.891 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.892 g = []
2025-07-01 17:49:05.892 if alo < ahi:
2025-07-01 17:49:05.892 if blo < bhi:
2025-07-01 17:49:05.892 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.892 else:
2025-07-01 17:49:05.892 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.892 elif blo < bhi:
2025-07-01 17:49:05.892 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.892
2025-07-01 17:49:05.892 > yield from g
2025-07-01 17:49:05.892
2025-07-01 17:49:05.892 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.892 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.892
2025-07-01 17:49:05.892 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.892 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.892 alo = 74, ahi = 1101
2025-07-01 17:49:05.892 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.892 blo = 74, bhi = 1101
2025-07-01 17:49:05.893
2025-07-01 17:49:05.893 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.893 r"""
2025-07-01 17:49:05.893 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.893 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.893 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.893 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.893
2025-07-01 17:49:05.893 Example:
2025-07-01 17:49:05.893
2025-07-01 17:49:05.893 >>> d = Differ()
2025-07-01 17:49:05.893 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.893 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.893 >>> print(''.join(results), end="")
2025-07-01 17:49:05.893 - abcDefghiJkl
2025-07-01 17:49:05.893 + abcdefGhijkl
2025-07-01 17:49:05.893 """
2025-07-01 17:49:05.893
2025-07-01 17:49:05.893 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.894 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.894 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.894 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.894 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.894
2025-07-01 17:49:05.894 # search for the pair that matches best without being identical
2025-07-01 17:49:05.894 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.894 # on junk -- unless we have to)
2025-07-01 17:49:05.894 for j in range(blo, bhi):
2025-07-01 17:49:05.894 bj = b[j]
2025-07-01 17:49:05.894 cruncher.set_seq2(bj)
2025-07-01 17:49:05.894 for i in range(alo, ahi):
2025-07-01 17:49:05.894 ai = a[i]
2025-07-01 17:49:05.894 if ai == bj:
2025-07-01 17:49:05.894 if eqi is None:
2025-07-01 17:49:05.894 eqi, eqj = i, j
2025-07-01 17:49:05.894 continue
2025-07-01 17:49:05.894 cruncher.set_seq1(ai)
2025-07-01 17:49:05.894 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.894 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.894 # compares by a factor of 3.
2025-07-01 17:49:05.895 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.895 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.895 # of the computation is cached by cruncher
2025-07-01 17:49:05.895 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.895 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.895 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.895 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.895 if best_ratio < cutoff:
2025-07-01 17:49:05.895 # no non-identical "pretty close" pair
2025-07-01 17:49:05.895 if eqi is None:
2025-07-01 17:49:05.895 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.895 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.895 return
2025-07-01 17:49:05.895 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.895 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.895 else:
2025-07-01 17:49:05.895 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.895 eqi = None
2025-07-01 17:49:05.895
2025-07-01 17:49:05.895 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.895 # identical
2025-07-01 17:49:05.896
2025-07-01 17:49:05.896 # pump out diffs from before the synch point
2025-07-01 17:49:05.896 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.896
2025-07-01 17:49:05.896 # do intraline marking on the synch pair
2025-07-01 17:49:05.896 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.896 if eqi is None:
2025-07-01 17:49:05.896 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.896 atags = btags = ""
2025-07-01 17:49:05.896 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.896 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.896 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.896 if tag == 'replace':
2025-07-01 17:49:05.896 atags += '^' * la
2025-07-01 17:49:05.896 btags += '^' * lb
2025-07-01 17:49:05.896 elif tag == 'delete':
2025-07-01 17:49:05.896 atags += '-' * la
2025-07-01 17:49:05.896 elif tag == 'insert':
2025-07-01 17:49:05.896 btags += '+' * lb
2025-07-01 17:49:05.896 elif tag == 'equal':
2025-07-01 17:49:05.897 atags += ' ' * la
2025-07-01 17:49:05.897 btags += ' ' * lb
2025-07-01 17:49:05.897 else:
2025-07-01 17:49:05.897 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.897 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.897 else:
2025-07-01 17:49:05.897 # the synch pair is identical
2025-07-01 17:49:05.897 yield ' ' + aelt
2025-07-01 17:49:05.897
2025-07-01 17:49:05.897 # pump out diffs from after the synch point
2025-07-01 17:49:05.897 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.897
2025-07-01 17:49:05.897 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.897 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.897
2025-07-01 17:49:05.897 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.897 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.897 alo = 75, ahi = 1101
2025-07-01 17:49:05.897 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.897 blo = 75, bhi = 1101
2025-07-01 17:49:05.897
2025-07-01 17:49:05.898 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.898 g = []
2025-07-01 17:49:05.898 if alo < ahi:
2025-07-01 17:49:05.898 if blo < bhi:
2025-07-01 17:49:05.898 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.898 else:
2025-07-01 17:49:05.898 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.898 elif blo < bhi:
2025-07-01 17:49:05.898 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.898
2025-07-01 17:49:05.898 > yield from g
2025-07-01 17:49:05.898
2025-07-01 17:49:05.898 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.898 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.898
2025-07-01 17:49:05.898 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.898 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.898 alo = 75, ahi = 1101
2025-07-01 17:49:05.899 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.901 blo = 75, bhi = 1101
2025-07-01 17:49:05.902
2025-07-01 17:49:05.902 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.902 r"""
2025-07-01 17:49:05.902 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.902 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.902 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.902 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.902
2025-07-01 17:49:05.902 Example:
2025-07-01 17:49:05.902
2025-07-01 17:49:05.902 >>> d = Differ()
2025-07-01 17:49:05.902 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.902 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.902 >>> print(''.join(results), end="")
2025-07-01 17:49:05.902 - abcDefghiJkl
2025-07-01 17:49:05.902 + abcdefGhijkl
2025-07-01 17:49:05.902 """
2025-07-01 17:49:05.902
2025-07-01 17:49:05.902 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.903 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.903 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.903 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.903 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.903
2025-07-01 17:49:05.903 # search for the pair that matches best without being identical
2025-07-01 17:49:05.903 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.903 # on junk -- unless we have to)
2025-07-01 17:49:05.903 for j in range(blo, bhi):
2025-07-01 17:49:05.903 bj = b[j]
2025-07-01 17:49:05.903 cruncher.set_seq2(bj)
2025-07-01 17:49:05.903 for i in range(alo, ahi):
2025-07-01 17:49:05.903 ai = a[i]
2025-07-01 17:49:05.903 if ai == bj:
2025-07-01 17:49:05.903 if eqi is None:
2025-07-01 17:49:05.903 eqi, eqj = i, j
2025-07-01 17:49:05.903 continue
2025-07-01 17:49:05.903 cruncher.set_seq1(ai)
2025-07-01 17:49:05.903 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.903 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.903 # compares by a factor of 3.
2025-07-01 17:49:05.904 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.904 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.904 # of the computation is cached by cruncher
2025-07-01 17:49:05.904 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.904 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.904 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.904 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.904 if best_ratio < cutoff:
2025-07-01 17:49:05.904 # no non-identical "pretty close" pair
2025-07-01 17:49:05.904 if eqi is None:
2025-07-01 17:49:05.904 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.904 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.904 return
2025-07-01 17:49:05.904 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.904 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.904 else:
2025-07-01 17:49:05.904 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.904 eqi = None
2025-07-01 17:49:05.904
2025-07-01 17:49:05.904 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.904 # identical
2025-07-01 17:49:05.905
2025-07-01 17:49:05.905 # pump out diffs from before the synch point
2025-07-01 17:49:05.905 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.905
2025-07-01 17:49:05.905 # do intraline marking on the synch pair
2025-07-01 17:49:05.905 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.905 if eqi is None:
2025-07-01 17:49:05.905 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.905 atags = btags = ""
2025-07-01 17:49:05.905 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.905 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.905 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.905 if tag == 'replace':
2025-07-01 17:49:05.905 atags += '^' * la
2025-07-01 17:49:05.905 btags += '^' * lb
2025-07-01 17:49:05.905 elif tag == 'delete':
2025-07-01 17:49:05.905 atags += '-' * la
2025-07-01 17:49:05.905 elif tag == 'insert':
2025-07-01 17:49:05.905 btags += '+' * lb
2025-07-01 17:49:05.905 elif tag == 'equal':
2025-07-01 17:49:05.905 atags += ' ' * la
2025-07-01 17:49:05.906 btags += ' ' * lb
2025-07-01 17:49:05.906 else:
2025-07-01 17:49:05.906 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.906 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.906 else:
2025-07-01 17:49:05.906 # the synch pair is identical
2025-07-01 17:49:05.906 yield ' ' + aelt
2025-07-01 17:49:05.906
2025-07-01 17:49:05.906 # pump out diffs from after the synch point
2025-07-01 17:49:05.906 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.906
2025-07-01 17:49:05.906 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.906 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.906
2025-07-01 17:49:05.906 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.906 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.906 alo = 76, ahi = 1101
2025-07-01 17:49:05.906 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.906 blo = 76, bhi = 1101
2025-07-01 17:49:05.906
2025-07-01 17:49:05.907 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.907 g = []
2025-07-01 17:49:05.907 if alo < ahi:
2025-07-01 17:49:05.907 if blo < bhi:
2025-07-01 17:49:05.907 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.907 else:
2025-07-01 17:49:05.907 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.907 elif blo < bhi:
2025-07-01 17:49:05.907 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.907
2025-07-01 17:49:05.907 > yield from g
2025-07-01 17:49:05.907
2025-07-01 17:49:05.907 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.907 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.907
2025-07-01 17:49:05.907 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.907 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.907 alo = 76, ahi = 1101
2025-07-01 17:49:05.907 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.907 blo = 76, bhi = 1101
2025-07-01 17:49:05.907
2025-07-01 17:49:05.908 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.908 r"""
2025-07-01 17:49:05.908 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.908 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.908 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.908 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.908
2025-07-01 17:49:05.908 Example:
2025-07-01 17:49:05.908
2025-07-01 17:49:05.908 >>> d = Differ()
2025-07-01 17:49:05.908 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.908 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.908 >>> print(''.join(results), end="")
2025-07-01 17:49:05.908 - abcDefghiJkl
2025-07-01 17:49:05.908 + abcdefGhijkl
2025-07-01 17:49:05.908 """
2025-07-01 17:49:05.908
2025-07-01 17:49:05.908 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.908 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.909 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.909 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.909 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.909
2025-07-01 17:49:05.909 # search for the pair that matches best without being identical
2025-07-01 17:49:05.909 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.909 # on junk -- unless we have to)
2025-07-01 17:49:05.909 for j in range(blo, bhi):
2025-07-01 17:49:05.909 bj = b[j]
2025-07-01 17:49:05.909 cruncher.set_seq2(bj)
2025-07-01 17:49:05.909 for i in range(alo, ahi):
2025-07-01 17:49:05.909 ai = a[i]
2025-07-01 17:49:05.909 if ai == bj:
2025-07-01 17:49:05.909 if eqi is None:
2025-07-01 17:49:05.909 eqi, eqj = i, j
2025-07-01 17:49:05.909 continue
2025-07-01 17:49:05.909 cruncher.set_seq1(ai)
2025-07-01 17:49:05.909 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.909 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.909 # compares by a factor of 3.
2025-07-01 17:49:05.910 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.910 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.910 # of the computation is cached by cruncher
2025-07-01 17:49:05.910 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.910 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.910 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.910 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.910 if best_ratio < cutoff:
2025-07-01 17:49:05.910 # no non-identical "pretty close" pair
2025-07-01 17:49:05.910 if eqi is None:
2025-07-01 17:49:05.910 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.910 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.910 return
2025-07-01 17:49:05.910 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.910 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.910 else:
2025-07-01 17:49:05.910 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.910 eqi = None
2025-07-01 17:49:05.910
2025-07-01 17:49:05.910 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.911 # identical
2025-07-01 17:49:05.911
2025-07-01 17:49:05.911 # pump out diffs from before the synch point
2025-07-01 17:49:05.911 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.911
2025-07-01 17:49:05.911 # do intraline marking on the synch pair
2025-07-01 17:49:05.911 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.911 if eqi is None:
2025-07-01 17:49:05.911 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.911 atags = btags = ""
2025-07-01 17:49:05.911 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.911 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.911 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.911 if tag == 'replace':
2025-07-01 17:49:05.911 atags += '^' * la
2025-07-01 17:49:05.911 btags += '^' * lb
2025-07-01 17:49:05.911 elif tag == 'delete':
2025-07-01 17:49:05.911 atags += '-' * la
2025-07-01 17:49:05.911 elif tag == 'insert':
2025-07-01 17:49:05.911 btags += '+' * lb
2025-07-01 17:49:05.912 elif tag == 'equal':
2025-07-01 17:49:05.912 atags += ' ' * la
2025-07-01 17:49:05.912 btags += ' ' * lb
2025-07-01 17:49:05.912 else:
2025-07-01 17:49:05.912 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.912 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.912 else:
2025-07-01 17:49:05.912 # the synch pair is identical
2025-07-01 17:49:05.912 yield ' ' + aelt
2025-07-01 17:49:05.912
2025-07-01 17:49:05.912 # pump out diffs from after the synch point
2025-07-01 17:49:05.912 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.912
2025-07-01 17:49:05.912 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.912 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.912
2025-07-01 17:49:05.912 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.912 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.912 alo = 77, ahi = 1101
2025-07-01 17:49:05.912 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.913 blo = 77, bhi = 1101
2025-07-01 17:49:05.913
2025-07-01 17:49:05.913 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.913 g = []
2025-07-01 17:49:05.913 if alo < ahi:
2025-07-01 17:49:05.913 if blo < bhi:
2025-07-01 17:49:05.913 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.913 else:
2025-07-01 17:49:05.913 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.913 elif blo < bhi:
2025-07-01 17:49:05.913 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.913
2025-07-01 17:49:05.913 > yield from g
2025-07-01 17:49:05.913
2025-07-01 17:49:05.913 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.913 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.913
2025-07-01 17:49:05.913 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.913 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.913 alo = 77, ahi = 1101
2025-07-01 17:49:05.913 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.914 blo = 77, bhi = 1101
2025-07-01 17:49:05.914
2025-07-01 17:49:05.914 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.914 r"""
2025-07-01 17:49:05.914 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.914 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.914 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.914 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.914
2025-07-01 17:49:05.914 Example:
2025-07-01 17:49:05.914
2025-07-01 17:49:05.914 >>> d = Differ()
2025-07-01 17:49:05.914 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.914 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.914 >>> print(''.join(results), end="")
2025-07-01 17:49:05.914 - abcDefghiJkl
2025-07-01 17:49:05.914 + abcdefGhijkl
2025-07-01 17:49:05.914 """
2025-07-01 17:49:05.914
2025-07-01 17:49:05.915 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.920 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.920 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.920 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.920 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.920
2025-07-01 17:49:05.920 # search for the pair that matches best without being identical
2025-07-01 17:49:05.920 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.920 # on junk -- unless we have to)
2025-07-01 17:49:05.920 for j in range(blo, bhi):
2025-07-01 17:49:05.920 bj = b[j]
2025-07-01 17:49:05.920 cruncher.set_seq2(bj)
2025-07-01 17:49:05.920 for i in range(alo, ahi):
2025-07-01 17:49:05.920 ai = a[i]
2025-07-01 17:49:05.920 if ai == bj:
2025-07-01 17:49:05.920 if eqi is None:
2025-07-01 17:49:05.920 eqi, eqj = i, j
2025-07-01 17:49:05.920 continue
2025-07-01 17:49:05.920 cruncher.set_seq1(ai)
2025-07-01 17:49:05.920 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.921 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.921 # compares by a factor of 3.
2025-07-01 17:49:05.921 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.921 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.921 # of the computation is cached by cruncher
2025-07-01 17:49:05.921 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.921 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.921 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.921 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.921 if best_ratio < cutoff:
2025-07-01 17:49:05.921 # no non-identical "pretty close" pair
2025-07-01 17:49:05.921 if eqi is None:
2025-07-01 17:49:05.921 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.921 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.921 return
2025-07-01 17:49:05.921 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.921 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.921 else:
2025-07-01 17:49:05.921 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.921 eqi = None
2025-07-01 17:49:05.921
2025-07-01 17:49:05.922 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.922 # identical
2025-07-01 17:49:05.922
2025-07-01 17:49:05.922 # pump out diffs from before the synch point
2025-07-01 17:49:05.922 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.922
2025-07-01 17:49:05.922 # do intraline marking on the synch pair
2025-07-01 17:49:05.922 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.922 if eqi is None:
2025-07-01 17:49:05.922 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.922 atags = btags = ""
2025-07-01 17:49:05.922 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.922 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.922 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.922 if tag == 'replace':
2025-07-01 17:49:05.922 atags += '^' * la
2025-07-01 17:49:05.922 btags += '^' * lb
2025-07-01 17:49:05.922 elif tag == 'delete':
2025-07-01 17:49:05.922 atags += '-' * la
2025-07-01 17:49:05.922 elif tag == 'insert':
2025-07-01 17:49:05.922 btags += '+' * lb
2025-07-01 17:49:05.923 elif tag == 'equal':
2025-07-01 17:49:05.923 atags += ' ' * la
2025-07-01 17:49:05.923 btags += ' ' * lb
2025-07-01 17:49:05.923 else:
2025-07-01 17:49:05.923 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.923 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.923 else:
2025-07-01 17:49:05.923 # the synch pair is identical
2025-07-01 17:49:05.923 yield ' ' + aelt
2025-07-01 17:49:05.923
2025-07-01 17:49:05.923 # pump out diffs from after the synch point
2025-07-01 17:49:05.923 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.923
2025-07-01 17:49:05.923 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.923 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.923
2025-07-01 17:49:05.923 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.923 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.923 alo = 78, ahi = 1101
2025-07-01 17:49:05.924 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.924 blo = 78, bhi = 1101
2025-07-01 17:49:05.924
2025-07-01 17:49:05.924 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.924 g = []
2025-07-01 17:49:05.924 if alo < ahi:
2025-07-01 17:49:05.924 if blo < bhi:
2025-07-01 17:49:05.924 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.924 else:
2025-07-01 17:49:05.924 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.924 elif blo < bhi:
2025-07-01 17:49:05.924 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.924
2025-07-01 17:49:05.924 > yield from g
2025-07-01 17:49:05.924
2025-07-01 17:49:05.924 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.924 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.924
2025-07-01 17:49:05.924 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.924 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.925 alo = 78, ahi = 1101
2025-07-01 17:49:05.925 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.925 blo = 78, bhi = 1101
2025-07-01 17:49:05.925
2025-07-01 17:49:05.925 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.925 r"""
2025-07-01 17:49:05.925 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.925 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.925 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.925 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.925
2025-07-01 17:49:05.925 Example:
2025-07-01 17:49:05.925
2025-07-01 17:49:05.925 >>> d = Differ()
2025-07-01 17:49:05.925 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.925 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.925 >>> print(''.join(results), end="")
2025-07-01 17:49:05.925 - abcDefghiJkl
2025-07-01 17:49:05.925 + abcdefGhijkl
2025-07-01 17:49:05.926 """
2025-07-01 17:49:05.926
2025-07-01 17:49:05.926 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.926 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.926 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.926 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.926 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.926
2025-07-01 17:49:05.926 # search for the pair that matches best without being identical
2025-07-01 17:49:05.926 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.926 # on junk -- unless we have to)
2025-07-01 17:49:05.926 for j in range(blo, bhi):
2025-07-01 17:49:05.926 bj = b[j]
2025-07-01 17:49:05.926 cruncher.set_seq2(bj)
2025-07-01 17:49:05.926 for i in range(alo, ahi):
2025-07-01 17:49:05.926 ai = a[i]
2025-07-01 17:49:05.926 if ai == bj:
2025-07-01 17:49:05.926 if eqi is None:
2025-07-01 17:49:05.926 eqi, eqj = i, j
2025-07-01 17:49:05.926 continue
2025-07-01 17:49:05.927 cruncher.set_seq1(ai)
2025-07-01 17:49:05.927 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.927 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.927 # compares by a factor of 3.
2025-07-01 17:49:05.927 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.927 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.927 # of the computation is cached by cruncher
2025-07-01 17:49:05.927 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.927 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.927 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.927 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.927 if best_ratio < cutoff:
2025-07-01 17:49:05.927 # no non-identical "pretty close" pair
2025-07-01 17:49:05.927 if eqi is None:
2025-07-01 17:49:05.927 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.927 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.927 return
2025-07-01 17:49:05.927 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.927 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.927 else:
2025-07-01 17:49:05.927 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.928 eqi = None
2025-07-01 17:49:05.928
2025-07-01 17:49:05.928 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.928 # identical
2025-07-01 17:49:05.928
2025-07-01 17:49:05.928 # pump out diffs from before the synch point
2025-07-01 17:49:05.928 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.928
2025-07-01 17:49:05.928 # do intraline marking on the synch pair
2025-07-01 17:49:05.928 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.928 if eqi is None:
2025-07-01 17:49:05.928 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.928 atags = btags = ""
2025-07-01 17:49:05.928 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.928 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.928 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.928 if tag == 'replace':
2025-07-01 17:49:05.928 atags += '^' * la
2025-07-01 17:49:05.928 btags += '^' * lb
2025-07-01 17:49:05.928 elif tag == 'delete':
2025-07-01 17:49:05.928 atags += '-' * la
2025-07-01 17:49:05.929 elif tag == 'insert':
2025-07-01 17:49:05.929 btags += '+' * lb
2025-07-01 17:49:05.929 elif tag == 'equal':
2025-07-01 17:49:05.929 atags += ' ' * la
2025-07-01 17:49:05.929 btags += ' ' * lb
2025-07-01 17:49:05.929 else:
2025-07-01 17:49:05.929 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.929 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.929 else:
2025-07-01 17:49:05.929 # the synch pair is identical
2025-07-01 17:49:05.929 yield ' ' + aelt
2025-07-01 17:49:05.929
2025-07-01 17:49:05.929 # pump out diffs from after the synch point
2025-07-01 17:49:05.929 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.929
2025-07-01 17:49:05.929 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.929 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.929
2025-07-01 17:49:05.929 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.929 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.930 alo = 79, ahi = 1101
2025-07-01 17:49:05.930 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.930 blo = 79, bhi = 1101
2025-07-01 17:49:05.930
2025-07-01 17:49:05.930 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.930 g = []
2025-07-01 17:49:05.930 if alo < ahi:
2025-07-01 17:49:05.930 if blo < bhi:
2025-07-01 17:49:05.930 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.930 else:
2025-07-01 17:49:05.930 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.930 elif blo < bhi:
2025-07-01 17:49:05.930 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.930
2025-07-01 17:49:05.930 > yield from g
2025-07-01 17:49:05.930
2025-07-01 17:49:05.930 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.930 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.930
2025-07-01 17:49:05.930 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.930 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.931 alo = 79, ahi = 1101
2025-07-01 17:49:05.934 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.934 blo = 79, bhi = 1101
2025-07-01 17:49:05.934
2025-07-01 17:49:05.934 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.934 r"""
2025-07-01 17:49:05.934 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.934 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.934 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.934 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.934
2025-07-01 17:49:05.934 Example:
2025-07-01 17:49:05.934
2025-07-01 17:49:05.934 >>> d = Differ()
2025-07-01 17:49:05.935 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.935 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.935 >>> print(''.join(results), end="")
2025-07-01 17:49:05.935 - abcDefghiJkl
2025-07-01 17:49:05.935 + abcdefGhijkl
2025-07-01 17:49:05.935 """
2025-07-01 17:49:05.935
2025-07-01 17:49:05.935 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.935 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.935 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.935 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.935 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.935
2025-07-01 17:49:05.935 # search for the pair that matches best without being identical
2025-07-01 17:49:05.935 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.935 # on junk -- unless we have to)
2025-07-01 17:49:05.935 for j in range(blo, bhi):
2025-07-01 17:49:05.935 bj = b[j]
2025-07-01 17:49:05.936 cruncher.set_seq2(bj)
2025-07-01 17:49:05.936 for i in range(alo, ahi):
2025-07-01 17:49:05.936 ai = a[i]
2025-07-01 17:49:05.936 if ai == bj:
2025-07-01 17:49:05.936 if eqi is None:
2025-07-01 17:49:05.936 eqi, eqj = i, j
2025-07-01 17:49:05.936 continue
2025-07-01 17:49:05.936 cruncher.set_seq1(ai)
2025-07-01 17:49:05.936 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.936 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.936 # compares by a factor of 3.
2025-07-01 17:49:05.936 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.936 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.936 # of the computation is cached by cruncher
2025-07-01 17:49:05.936 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.936 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.936 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.936 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.936 if best_ratio < cutoff:
2025-07-01 17:49:05.936 # no non-identical "pretty close" pair
2025-07-01 17:49:05.937 if eqi is None:
2025-07-01 17:49:05.937 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.937 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.937 return
2025-07-01 17:49:05.937 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.937 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.937 else:
2025-07-01 17:49:05.937 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.937 eqi = None
2025-07-01 17:49:05.937
2025-07-01 17:49:05.937 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.937 # identical
2025-07-01 17:49:05.937
2025-07-01 17:49:05.937 # pump out diffs from before the synch point
2025-07-01 17:49:05.937 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.937
2025-07-01 17:49:05.937 # do intraline marking on the synch pair
2025-07-01 17:49:05.937 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.937 if eqi is None:
2025-07-01 17:49:05.937 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.938 atags = btags = ""
2025-07-01 17:49:05.938 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.938 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.938 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.938 if tag == 'replace':
2025-07-01 17:49:05.938 atags += '^' * la
2025-07-01 17:49:05.938 btags += '^' * lb
2025-07-01 17:49:05.938 elif tag == 'delete':
2025-07-01 17:49:05.938 atags += '-' * la
2025-07-01 17:49:05.938 elif tag == 'insert':
2025-07-01 17:49:05.938 btags += '+' * lb
2025-07-01 17:49:05.938 elif tag == 'equal':
2025-07-01 17:49:05.938 atags += ' ' * la
2025-07-01 17:49:05.938 btags += ' ' * lb
2025-07-01 17:49:05.938 else:
2025-07-01 17:49:05.938 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.938 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.938 else:
2025-07-01 17:49:05.938 # the synch pair is identical
2025-07-01 17:49:05.938 yield ' ' + aelt
2025-07-01 17:49:05.939
2025-07-01 17:49:05.939 # pump out diffs from after the synch point
2025-07-01 17:49:05.939 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.939
2025-07-01 17:49:05.939 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.939 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.939
2025-07-01 17:49:05.939 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.939 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.939 alo = 80, ahi = 1101
2025-07-01 17:49:05.939 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.939 blo = 80, bhi = 1101
2025-07-01 17:49:05.939
2025-07-01 17:49:05.939 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.939 g = []
2025-07-01 17:49:05.939 if alo < ahi:
2025-07-01 17:49:05.939 if blo < bhi:
2025-07-01 17:49:05.939 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.939 else:
2025-07-01 17:49:05.939 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.939 elif blo < bhi:
2025-07-01 17:49:05.939 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.940
2025-07-01 17:49:05.940 > yield from g
2025-07-01 17:49:05.940
2025-07-01 17:49:05.940 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.940 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.940
2025-07-01 17:49:05.940 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.940 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.940 alo = 80, ahi = 1101
2025-07-01 17:49:05.940 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.940 blo = 80, bhi = 1101
2025-07-01 17:49:05.940
2025-07-01 17:49:05.940 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.940 r"""
2025-07-01 17:49:05.940 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.940 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.940 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.940 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.940
2025-07-01 17:49:05.940 Example:
2025-07-01 17:49:05.940
2025-07-01 17:49:05.941 >>> d = Differ()
2025-07-01 17:49:05.941 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.941 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.941 >>> print(''.join(results), end="")
2025-07-01 17:49:05.941 - abcDefghiJkl
2025-07-01 17:49:05.941 + abcdefGhijkl
2025-07-01 17:49:05.941 """
2025-07-01 17:49:05.941
2025-07-01 17:49:05.941 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.941 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.941 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.941 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.941 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.941
2025-07-01 17:49:05.941 # search for the pair that matches best without being identical
2025-07-01 17:49:05.941 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.941 # on junk -- unless we have to)
2025-07-01 17:49:05.941 for j in range(blo, bhi):
2025-07-01 17:49:05.941 bj = b[j]
2025-07-01 17:49:05.941 cruncher.set_seq2(bj)
2025-07-01 17:49:05.942 for i in range(alo, ahi):
2025-07-01 17:49:05.942 ai = a[i]
2025-07-01 17:49:05.942 if ai == bj:
2025-07-01 17:49:05.942 if eqi is None:
2025-07-01 17:49:05.942 eqi, eqj = i, j
2025-07-01 17:49:05.942 continue
2025-07-01 17:49:05.942 cruncher.set_seq1(ai)
2025-07-01 17:49:05.942 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.942 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.942 # compares by a factor of 3.
2025-07-01 17:49:05.942 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.942 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.942 # of the computation is cached by cruncher
2025-07-01 17:49:05.942 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.942 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.942 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.942 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.942 if best_ratio < cutoff:
2025-07-01 17:49:05.942 # no non-identical "pretty close" pair
2025-07-01 17:49:05.942 if eqi is None:
2025-07-01 17:49:05.942 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.943 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.943 return
2025-07-01 17:49:05.943 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.943 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.943 else:
2025-07-01 17:49:05.943 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.943 eqi = None
2025-07-01 17:49:05.943
2025-07-01 17:49:05.943 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.943 # identical
2025-07-01 17:49:05.943
2025-07-01 17:49:05.943 # pump out diffs from before the synch point
2025-07-01 17:49:05.943 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.943
2025-07-01 17:49:05.943 # do intraline marking on the synch pair
2025-07-01 17:49:05.943 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.943 if eqi is None:
2025-07-01 17:49:05.943 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.943 atags = btags = ""
2025-07-01 17:49:05.943 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.944 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.944 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.944 if tag == 'replace':
2025-07-01 17:49:05.944 atags += '^' * la
2025-07-01 17:49:05.944 btags += '^' * lb
2025-07-01 17:49:05.944 elif tag == 'delete':
2025-07-01 17:49:05.944 atags += '-' * la
2025-07-01 17:49:05.944 elif tag == 'insert':
2025-07-01 17:49:05.944 btags += '+' * lb
2025-07-01 17:49:05.944 elif tag == 'equal':
2025-07-01 17:49:05.944 atags += ' ' * la
2025-07-01 17:49:05.944 btags += ' ' * lb
2025-07-01 17:49:05.944 else:
2025-07-01 17:49:05.944 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.944 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.944 else:
2025-07-01 17:49:05.944 # the synch pair is identical
2025-07-01 17:49:05.944 yield ' ' + aelt
2025-07-01 17:49:05.944
2025-07-01 17:49:05.944 # pump out diffs from after the synch point
2025-07-01 17:49:05.945 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.945
2025-07-01 17:49:05.945 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.945 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.945
2025-07-01 17:49:05.945 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.945 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.945 alo = 81, ahi = 1101
2025-07-01 17:49:05.945 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.945 blo = 81, bhi = 1101
2025-07-01 17:49:05.945
2025-07-01 17:49:05.945 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.945 g = []
2025-07-01 17:49:05.945 if alo < ahi:
2025-07-01 17:49:05.945 if blo < bhi:
2025-07-01 17:49:05.945 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.945 else:
2025-07-01 17:49:05.945 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.945 elif blo < bhi:
2025-07-01 17:49:05.945 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.945
2025-07-01 17:49:05.945 > yield from g
2025-07-01 17:49:05.946
2025-07-01 17:49:05.946 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.946 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.946
2025-07-01 17:49:05.946 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.946 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.946 alo = 81, ahi = 1101
2025-07-01 17:49:05.946 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.946 blo = 81, bhi = 1101
2025-07-01 17:49:05.946
2025-07-01 17:49:05.946 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.946 r"""
2025-07-01 17:49:05.946 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.946 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.946 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.946 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.946
2025-07-01 17:49:05.946 Example:
2025-07-01 17:49:05.947
2025-07-01 17:49:05.952 >>> d = Differ()
2025-07-01 17:49:05.952 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.952 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.953 >>> print(''.join(results), end="")
2025-07-01 17:49:05.953 - abcDefghiJkl
2025-07-01 17:49:05.953 + abcdefGhijkl
2025-07-01 17:49:05.953 """
2025-07-01 17:49:05.953
2025-07-01 17:49:05.953 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.953 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.953 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.953 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.953 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.953
2025-07-01 17:49:05.953 # search for the pair that matches best without being identical
2025-07-01 17:49:05.953 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.953 # on junk -- unless we have to)
2025-07-01 17:49:05.953 for j in range(blo, bhi):
2025-07-01 17:49:05.953 bj = b[j]
2025-07-01 17:49:05.953 cruncher.set_seq2(bj)
2025-07-01 17:49:05.953 for i in range(alo, ahi):
2025-07-01 17:49:05.954 ai = a[i]
2025-07-01 17:49:05.954 if ai == bj:
2025-07-01 17:49:05.954 if eqi is None:
2025-07-01 17:49:05.954 eqi, eqj = i, j
2025-07-01 17:49:05.954 continue
2025-07-01 17:49:05.954 cruncher.set_seq1(ai)
2025-07-01 17:49:05.954 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.954 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.954 # compares by a factor of 3.
2025-07-01 17:49:05.954 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.954 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.954 # of the computation is cached by cruncher
2025-07-01 17:49:05.954 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.954 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.954 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.954 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.954 if best_ratio < cutoff:
2025-07-01 17:49:05.954 # no non-identical "pretty close" pair
2025-07-01 17:49:05.954 if eqi is None:
2025-07-01 17:49:05.954 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.954 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.955 return
2025-07-01 17:49:05.955 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.955 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.955 else:
2025-07-01 17:49:05.955 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.955 eqi = None
2025-07-01 17:49:05.955
2025-07-01 17:49:05.955 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.955 # identical
2025-07-01 17:49:05.955
2025-07-01 17:49:05.955 # pump out diffs from before the synch point
2025-07-01 17:49:05.955 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.955
2025-07-01 17:49:05.955 # do intraline marking on the synch pair
2025-07-01 17:49:05.955 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.955 if eqi is None:
2025-07-01 17:49:05.955 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.955 atags = btags = ""
2025-07-01 17:49:05.955 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.955 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.956 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.956 if tag == 'replace':
2025-07-01 17:49:05.956 atags += '^' * la
2025-07-01 17:49:05.956 btags += '^' * lb
2025-07-01 17:49:05.956 elif tag == 'delete':
2025-07-01 17:49:05.956 atags += '-' * la
2025-07-01 17:49:05.956 elif tag == 'insert':
2025-07-01 17:49:05.956 btags += '+' * lb
2025-07-01 17:49:05.956 elif tag == 'equal':
2025-07-01 17:49:05.956 atags += ' ' * la
2025-07-01 17:49:05.956 btags += ' ' * lb
2025-07-01 17:49:05.956 else:
2025-07-01 17:49:05.956 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.956 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.956 else:
2025-07-01 17:49:05.956 # the synch pair is identical
2025-07-01 17:49:05.956 yield ' ' + aelt
2025-07-01 17:49:05.956
2025-07-01 17:49:05.956 # pump out diffs from after the synch point
2025-07-01 17:49:05.956 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.956
2025-07-01 17:49:05.957 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.957 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.957
2025-07-01 17:49:05.957 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.957 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.957 alo = 82, ahi = 1101
2025-07-01 17:49:05.957 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.957 blo = 82, bhi = 1101
2025-07-01 17:49:05.957
2025-07-01 17:49:05.957 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.957 g = []
2025-07-01 17:49:05.957 if alo < ahi:
2025-07-01 17:49:05.957 if blo < bhi:
2025-07-01 17:49:05.957 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.957 else:
2025-07-01 17:49:05.957 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.957 elif blo < bhi:
2025-07-01 17:49:05.957 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.957
2025-07-01 17:49:05.958 > yield from g
2025-07-01 17:49:05.958
2025-07-01 17:49:05.958 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.958 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.958
2025-07-01 17:49:05.958 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.958 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.958 alo = 82, ahi = 1101
2025-07-01 17:49:05.958 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.958 blo = 82, bhi = 1101
2025-07-01 17:49:05.958
2025-07-01 17:49:05.958 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.958 r"""
2025-07-01 17:49:05.958 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.958 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.958 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.958 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.958
2025-07-01 17:49:05.958 Example:
2025-07-01 17:49:05.958
2025-07-01 17:49:05.959 >>> d = Differ()
2025-07-01 17:49:05.959 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.959 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.959 >>> print(''.join(results), end="")
2025-07-01 17:49:05.959 - abcDefghiJkl
2025-07-01 17:49:05.959 + abcdefGhijkl
2025-07-01 17:49:05.959 """
2025-07-01 17:49:05.959
2025-07-01 17:49:05.959 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.959 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.959 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.959 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.959 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.959
2025-07-01 17:49:05.959 # search for the pair that matches best without being identical
2025-07-01 17:49:05.959 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.959 # on junk -- unless we have to)
2025-07-01 17:49:05.959 for j in range(blo, bhi):
2025-07-01 17:49:05.959 bj = b[j]
2025-07-01 17:49:05.960 cruncher.set_seq2(bj)
2025-07-01 17:49:05.960 for i in range(alo, ahi):
2025-07-01 17:49:05.960 ai = a[i]
2025-07-01 17:49:05.960 if ai == bj:
2025-07-01 17:49:05.960 if eqi is None:
2025-07-01 17:49:05.960 eqi, eqj = i, j
2025-07-01 17:49:05.960 continue
2025-07-01 17:49:05.960 cruncher.set_seq1(ai)
2025-07-01 17:49:05.960 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.960 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.960 # compares by a factor of 3.
2025-07-01 17:49:05.960 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.960 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.960 # of the computation is cached by cruncher
2025-07-01 17:49:05.960 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.960 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.960 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.960 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.960 if best_ratio < cutoff:
2025-07-01 17:49:05.960 # no non-identical "pretty close" pair
2025-07-01 17:49:05.960 if eqi is None:
2025-07-01 17:49:05.960 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.961 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.961 return
2025-07-01 17:49:05.961 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.961 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.961 else:
2025-07-01 17:49:05.961 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.961 eqi = None
2025-07-01 17:49:05.961
2025-07-01 17:49:05.961 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.961 # identical
2025-07-01 17:49:05.961
2025-07-01 17:49:05.961 # pump out diffs from before the synch point
2025-07-01 17:49:05.961 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.961
2025-07-01 17:49:05.961 # do intraline marking on the synch pair
2025-07-01 17:49:05.961 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.961 if eqi is None:
2025-07-01 17:49:05.961 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.961 atags = btags = ""
2025-07-01 17:49:05.961 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.961 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.962 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.962 if tag == 'replace':
2025-07-01 17:49:05.962 atags += '^' * la
2025-07-01 17:49:05.962 btags += '^' * lb
2025-07-01 17:49:05.962 elif tag == 'delete':
2025-07-01 17:49:05.962 atags += '-' * la
2025-07-01 17:49:05.962 elif tag == 'insert':
2025-07-01 17:49:05.962 btags += '+' * lb
2025-07-01 17:49:05.962 elif tag == 'equal':
2025-07-01 17:49:05.962 atags += ' ' * la
2025-07-01 17:49:05.962 btags += ' ' * lb
2025-07-01 17:49:05.962 else:
2025-07-01 17:49:05.962 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.962 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.962 else:
2025-07-01 17:49:05.962 # the synch pair is identical
2025-07-01 17:49:05.962 yield ' ' + aelt
2025-07-01 17:49:05.962
2025-07-01 17:49:05.962 # pump out diffs from after the synch point
2025-07-01 17:49:05.962 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.962
2025-07-01 17:49:05.962 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.963 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.965
2025-07-01 17:49:05.966 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.966 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.966 alo = 83, ahi = 1101
2025-07-01 17:49:05.966 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.966 blo = 83, bhi = 1101
2025-07-01 17:49:05.966
2025-07-01 17:49:05.966 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.966 g = []
2025-07-01 17:49:05.966 if alo < ahi:
2025-07-01 17:49:05.966 if blo < bhi:
2025-07-01 17:49:05.966 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.966 else:
2025-07-01 17:49:05.966 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.966 elif blo < bhi:
2025-07-01 17:49:05.966 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.966
2025-07-01 17:49:05.966 > yield from g
2025-07-01 17:49:05.966
2025-07-01 17:49:05.966 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.966 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.967
2025-07-01 17:49:05.967 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.967 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.967 alo = 83, ahi = 1101
2025-07-01 17:49:05.967 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.967 blo = 83, bhi = 1101
2025-07-01 17:49:05.967
2025-07-01 17:49:05.967 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.967 r"""
2025-07-01 17:49:05.967 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.967 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.967 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.967 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.967
2025-07-01 17:49:05.967 Example:
2025-07-01 17:49:05.967
2025-07-01 17:49:05.967 >>> d = Differ()
2025-07-01 17:49:05.967 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.967 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.967 >>> print(''.join(results), end="")
2025-07-01 17:49:05.968 - abcDefghiJkl
2025-07-01 17:49:05.968 + abcdefGhijkl
2025-07-01 17:49:05.968 """
2025-07-01 17:49:05.968
2025-07-01 17:49:05.968 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.968 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.968 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.968 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.968 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.968
2025-07-01 17:49:05.968 # search for the pair that matches best without being identical
2025-07-01 17:49:05.968 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.968 # on junk -- unless we have to)
2025-07-01 17:49:05.968 for j in range(blo, bhi):
2025-07-01 17:49:05.968 bj = b[j]
2025-07-01 17:49:05.968 cruncher.set_seq2(bj)
2025-07-01 17:49:05.968 for i in range(alo, ahi):
2025-07-01 17:49:05.968 ai = a[i]
2025-07-01 17:49:05.969 if ai == bj:
2025-07-01 17:49:05.969 if eqi is None:
2025-07-01 17:49:05.969 eqi, eqj = i, j
2025-07-01 17:49:05.969 continue
2025-07-01 17:49:05.969 cruncher.set_seq1(ai)
2025-07-01 17:49:05.969 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.969 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.969 # compares by a factor of 3.
2025-07-01 17:49:05.969 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.969 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.969 # of the computation is cached by cruncher
2025-07-01 17:49:05.969 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.969 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.969 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.969 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.969 if best_ratio < cutoff:
2025-07-01 17:49:05.969 # no non-identical "pretty close" pair
2025-07-01 17:49:05.969 if eqi is None:
2025-07-01 17:49:05.969 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.969 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.970 return
2025-07-01 17:49:05.970 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.970 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.970 else:
2025-07-01 17:49:05.970 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.970 eqi = None
2025-07-01 17:49:05.970
2025-07-01 17:49:05.970 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.970 # identical
2025-07-01 17:49:05.970
2025-07-01 17:49:05.970 # pump out diffs from before the synch point
2025-07-01 17:49:05.970 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.970
2025-07-01 17:49:05.970 # do intraline marking on the synch pair
2025-07-01 17:49:05.970 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.970 if eqi is None:
2025-07-01 17:49:05.970 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.970 atags = btags = ""
2025-07-01 17:49:05.970 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.970 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.970 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.971 if tag == 'replace':
2025-07-01 17:49:05.971 atags += '^' * la
2025-07-01 17:49:05.971 btags += '^' * lb
2025-07-01 17:49:05.971 elif tag == 'delete':
2025-07-01 17:49:05.971 atags += '-' * la
2025-07-01 17:49:05.971 elif tag == 'insert':
2025-07-01 17:49:05.971 btags += '+' * lb
2025-07-01 17:49:05.971 elif tag == 'equal':
2025-07-01 17:49:05.971 atags += ' ' * la
2025-07-01 17:49:05.971 btags += ' ' * lb
2025-07-01 17:49:05.971 else:
2025-07-01 17:49:05.971 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.971 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.971 else:
2025-07-01 17:49:05.971 # the synch pair is identical
2025-07-01 17:49:05.971 yield ' ' + aelt
2025-07-01 17:49:05.971
2025-07-01 17:49:05.971 # pump out diffs from after the synch point
2025-07-01 17:49:05.971 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.971
2025-07-01 17:49:05.971 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.971 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.972
2025-07-01 17:49:05.972 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.972 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.972 alo = 84, ahi = 1101
2025-07-01 17:49:05.972 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.972 blo = 84, bhi = 1101
2025-07-01 17:49:05.972
2025-07-01 17:49:05.972 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.972 g = []
2025-07-01 17:49:05.972 if alo < ahi:
2025-07-01 17:49:05.972 if blo < bhi:
2025-07-01 17:49:05.972 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.972 else:
2025-07-01 17:49:05.972 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.972 elif blo < bhi:
2025-07-01 17:49:05.972 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.972
2025-07-01 17:49:05.972 > yield from g
2025-07-01 17:49:05.972
2025-07-01 17:49:05.972 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.972 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.973
2025-07-01 17:49:05.973 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.973 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.973 alo = 84, ahi = 1101
2025-07-01 17:49:05.973 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.973 blo = 84, bhi = 1101
2025-07-01 17:49:05.973
2025-07-01 17:49:05.973 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.973 r"""
2025-07-01 17:49:05.973 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.973 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.973 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.973 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.973
2025-07-01 17:49:05.973 Example:
2025-07-01 17:49:05.973
2025-07-01 17:49:05.973 >>> d = Differ()
2025-07-01 17:49:05.973 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.973 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.973 >>> print(''.join(results), end="")
2025-07-01 17:49:05.973 - abcDefghiJkl
2025-07-01 17:49:05.974 + abcdefGhijkl
2025-07-01 17:49:05.974 """
2025-07-01 17:49:05.974
2025-07-01 17:49:05.974 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.974 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.974 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.974 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.974 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.974
2025-07-01 17:49:05.974 # search for the pair that matches best without being identical
2025-07-01 17:49:05.974 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.974 # on junk -- unless we have to)
2025-07-01 17:49:05.974 for j in range(blo, bhi):
2025-07-01 17:49:05.974 bj = b[j]
2025-07-01 17:49:05.974 cruncher.set_seq2(bj)
2025-07-01 17:49:05.974 for i in range(alo, ahi):
2025-07-01 17:49:05.974 ai = a[i]
2025-07-01 17:49:05.974 if ai == bj:
2025-07-01 17:49:05.974 if eqi is None:
2025-07-01 17:49:05.974 eqi, eqj = i, j
2025-07-01 17:49:05.975 continue
2025-07-01 17:49:05.975 cruncher.set_seq1(ai)
2025-07-01 17:49:05.975 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.975 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.975 # compares by a factor of 3.
2025-07-01 17:49:05.975 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.975 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.975 # of the computation is cached by cruncher
2025-07-01 17:49:05.975 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.975 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.975 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.975 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.975 if best_ratio < cutoff:
2025-07-01 17:49:05.975 # no non-identical "pretty close" pair
2025-07-01 17:49:05.975 if eqi is None:
2025-07-01 17:49:05.975 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.975 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.975 return
2025-07-01 17:49:05.975 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.975 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.975 else:
2025-07-01 17:49:05.976 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.976 eqi = None
2025-07-01 17:49:05.976
2025-07-01 17:49:05.976 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.976 # identical
2025-07-01 17:49:05.976
2025-07-01 17:49:05.976 # pump out diffs from before the synch point
2025-07-01 17:49:05.976 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.976
2025-07-01 17:49:05.976 # do intraline marking on the synch pair
2025-07-01 17:49:05.976 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.976 if eqi is None:
2025-07-01 17:49:05.976 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.976 atags = btags = ""
2025-07-01 17:49:05.976 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.976 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.976 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.976 if tag == 'replace':
2025-07-01 17:49:05.976 atags += '^' * la
2025-07-01 17:49:05.976 btags += '^' * lb
2025-07-01 17:49:05.976 elif tag == 'delete':
2025-07-01 17:49:05.977 atags += '-' * la
2025-07-01 17:49:05.977 elif tag == 'insert':
2025-07-01 17:49:05.977 btags += '+' * lb
2025-07-01 17:49:05.977 elif tag == 'equal':
2025-07-01 17:49:05.977 atags += ' ' * la
2025-07-01 17:49:05.977 btags += ' ' * lb
2025-07-01 17:49:05.977 else:
2025-07-01 17:49:05.977 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.977 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.977 else:
2025-07-01 17:49:05.977 # the synch pair is identical
2025-07-01 17:49:05.977 yield ' ' + aelt
2025-07-01 17:49:05.977
2025-07-01 17:49:05.977 # pump out diffs from after the synch point
2025-07-01 17:49:05.977 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.977
2025-07-01 17:49:05.977 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.977 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.977
2025-07-01 17:49:05.977 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.977 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.978 alo = 85, ahi = 1101
2025-07-01 17:49:05.978 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.978 blo = 85, bhi = 1101
2025-07-01 17:49:05.978
2025-07-01 17:49:05.978 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.978 g = []
2025-07-01 17:49:05.978 if alo < ahi:
2025-07-01 17:49:05.978 if blo < bhi:
2025-07-01 17:49:05.978 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.978 else:
2025-07-01 17:49:05.978 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.978 elif blo < bhi:
2025-07-01 17:49:05.978 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.978
2025-07-01 17:49:05.978 > yield from g
2025-07-01 17:49:05.978
2025-07-01 17:49:05.978 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.978 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.978
2025-07-01 17:49:05.978 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.979 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.984 alo = 85, ahi = 1101
2025-07-01 17:49:05.984 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.984 blo = 85, bhi = 1101
2025-07-01 17:49:05.984
2025-07-01 17:49:05.984 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.984 r"""
2025-07-01 17:49:05.984 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.984 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.984 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.984 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.984
2025-07-01 17:49:05.984 Example:
2025-07-01 17:49:05.984
2025-07-01 17:49:05.984 >>> d = Differ()
2025-07-01 17:49:05.984 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.984 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.984 >>> print(''.join(results), end="")
2025-07-01 17:49:05.984 - abcDefghiJkl
2025-07-01 17:49:05.984 + abcdefGhijkl
2025-07-01 17:49:05.985 """
2025-07-01 17:49:05.985
2025-07-01 17:49:05.985 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.985 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.985 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.985 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.985 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.985
2025-07-01 17:49:05.985 # search for the pair that matches best without being identical
2025-07-01 17:49:05.985 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.985 # on junk -- unless we have to)
2025-07-01 17:49:05.985 for j in range(blo, bhi):
2025-07-01 17:49:05.985 bj = b[j]
2025-07-01 17:49:05.985 cruncher.set_seq2(bj)
2025-07-01 17:49:05.985 for i in range(alo, ahi):
2025-07-01 17:49:05.985 ai = a[i]
2025-07-01 17:49:05.985 if ai == bj:
2025-07-01 17:49:05.985 if eqi is None:
2025-07-01 17:49:05.985 eqi, eqj = i, j
2025-07-01 17:49:05.985 continue
2025-07-01 17:49:05.986 cruncher.set_seq1(ai)
2025-07-01 17:49:05.986 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.986 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.986 # compares by a factor of 3.
2025-07-01 17:49:05.986 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.986 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.986 # of the computation is cached by cruncher
2025-07-01 17:49:05.986 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.986 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.986 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.986 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.986 if best_ratio < cutoff:
2025-07-01 17:49:05.986 # no non-identical "pretty close" pair
2025-07-01 17:49:05.986 if eqi is None:
2025-07-01 17:49:05.986 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.986 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.986 return
2025-07-01 17:49:05.986 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.986 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.986 else:
2025-07-01 17:49:05.986 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.987 eqi = None
2025-07-01 17:49:05.987
2025-07-01 17:49:05.987 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.987 # identical
2025-07-01 17:49:05.987
2025-07-01 17:49:05.987 # pump out diffs from before the synch point
2025-07-01 17:49:05.987 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.987
2025-07-01 17:49:05.987 # do intraline marking on the synch pair
2025-07-01 17:49:05.987 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.987 if eqi is None:
2025-07-01 17:49:05.987 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.987 atags = btags = ""
2025-07-01 17:49:05.987 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.987 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.987 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.987 if tag == 'replace':
2025-07-01 17:49:05.987 atags += '^' * la
2025-07-01 17:49:05.987 btags += '^' * lb
2025-07-01 17:49:05.987 elif tag == 'delete':
2025-07-01 17:49:05.987 atags += '-' * la
2025-07-01 17:49:05.987 elif tag == 'insert':
2025-07-01 17:49:05.988 btags += '+' * lb
2025-07-01 17:49:05.988 elif tag == 'equal':
2025-07-01 17:49:05.988 atags += ' ' * la
2025-07-01 17:49:05.988 btags += ' ' * lb
2025-07-01 17:49:05.988 else:
2025-07-01 17:49:05.988 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.988 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.988 else:
2025-07-01 17:49:05.988 # the synch pair is identical
2025-07-01 17:49:05.988 yield ' ' + aelt
2025-07-01 17:49:05.988
2025-07-01 17:49:05.988 # pump out diffs from after the synch point
2025-07-01 17:49:05.988 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.988
2025-07-01 17:49:05.988 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.988 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.988
2025-07-01 17:49:05.988 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.988 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.988 alo = 86, ahi = 1101
2025-07-01 17:49:05.988 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.989 blo = 86, bhi = 1101
2025-07-01 17:49:05.989
2025-07-01 17:49:05.989 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.989 g = []
2025-07-01 17:49:05.989 if alo < ahi:
2025-07-01 17:49:05.989 if blo < bhi:
2025-07-01 17:49:05.989 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.989 else:
2025-07-01 17:49:05.989 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.989 elif blo < bhi:
2025-07-01 17:49:05.989 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.989
2025-07-01 17:49:05.989 > yield from g
2025-07-01 17:49:05.989
2025-07-01 17:49:05.989 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.989 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.989
2025-07-01 17:49:05.989 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.989 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.990 alo = 86, ahi = 1101
2025-07-01 17:49:05.990 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.990 blo = 86, bhi = 1101
2025-07-01 17:49:05.990
2025-07-01 17:49:05.990 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.990 r"""
2025-07-01 17:49:05.990 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.990 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.990 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.990 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.990
2025-07-01 17:49:05.990 Example:
2025-07-01 17:49:05.990
2025-07-01 17:49:05.990 >>> d = Differ()
2025-07-01 17:49:05.990 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.990 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.990 >>> print(''.join(results), end="")
2025-07-01 17:49:05.990 - abcDefghiJkl
2025-07-01 17:49:05.990 + abcdefGhijkl
2025-07-01 17:49:05.991 """
2025-07-01 17:49:05.991
2025-07-01 17:49:05.991 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:05.991 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:05.991 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:05.991 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:05.991 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:05.991
2025-07-01 17:49:05.991 # search for the pair that matches best without being identical
2025-07-01 17:49:05.991 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:05.991 # on junk -- unless we have to)
2025-07-01 17:49:05.991 for j in range(blo, bhi):
2025-07-01 17:49:05.991 bj = b[j]
2025-07-01 17:49:05.991 cruncher.set_seq2(bj)
2025-07-01 17:49:05.991 for i in range(alo, ahi):
2025-07-01 17:49:05.991 ai = a[i]
2025-07-01 17:49:05.991 if ai == bj:
2025-07-01 17:49:05.991 if eqi is None:
2025-07-01 17:49:05.991 eqi, eqj = i, j
2025-07-01 17:49:05.991 continue
2025-07-01 17:49:05.992 cruncher.set_seq1(ai)
2025-07-01 17:49:05.992 # computing similarity is expensive, so use the quick
2025-07-01 17:49:05.992 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:05.992 # compares by a factor of 3.
2025-07-01 17:49:05.992 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:05.992 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:05.992 # of the computation is cached by cruncher
2025-07-01 17:49:05.992 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:05.992 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:05.992 cruncher.ratio() > best_ratio:
2025-07-01 17:49:05.992 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:05.992 if best_ratio < cutoff:
2025-07-01 17:49:05.992 # no non-identical "pretty close" pair
2025-07-01 17:49:05.992 if eqi is None:
2025-07-01 17:49:05.992 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:05.992 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.992 return
2025-07-01 17:49:05.992 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:05.992 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:05.993 else:
2025-07-01 17:49:05.993 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:05.993 eqi = None
2025-07-01 17:49:05.993
2025-07-01 17:49:05.993 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:05.993 # identical
2025-07-01 17:49:05.993
2025-07-01 17:49:05.993 # pump out diffs from before the synch point
2025-07-01 17:49:05.993 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:05.993
2025-07-01 17:49:05.993 # do intraline marking on the synch pair
2025-07-01 17:49:05.993 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:05.993 if eqi is None:
2025-07-01 17:49:05.993 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:05.993 atags = btags = ""
2025-07-01 17:49:05.993 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:05.993 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:05.993 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:05.993 if tag == 'replace':
2025-07-01 17:49:05.993 atags += '^' * la
2025-07-01 17:49:05.993 btags += '^' * lb
2025-07-01 17:49:05.996 elif tag == 'delete':
2025-07-01 17:49:05.996 atags += '-' * la
2025-07-01 17:49:05.997 elif tag == 'insert':
2025-07-01 17:49:05.997 btags += '+' * lb
2025-07-01 17:49:05.997 elif tag == 'equal':
2025-07-01 17:49:05.997 atags += ' ' * la
2025-07-01 17:49:05.997 btags += ' ' * lb
2025-07-01 17:49:05.997 else:
2025-07-01 17:49:05.997 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:05.997 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:05.997 else:
2025-07-01 17:49:05.997 # the synch pair is identical
2025-07-01 17:49:05.997 yield ' ' + aelt
2025-07-01 17:49:05.997
2025-07-01 17:49:05.997 # pump out diffs from after the synch point
2025-07-01 17:49:05.997 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:05.997
2025-07-01 17:49:05.997 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:05.997 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.997
2025-07-01 17:49:05.997 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.997 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.998 alo = 87, ahi = 1101
2025-07-01 17:49:05.998 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.998 blo = 87, bhi = 1101
2025-07-01 17:49:05.998
2025-07-01 17:49:05.998 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.998 g = []
2025-07-01 17:49:05.998 if alo < ahi:
2025-07-01 17:49:05.998 if blo < bhi:
2025-07-01 17:49:05.998 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:05.998 else:
2025-07-01 17:49:05.998 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:05.998 elif blo < bhi:
2025-07-01 17:49:05.998 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:05.998
2025-07-01 17:49:05.998 > yield from g
2025-07-01 17:49:05.998
2025-07-01 17:49:05.998 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:05.998 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:05.998
2025-07-01 17:49:05.998 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:05.999 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:05.999 alo = 87, ahi = 1101
2025-07-01 17:49:05.999 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:05.999 blo = 87, bhi = 1101
2025-07-01 17:49:05.999
2025-07-01 17:49:05.999 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:05.999 r"""
2025-07-01 17:49:05.999 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:05.999 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:05.999 synch point, and intraline difference marking is done on the
2025-07-01 17:49:05.999 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:05.999
2025-07-01 17:49:05.999 Example:
2025-07-01 17:49:05.999
2025-07-01 17:49:05.999 >>> d = Differ()
2025-07-01 17:49:05.999 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:05.999 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:05.999 >>> print(''.join(results), end="")
2025-07-01 17:49:05.999 - abcDefghiJkl
2025-07-01 17:49:05.999 + abcdefGhijkl
2025-07-01 17:49:06.000 """
2025-07-01 17:49:06.000
2025-07-01 17:49:06.000 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.000 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.000 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.000 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.000 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.000
2025-07-01 17:49:06.000 # search for the pair that matches best without being identical
2025-07-01 17:49:06.000 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.000 # on junk -- unless we have to)
2025-07-01 17:49:06.000 for j in range(blo, bhi):
2025-07-01 17:49:06.000 bj = b[j]
2025-07-01 17:49:06.000 cruncher.set_seq2(bj)
2025-07-01 17:49:06.000 for i in range(alo, ahi):
2025-07-01 17:49:06.000 ai = a[i]
2025-07-01 17:49:06.000 if ai == bj:
2025-07-01 17:49:06.000 if eqi is None:
2025-07-01 17:49:06.000 eqi, eqj = i, j
2025-07-01 17:49:06.000 continue
2025-07-01 17:49:06.001 cruncher.set_seq1(ai)
2025-07-01 17:49:06.001 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.001 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.001 # compares by a factor of 3.
2025-07-01 17:49:06.001 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.001 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.001 # of the computation is cached by cruncher
2025-07-01 17:49:06.001 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.001 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.001 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.001 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.001 if best_ratio < cutoff:
2025-07-01 17:49:06.001 # no non-identical "pretty close" pair
2025-07-01 17:49:06.001 if eqi is None:
2025-07-01 17:49:06.001 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.001 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.001 return
2025-07-01 17:49:06.001 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.002 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.002 else:
2025-07-01 17:49:06.002 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.002 eqi = None
2025-07-01 17:49:06.002
2025-07-01 17:49:06.002 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.002 # identical
2025-07-01 17:49:06.002
2025-07-01 17:49:06.002 # pump out diffs from before the synch point
2025-07-01 17:49:06.002 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.002
2025-07-01 17:49:06.002 # do intraline marking on the synch pair
2025-07-01 17:49:06.002 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.002 if eqi is None:
2025-07-01 17:49:06.002 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.002 atags = btags = ""
2025-07-01 17:49:06.002 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.002 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.002 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.002 if tag == 'replace':
2025-07-01 17:49:06.002 atags += '^' * la
2025-07-01 17:49:06.003 btags += '^' * lb
2025-07-01 17:49:06.003 elif tag == 'delete':
2025-07-01 17:49:06.003 atags += '-' * la
2025-07-01 17:49:06.003 elif tag == 'insert':
2025-07-01 17:49:06.003 btags += '+' * lb
2025-07-01 17:49:06.003 elif tag == 'equal':
2025-07-01 17:49:06.003 atags += ' ' * la
2025-07-01 17:49:06.003 btags += ' ' * lb
2025-07-01 17:49:06.003 else:
2025-07-01 17:49:06.003 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.003 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.003 else:
2025-07-01 17:49:06.003 # the synch pair is identical
2025-07-01 17:49:06.003 yield ' ' + aelt
2025-07-01 17:49:06.003
2025-07-01 17:49:06.003 # pump out diffs from after the synch point
2025-07-01 17:49:06.003 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.003
2025-07-01 17:49:06.003 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.003 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.003
2025-07-01 17:49:06.004 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.004 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.004 alo = 88, ahi = 1101
2025-07-01 17:49:06.004 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.004 blo = 88, bhi = 1101
2025-07-01 17:49:06.004
2025-07-01 17:49:06.004 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.004 g = []
2025-07-01 17:49:06.004 if alo < ahi:
2025-07-01 17:49:06.004 if blo < bhi:
2025-07-01 17:49:06.004 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.004 else:
2025-07-01 17:49:06.004 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.004 elif blo < bhi:
2025-07-01 17:49:06.004 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.004
2025-07-01 17:49:06.004 > yield from g
2025-07-01 17:49:06.004
2025-07-01 17:49:06.004 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.004 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.005
2025-07-01 17:49:06.005 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.005 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.005 alo = 88, ahi = 1101
2025-07-01 17:49:06.005 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.005 blo = 88, bhi = 1101
2025-07-01 17:49:06.005
2025-07-01 17:49:06.005 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.005 r"""
2025-07-01 17:49:06.005 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.005 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.005 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.005 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.005
2025-07-01 17:49:06.005 Example:
2025-07-01 17:49:06.005
2025-07-01 17:49:06.005 >>> d = Differ()
2025-07-01 17:49:06.005 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.005 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.005 >>> print(''.join(results), end="")
2025-07-01 17:49:06.006 - abcDefghiJkl
2025-07-01 17:49:06.006 + abcdefGhijkl
2025-07-01 17:49:06.006 """
2025-07-01 17:49:06.006
2025-07-01 17:49:06.006 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.006 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.006 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.006 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.006 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.006
2025-07-01 17:49:06.006 # search for the pair that matches best without being identical
2025-07-01 17:49:06.006 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.006 # on junk -- unless we have to)
2025-07-01 17:49:06.006 for j in range(blo, bhi):
2025-07-01 17:49:06.006 bj = b[j]
2025-07-01 17:49:06.006 cruncher.set_seq2(bj)
2025-07-01 17:49:06.006 for i in range(alo, ahi):
2025-07-01 17:49:06.006 ai = a[i]
2025-07-01 17:49:06.006 if ai == bj:
2025-07-01 17:49:06.007 if eqi is None:
2025-07-01 17:49:06.007 eqi, eqj = i, j
2025-07-01 17:49:06.007 continue
2025-07-01 17:49:06.007 cruncher.set_seq1(ai)
2025-07-01 17:49:06.007 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.007 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.007 # compares by a factor of 3.
2025-07-01 17:49:06.007 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.007 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.007 # of the computation is cached by cruncher
2025-07-01 17:49:06.007 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.007 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.007 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.007 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.007 if best_ratio < cutoff:
2025-07-01 17:49:06.007 # no non-identical "pretty close" pair
2025-07-01 17:49:06.007 if eqi is None:
2025-07-01 17:49:06.007 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.007 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.007 return
2025-07-01 17:49:06.008 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.008 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.008 else:
2025-07-01 17:49:06.008 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.008 eqi = None
2025-07-01 17:49:06.008
2025-07-01 17:49:06.008 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.008 # identical
2025-07-01 17:49:06.008
2025-07-01 17:49:06.008 # pump out diffs from before the synch point
2025-07-01 17:49:06.008 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.008
2025-07-01 17:49:06.008 # do intraline marking on the synch pair
2025-07-01 17:49:06.008 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.008 if eqi is None:
2025-07-01 17:49:06.008 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.008 atags = btags = ""
2025-07-01 17:49:06.008 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.008 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.008 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.009 if tag == 'replace':
2025-07-01 17:49:06.009 atags += '^' * la
2025-07-01 17:49:06.009 btags += '^' * lb
2025-07-01 17:49:06.009 elif tag == 'delete':
2025-07-01 17:49:06.009 atags += '-' * la
2025-07-01 17:49:06.009 elif tag == 'insert':
2025-07-01 17:49:06.009 btags += '+' * lb
2025-07-01 17:49:06.009 elif tag == 'equal':
2025-07-01 17:49:06.009 atags += ' ' * la
2025-07-01 17:49:06.009 btags += ' ' * lb
2025-07-01 17:49:06.009 else:
2025-07-01 17:49:06.009 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.009 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.009 else:
2025-07-01 17:49:06.009 # the synch pair is identical
2025-07-01 17:49:06.009 yield ' ' + aelt
2025-07-01 17:49:06.009
2025-07-01 17:49:06.009 # pump out diffs from after the synch point
2025-07-01 17:49:06.009 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.009
2025-07-01 17:49:06.015 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.015 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.015
2025-07-01 17:49:06.015 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.015 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.015 alo = 89, ahi = 1101
2025-07-01 17:49:06.015 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.015 blo = 89, bhi = 1101
2025-07-01 17:49:06.015
2025-07-01 17:49:06.015 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.015 g = []
2025-07-01 17:49:06.015 if alo < ahi:
2025-07-01 17:49:06.015 if blo < bhi:
2025-07-01 17:49:06.015 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.015 else:
2025-07-01 17:49:06.015 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.015 elif blo < bhi:
2025-07-01 17:49:06.015 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.016
2025-07-01 17:49:06.016 > yield from g
2025-07-01 17:49:06.016
2025-07-01 17:49:06.016 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.016 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.016
2025-07-01 17:49:06.016 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.016 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.016 alo = 89, ahi = 1101
2025-07-01 17:49:06.016 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.016 blo = 89, bhi = 1101
2025-07-01 17:49:06.016
2025-07-01 17:49:06.016 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.016 r"""
2025-07-01 17:49:06.016 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.016 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.016 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.016 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.016
2025-07-01 17:49:06.016 Example:
2025-07-01 17:49:06.016
2025-07-01 17:49:06.017 >>> d = Differ()
2025-07-01 17:49:06.017 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.017 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.017 >>> print(''.join(results), end="")
2025-07-01 17:49:06.017 - abcDefghiJkl
2025-07-01 17:49:06.017 + abcdefGhijkl
2025-07-01 17:49:06.017 """
2025-07-01 17:49:06.017
2025-07-01 17:49:06.017 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.017 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.017 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.017 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.017 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.017
2025-07-01 17:49:06.017 # search for the pair that matches best without being identical
2025-07-01 17:49:06.017 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.017 # on junk -- unless we have to)
2025-07-01 17:49:06.017 for j in range(blo, bhi):
2025-07-01 17:49:06.017 bj = b[j]
2025-07-01 17:49:06.017 cruncher.set_seq2(bj)
2025-07-01 17:49:06.018 for i in range(alo, ahi):
2025-07-01 17:49:06.018 ai = a[i]
2025-07-01 17:49:06.018 if ai == bj:
2025-07-01 17:49:06.018 if eqi is None:
2025-07-01 17:49:06.018 eqi, eqj = i, j
2025-07-01 17:49:06.018 continue
2025-07-01 17:49:06.018 cruncher.set_seq1(ai)
2025-07-01 17:49:06.018 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.018 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.018 # compares by a factor of 3.
2025-07-01 17:49:06.018 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.018 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.018 # of the computation is cached by cruncher
2025-07-01 17:49:06.018 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.018 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.018 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.018 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.018 if best_ratio < cutoff:
2025-07-01 17:49:06.018 # no non-identical "pretty close" pair
2025-07-01 17:49:06.018 if eqi is None:
2025-07-01 17:49:06.018 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.019 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.019 return
2025-07-01 17:49:06.019 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.019 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.019 else:
2025-07-01 17:49:06.019 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.019 eqi = None
2025-07-01 17:49:06.019
2025-07-01 17:49:06.019 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.019 # identical
2025-07-01 17:49:06.019
2025-07-01 17:49:06.019 # pump out diffs from before the synch point
2025-07-01 17:49:06.019 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.019
2025-07-01 17:49:06.019 # do intraline marking on the synch pair
2025-07-01 17:49:06.019 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.019 if eqi is None:
2025-07-01 17:49:06.019 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.019 atags = btags = ""
2025-07-01 17:49:06.019 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.019 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.019 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.020 if tag == 'replace':
2025-07-01 17:49:06.020 atags += '^' * la
2025-07-01 17:49:06.020 btags += '^' * lb
2025-07-01 17:49:06.020 elif tag == 'delete':
2025-07-01 17:49:06.020 atags += '-' * la
2025-07-01 17:49:06.020 elif tag == 'insert':
2025-07-01 17:49:06.020 btags += '+' * lb
2025-07-01 17:49:06.020 elif tag == 'equal':
2025-07-01 17:49:06.020 atags += ' ' * la
2025-07-01 17:49:06.020 btags += ' ' * lb
2025-07-01 17:49:06.020 else:
2025-07-01 17:49:06.020 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.020 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.020 else:
2025-07-01 17:49:06.020 # the synch pair is identical
2025-07-01 17:49:06.020 yield ' ' + aelt
2025-07-01 17:49:06.020
2025-07-01 17:49:06.020 # pump out diffs from after the synch point
2025-07-01 17:49:06.020 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.020
2025-07-01 17:49:06.020 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.021 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.021
2025-07-01 17:49:06.021 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.021 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.021 alo = 92, ahi = 1101
2025-07-01 17:49:06.021 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.021 blo = 92, bhi = 1101
2025-07-01 17:49:06.021
2025-07-01 17:49:06.021 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.021 g = []
2025-07-01 17:49:06.021 if alo < ahi:
2025-07-01 17:49:06.021 if blo < bhi:
2025-07-01 17:49:06.021 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.021 else:
2025-07-01 17:49:06.021 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.021 elif blo < bhi:
2025-07-01 17:49:06.021 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.021
2025-07-01 17:49:06.021 > yield from g
2025-07-01 17:49:06.021
2025-07-01 17:49:06.021 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.021 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.022
2025-07-01 17:49:06.022 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.022 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.022 alo = 92, ahi = 1101
2025-07-01 17:49:06.022 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.022 blo = 92, bhi = 1101
2025-07-01 17:49:06.022
2025-07-01 17:49:06.022 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.022 r"""
2025-07-01 17:49:06.022 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.022 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.022 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.022 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.022
2025-07-01 17:49:06.022 Example:
2025-07-01 17:49:06.022
2025-07-01 17:49:06.022 >>> d = Differ()
2025-07-01 17:49:06.022 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.022 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.023 >>> print(''.join(results), end="")
2025-07-01 17:49:06.023 - abcDefghiJkl
2025-07-01 17:49:06.023 + abcdefGhijkl
2025-07-01 17:49:06.023 """
2025-07-01 17:49:06.023
2025-07-01 17:49:06.023 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.023 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.023 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.023 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.023 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.023
2025-07-01 17:49:06.023 # search for the pair that matches best without being identical
2025-07-01 17:49:06.023 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.023 # on junk -- unless we have to)
2025-07-01 17:49:06.023 for j in range(blo, bhi):
2025-07-01 17:49:06.023 bj = b[j]
2025-07-01 17:49:06.023 cruncher.set_seq2(bj)
2025-07-01 17:49:06.023 for i in range(alo, ahi):
2025-07-01 17:49:06.024 ai = a[i]
2025-07-01 17:49:06.024 if ai == bj:
2025-07-01 17:49:06.024 if eqi is None:
2025-07-01 17:49:06.024 eqi, eqj = i, j
2025-07-01 17:49:06.024 continue
2025-07-01 17:49:06.024 cruncher.set_seq1(ai)
2025-07-01 17:49:06.024 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.024 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.024 # compares by a factor of 3.
2025-07-01 17:49:06.024 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.024 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.024 # of the computation is cached by cruncher
2025-07-01 17:49:06.024 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.024 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.024 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.024 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.024 if best_ratio < cutoff:
2025-07-01 17:49:06.024 # no non-identical "pretty close" pair
2025-07-01 17:49:06.024 if eqi is None:
2025-07-01 17:49:06.024 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.025 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.028 return
2025-07-01 17:49:06.028 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.028 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.028 else:
2025-07-01 17:49:06.028 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.028 eqi = None
2025-07-01 17:49:06.028
2025-07-01 17:49:06.028 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.028 # identical
2025-07-01 17:49:06.028
2025-07-01 17:49:06.028 # pump out diffs from before the synch point
2025-07-01 17:49:06.028 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.028
2025-07-01 17:49:06.028 # do intraline marking on the synch pair
2025-07-01 17:49:06.028 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.028 if eqi is None:
2025-07-01 17:49:06.028 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.028 atags = btags = ""
2025-07-01 17:49:06.028 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.028 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.028 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.029 if tag == 'replace':
2025-07-01 17:49:06.029 atags += '^' * la
2025-07-01 17:49:06.029 btags += '^' * lb
2025-07-01 17:49:06.029 elif tag == 'delete':
2025-07-01 17:49:06.029 atags += '-' * la
2025-07-01 17:49:06.029 elif tag == 'insert':
2025-07-01 17:49:06.029 btags += '+' * lb
2025-07-01 17:49:06.029 elif tag == 'equal':
2025-07-01 17:49:06.029 atags += ' ' * la
2025-07-01 17:49:06.029 btags += ' ' * lb
2025-07-01 17:49:06.029 else:
2025-07-01 17:49:06.029 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.029 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.029 else:
2025-07-01 17:49:06.029 # the synch pair is identical
2025-07-01 17:49:06.029 yield ' ' + aelt
2025-07-01 17:49:06.029
2025-07-01 17:49:06.029 # pump out diffs from after the synch point
2025-07-01 17:49:06.029 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.029
2025-07-01 17:49:06.029 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.030 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.030
2025-07-01 17:49:06.030 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.030 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.030 alo = 93, ahi = 1101
2025-07-01 17:49:06.030 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.030 blo = 93, bhi = 1101
2025-07-01 17:49:06.030
2025-07-01 17:49:06.030 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.030 g = []
2025-07-01 17:49:06.030 if alo < ahi:
2025-07-01 17:49:06.030 if blo < bhi:
2025-07-01 17:49:06.030 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.030 else:
2025-07-01 17:49:06.030 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.030 elif blo < bhi:
2025-07-01 17:49:06.030 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.030
2025-07-01 17:49:06.030 > yield from g
2025-07-01 17:49:06.030
2025-07-01 17:49:06.030 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.030 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.031
2025-07-01 17:49:06.031 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.031 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.031 alo = 93, ahi = 1101
2025-07-01 17:49:06.031 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.031 blo = 93, bhi = 1101
2025-07-01 17:49:06.031
2025-07-01 17:49:06.031 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.031 r"""
2025-07-01 17:49:06.031 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.031 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.031 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.031 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.031
2025-07-01 17:49:06.031 Example:
2025-07-01 17:49:06.031
2025-07-01 17:49:06.031 >>> d = Differ()
2025-07-01 17:49:06.031 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.031 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.032 >>> print(''.join(results), end="")
2025-07-01 17:49:06.032 - abcDefghiJkl
2025-07-01 17:49:06.032 + abcdefGhijkl
2025-07-01 17:49:06.032 """
2025-07-01 17:49:06.032
2025-07-01 17:49:06.032 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.032 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.032 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.032 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.032 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.032
2025-07-01 17:49:06.032 # search for the pair that matches best without being identical
2025-07-01 17:49:06.032 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.032 # on junk -- unless we have to)
2025-07-01 17:49:06.032 for j in range(blo, bhi):
2025-07-01 17:49:06.032 bj = b[j]
2025-07-01 17:49:06.032 cruncher.set_seq2(bj)
2025-07-01 17:49:06.032 for i in range(alo, ahi):
2025-07-01 17:49:06.032 ai = a[i]
2025-07-01 17:49:06.032 if ai == bj:
2025-07-01 17:49:06.033 if eqi is None:
2025-07-01 17:49:06.033 eqi, eqj = i, j
2025-07-01 17:49:06.033 continue
2025-07-01 17:49:06.033 cruncher.set_seq1(ai)
2025-07-01 17:49:06.033 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.033 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.033 # compares by a factor of 3.
2025-07-01 17:49:06.033 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.033 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.033 # of the computation is cached by cruncher
2025-07-01 17:49:06.033 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.033 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.033 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.033 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.033 if best_ratio < cutoff:
2025-07-01 17:49:06.033 # no non-identical "pretty close" pair
2025-07-01 17:49:06.033 if eqi is None:
2025-07-01 17:49:06.033 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.033 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.033 return
2025-07-01 17:49:06.033 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.034 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.034 else:
2025-07-01 17:49:06.034 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.034 eqi = None
2025-07-01 17:49:06.034
2025-07-01 17:49:06.034 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.034 # identical
2025-07-01 17:49:06.034
2025-07-01 17:49:06.034 # pump out diffs from before the synch point
2025-07-01 17:49:06.034 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.034
2025-07-01 17:49:06.034 # do intraline marking on the synch pair
2025-07-01 17:49:06.034 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.034 if eqi is None:
2025-07-01 17:49:06.034 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.034 atags = btags = ""
2025-07-01 17:49:06.034 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.034 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.035 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.035 if tag == 'replace':
2025-07-01 17:49:06.035 atags += '^' * la
2025-07-01 17:49:06.035 btags += '^' * lb
2025-07-01 17:49:06.035 elif tag == 'delete':
2025-07-01 17:49:06.035 atags += '-' * la
2025-07-01 17:49:06.035 elif tag == 'insert':
2025-07-01 17:49:06.035 btags += '+' * lb
2025-07-01 17:49:06.035 elif tag == 'equal':
2025-07-01 17:49:06.035 atags += ' ' * la
2025-07-01 17:49:06.035 btags += ' ' * lb
2025-07-01 17:49:06.035 else:
2025-07-01 17:49:06.035 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.035 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.035 else:
2025-07-01 17:49:06.035 # the synch pair is identical
2025-07-01 17:49:06.035 yield ' ' + aelt
2025-07-01 17:49:06.035
2025-07-01 17:49:06.035 # pump out diffs from after the synch point
2025-07-01 17:49:06.035 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.035
2025-07-01 17:49:06.036 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.036 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.036
2025-07-01 17:49:06.036 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.036 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.036 alo = 94, ahi = 1101
2025-07-01 17:49:06.036 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.036 blo = 94, bhi = 1101
2025-07-01 17:49:06.036
2025-07-01 17:49:06.036 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.036 g = []
2025-07-01 17:49:06.036 if alo < ahi:
2025-07-01 17:49:06.036 if blo < bhi:
2025-07-01 17:49:06.036 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.036 else:
2025-07-01 17:49:06.036 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.036 elif blo < bhi:
2025-07-01 17:49:06.036 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.036
2025-07-01 17:49:06.036 > yield from g
2025-07-01 17:49:06.036
2025-07-01 17:49:06.037 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.037 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.037
2025-07-01 17:49:06.037 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.037 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.037 alo = 94, ahi = 1101
2025-07-01 17:49:06.037 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.037 blo = 94, bhi = 1101
2025-07-01 17:49:06.037
2025-07-01 17:49:06.037 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.037 r"""
2025-07-01 17:49:06.037 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.037 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.037 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.037 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.037
2025-07-01 17:49:06.037 Example:
2025-07-01 17:49:06.037
2025-07-01 17:49:06.037 >>> d = Differ()
2025-07-01 17:49:06.037 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.038 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.038 >>> print(''.join(results), end="")
2025-07-01 17:49:06.038 - abcDefghiJkl
2025-07-01 17:49:06.038 + abcdefGhijkl
2025-07-01 17:49:06.038 """
2025-07-01 17:49:06.038
2025-07-01 17:49:06.038 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.038 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.038 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.038 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.038 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.038
2025-07-01 17:49:06.038 # search for the pair that matches best without being identical
2025-07-01 17:49:06.038 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.038 # on junk -- unless we have to)
2025-07-01 17:49:06.038 for j in range(blo, bhi):
2025-07-01 17:49:06.038 bj = b[j]
2025-07-01 17:49:06.038 cruncher.set_seq2(bj)
2025-07-01 17:49:06.038 for i in range(alo, ahi):
2025-07-01 17:49:06.039 ai = a[i]
2025-07-01 17:49:06.039 if ai == bj:
2025-07-01 17:49:06.039 if eqi is None:
2025-07-01 17:49:06.039 eqi, eqj = i, j
2025-07-01 17:49:06.039 continue
2025-07-01 17:49:06.039 cruncher.set_seq1(ai)
2025-07-01 17:49:06.039 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.039 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.039 # compares by a factor of 3.
2025-07-01 17:49:06.039 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.039 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.039 # of the computation is cached by cruncher
2025-07-01 17:49:06.039 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.039 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.039 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.039 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.039 if best_ratio < cutoff:
2025-07-01 17:49:06.039 # no non-identical "pretty close" pair
2025-07-01 17:49:06.039 if eqi is None:
2025-07-01 17:49:06.039 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.040 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.044 return
2025-07-01 17:49:06.044 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.045 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.045 else:
2025-07-01 17:49:06.045 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.045 eqi = None
2025-07-01 17:49:06.045
2025-07-01 17:49:06.045 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.045 # identical
2025-07-01 17:49:06.045
2025-07-01 17:49:06.045 # pump out diffs from before the synch point
2025-07-01 17:49:06.045 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.045
2025-07-01 17:49:06.045 # do intraline marking on the synch pair
2025-07-01 17:49:06.045 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.045 if eqi is None:
2025-07-01 17:49:06.045 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.045 atags = btags = ""
2025-07-01 17:49:06.045 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.045 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.045 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.045 if tag == 'replace':
2025-07-01 17:49:06.046 atags += '^' * la
2025-07-01 17:49:06.046 btags += '^' * lb
2025-07-01 17:49:06.046 elif tag == 'delete':
2025-07-01 17:49:06.046 atags += '-' * la
2025-07-01 17:49:06.046 elif tag == 'insert':
2025-07-01 17:49:06.046 btags += '+' * lb
2025-07-01 17:49:06.046 elif tag == 'equal':
2025-07-01 17:49:06.046 atags += ' ' * la
2025-07-01 17:49:06.046 btags += ' ' * lb
2025-07-01 17:49:06.046 else:
2025-07-01 17:49:06.046 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.046 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.046 else:
2025-07-01 17:49:06.046 # the synch pair is identical
2025-07-01 17:49:06.046 yield ' ' + aelt
2025-07-01 17:49:06.046
2025-07-01 17:49:06.046 # pump out diffs from after the synch point
2025-07-01 17:49:06.046 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.046
2025-07-01 17:49:06.046 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.046 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.047
2025-07-01 17:49:06.047 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.047 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.047 alo = 95, ahi = 1101
2025-07-01 17:49:06.047 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.047 blo = 95, bhi = 1101
2025-07-01 17:49:06.047
2025-07-01 17:49:06.047 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.047 g = []
2025-07-01 17:49:06.047 if alo < ahi:
2025-07-01 17:49:06.047 if blo < bhi:
2025-07-01 17:49:06.047 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.047 else:
2025-07-01 17:49:06.047 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.047 elif blo < bhi:
2025-07-01 17:49:06.047 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.047
2025-07-01 17:49:06.047 > yield from g
2025-07-01 17:49:06.047
2025-07-01 17:49:06.048 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.048 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.048
2025-07-01 17:49:06.048 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.048 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.048 alo = 95, ahi = 1101
2025-07-01 17:49:06.048 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.048 blo = 95, bhi = 1101
2025-07-01 17:49:06.048
2025-07-01 17:49:06.048 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.048 r"""
2025-07-01 17:49:06.048 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.048 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.048 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.048 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.048
2025-07-01 17:49:06.048 Example:
2025-07-01 17:49:06.048
2025-07-01 17:49:06.048 >>> d = Differ()
2025-07-01 17:49:06.048 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.049 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.049 >>> print(''.join(results), end="")
2025-07-01 17:49:06.049 - abcDefghiJkl
2025-07-01 17:49:06.049 + abcdefGhijkl
2025-07-01 17:49:06.049 """
2025-07-01 17:49:06.049
2025-07-01 17:49:06.049 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.049 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.049 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.049 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.049 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.049
2025-07-01 17:49:06.049 # search for the pair that matches best without being identical
2025-07-01 17:49:06.049 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.049 # on junk -- unless we have to)
2025-07-01 17:49:06.049 for j in range(blo, bhi):
2025-07-01 17:49:06.049 bj = b[j]
2025-07-01 17:49:06.049 cruncher.set_seq2(bj)
2025-07-01 17:49:06.049 for i in range(alo, ahi):
2025-07-01 17:49:06.050 ai = a[i]
2025-07-01 17:49:06.050 if ai == bj:
2025-07-01 17:49:06.050 if eqi is None:
2025-07-01 17:49:06.050 eqi, eqj = i, j
2025-07-01 17:49:06.050 continue
2025-07-01 17:49:06.050 cruncher.set_seq1(ai)
2025-07-01 17:49:06.050 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.050 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.050 # compares by a factor of 3.
2025-07-01 17:49:06.050 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.050 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.050 # of the computation is cached by cruncher
2025-07-01 17:49:06.050 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.050 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.050 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.050 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.050 if best_ratio < cutoff:
2025-07-01 17:49:06.050 # no non-identical "pretty close" pair
2025-07-01 17:49:06.050 if eqi is None:
2025-07-01 17:49:06.050 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.051 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.051 return
2025-07-01 17:49:06.051 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.051 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.051 else:
2025-07-01 17:49:06.051 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.051 eqi = None
2025-07-01 17:49:06.051
2025-07-01 17:49:06.051 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.051 # identical
2025-07-01 17:49:06.051
2025-07-01 17:49:06.051 # pump out diffs from before the synch point
2025-07-01 17:49:06.051 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.051
2025-07-01 17:49:06.051 # do intraline marking on the synch pair
2025-07-01 17:49:06.051 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.051 if eqi is None:
2025-07-01 17:49:06.051 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.051 atags = btags = ""
2025-07-01 17:49:06.051 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.051 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.052 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.052 if tag == 'replace':
2025-07-01 17:49:06.052 atags += '^' * la
2025-07-01 17:49:06.052 btags += '^' * lb
2025-07-01 17:49:06.052 elif tag == 'delete':
2025-07-01 17:49:06.052 atags += '-' * la
2025-07-01 17:49:06.052 elif tag == 'insert':
2025-07-01 17:49:06.052 btags += '+' * lb
2025-07-01 17:49:06.052 elif tag == 'equal':
2025-07-01 17:49:06.052 atags += ' ' * la
2025-07-01 17:49:06.052 btags += ' ' * lb
2025-07-01 17:49:06.052 else:
2025-07-01 17:49:06.052 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.052 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.052 else:
2025-07-01 17:49:06.052 # the synch pair is identical
2025-07-01 17:49:06.052 yield ' ' + aelt
2025-07-01 17:49:06.052
2025-07-01 17:49:06.052 # pump out diffs from after the synch point
2025-07-01 17:49:06.052 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.053
2025-07-01 17:49:06.053 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.053 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.053
2025-07-01 17:49:06.053 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.053 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.053 alo = 96, ahi = 1101
2025-07-01 17:49:06.053 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.053 blo = 96, bhi = 1101
2025-07-01 17:49:06.053
2025-07-01 17:49:06.053 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.053 g = []
2025-07-01 17:49:06.053 if alo < ahi:
2025-07-01 17:49:06.053 if blo < bhi:
2025-07-01 17:49:06.053 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.053 else:
2025-07-01 17:49:06.053 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.053 elif blo < bhi:
2025-07-01 17:49:06.053 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.053
2025-07-01 17:49:06.053 > yield from g
2025-07-01 17:49:06.054
2025-07-01 17:49:06.054 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.054 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.054
2025-07-01 17:49:06.054 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.054 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.054 alo = 96, ahi = 1101
2025-07-01 17:49:06.054 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.054 blo = 96, bhi = 1101
2025-07-01 17:49:06.054
2025-07-01 17:49:06.054 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.054 r"""
2025-07-01 17:49:06.054 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.054 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.054 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.054 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.054
2025-07-01 17:49:06.054 Example:
2025-07-01 17:49:06.054
2025-07-01 17:49:06.054 >>> d = Differ()
2025-07-01 17:49:06.055 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.058 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.058 >>> print(''.join(results), end="")
2025-07-01 17:49:06.058 - abcDefghiJkl
2025-07-01 17:49:06.058 + abcdefGhijkl
2025-07-01 17:49:06.058 """
2025-07-01 17:49:06.058
2025-07-01 17:49:06.058 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.058 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.058 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.058 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.058 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.058
2025-07-01 17:49:06.058 # search for the pair that matches best without being identical
2025-07-01 17:49:06.058 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.058 # on junk -- unless we have to)
2025-07-01 17:49:06.058 for j in range(blo, bhi):
2025-07-01 17:49:06.058 bj = b[j]
2025-07-01 17:49:06.059 cruncher.set_seq2(bj)
2025-07-01 17:49:06.059 for i in range(alo, ahi):
2025-07-01 17:49:06.059 ai = a[i]
2025-07-01 17:49:06.059 if ai == bj:
2025-07-01 17:49:06.059 if eqi is None:
2025-07-01 17:49:06.059 eqi, eqj = i, j
2025-07-01 17:49:06.059 continue
2025-07-01 17:49:06.059 cruncher.set_seq1(ai)
2025-07-01 17:49:06.059 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.059 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.059 # compares by a factor of 3.
2025-07-01 17:49:06.059 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.059 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.059 # of the computation is cached by cruncher
2025-07-01 17:49:06.059 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.059 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.059 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.059 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.059 if best_ratio < cutoff:
2025-07-01 17:49:06.060 # no non-identical "pretty close" pair
2025-07-01 17:49:06.060 if eqi is None:
2025-07-01 17:49:06.060 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.060 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.060 return
2025-07-01 17:49:06.060 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.060 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.060 else:
2025-07-01 17:49:06.060 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.060 eqi = None
2025-07-01 17:49:06.060
2025-07-01 17:49:06.060 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.060 # identical
2025-07-01 17:49:06.060
2025-07-01 17:49:06.060 # pump out diffs from before the synch point
2025-07-01 17:49:06.060 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.060
2025-07-01 17:49:06.060 # do intraline marking on the synch pair
2025-07-01 17:49:06.060 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.060 if eqi is None:
2025-07-01 17:49:06.061 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.061 atags = btags = ""
2025-07-01 17:49:06.061 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.061 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.061 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.061 if tag == 'replace':
2025-07-01 17:49:06.061 atags += '^' * la
2025-07-01 17:49:06.061 btags += '^' * lb
2025-07-01 17:49:06.061 elif tag == 'delete':
2025-07-01 17:49:06.061 atags += '-' * la
2025-07-01 17:49:06.061 elif tag == 'insert':
2025-07-01 17:49:06.061 btags += '+' * lb
2025-07-01 17:49:06.061 elif tag == 'equal':
2025-07-01 17:49:06.061 atags += ' ' * la
2025-07-01 17:49:06.061 btags += ' ' * lb
2025-07-01 17:49:06.061 else:
2025-07-01 17:49:06.061 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.061 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.061 else:
2025-07-01 17:49:06.061 # the synch pair is identical
2025-07-01 17:49:06.061 yield ' ' + aelt
2025-07-01 17:49:06.062
2025-07-01 17:49:06.062 # pump out diffs from after the synch point
2025-07-01 17:49:06.062 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.062
2025-07-01 17:49:06.062 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.062 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.062
2025-07-01 17:49:06.062 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.062 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.062 alo = 97, ahi = 1101
2025-07-01 17:49:06.062 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.062 blo = 97, bhi = 1101
2025-07-01 17:49:06.062
2025-07-01 17:49:06.062 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.062 g = []
2025-07-01 17:49:06.062 if alo < ahi:
2025-07-01 17:49:06.062 if blo < bhi:
2025-07-01 17:49:06.062 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.062 else:
2025-07-01 17:49:06.062 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.063 elif blo < bhi:
2025-07-01 17:49:06.063 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.063
2025-07-01 17:49:06.063 > yield from g
2025-07-01 17:49:06.063
2025-07-01 17:49:06.063 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.063 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.063
2025-07-01 17:49:06.063 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.063 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.063 alo = 97, ahi = 1101
2025-07-01 17:49:06.063 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.063 blo = 97, bhi = 1101
2025-07-01 17:49:06.063
2025-07-01 17:49:06.063 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.063 r"""
2025-07-01 17:49:06.063 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.063 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.063 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.063 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.064
2025-07-01 17:49:06.064 Example:
2025-07-01 17:49:06.064
2025-07-01 17:49:06.064 >>> d = Differ()
2025-07-01 17:49:06.064 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.064 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.064 >>> print(''.join(results), end="")
2025-07-01 17:49:06.064 - abcDefghiJkl
2025-07-01 17:49:06.064 + abcdefGhijkl
2025-07-01 17:49:06.064 """
2025-07-01 17:49:06.064
2025-07-01 17:49:06.064 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.064 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.064 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.064 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.064 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.064
2025-07-01 17:49:06.064 # search for the pair that matches best without being identical
2025-07-01 17:49:06.064 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.065 # on junk -- unless we have to)
2025-07-01 17:49:06.065 for j in range(blo, bhi):
2025-07-01 17:49:06.065 bj = b[j]
2025-07-01 17:49:06.065 cruncher.set_seq2(bj)
2025-07-01 17:49:06.065 for i in range(alo, ahi):
2025-07-01 17:49:06.065 ai = a[i]
2025-07-01 17:49:06.065 if ai == bj:
2025-07-01 17:49:06.065 if eqi is None:
2025-07-01 17:49:06.065 eqi, eqj = i, j
2025-07-01 17:49:06.065 continue
2025-07-01 17:49:06.065 cruncher.set_seq1(ai)
2025-07-01 17:49:06.065 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.065 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.065 # compares by a factor of 3.
2025-07-01 17:49:06.065 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.065 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.065 # of the computation is cached by cruncher
2025-07-01 17:49:06.065 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.065 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.065 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.065 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.066 if best_ratio < cutoff:
2025-07-01 17:49:06.066 # no non-identical "pretty close" pair
2025-07-01 17:49:06.066 if eqi is None:
2025-07-01 17:49:06.066 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.066 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.066 return
2025-07-01 17:49:06.066 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.066 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.066 else:
2025-07-01 17:49:06.066 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.066 eqi = None
2025-07-01 17:49:06.066
2025-07-01 17:49:06.066 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.066 # identical
2025-07-01 17:49:06.066
2025-07-01 17:49:06.066 # pump out diffs from before the synch point
2025-07-01 17:49:06.066 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.066
2025-07-01 17:49:06.066 # do intraline marking on the synch pair
2025-07-01 17:49:06.066 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.066 if eqi is None:
2025-07-01 17:49:06.067 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.067 atags = btags = ""
2025-07-01 17:49:06.067 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.067 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.067 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.067 if tag == 'replace':
2025-07-01 17:49:06.067 atags += '^' * la
2025-07-01 17:49:06.067 btags += '^' * lb
2025-07-01 17:49:06.067 elif tag == 'delete':
2025-07-01 17:49:06.067 atags += '-' * la
2025-07-01 17:49:06.067 elif tag == 'insert':
2025-07-01 17:49:06.067 btags += '+' * lb
2025-07-01 17:49:06.067 elif tag == 'equal':
2025-07-01 17:49:06.067 atags += ' ' * la
2025-07-01 17:49:06.067 btags += ' ' * lb
2025-07-01 17:49:06.067 else:
2025-07-01 17:49:06.067 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.067 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.067 else:
2025-07-01 17:49:06.067 # the synch pair is identical
2025-07-01 17:49:06.068 yield ' ' + aelt
2025-07-01 17:49:06.068
2025-07-01 17:49:06.068 # pump out diffs from after the synch point
2025-07-01 17:49:06.068 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.068
2025-07-01 17:49:06.068 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.068 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.068
2025-07-01 17:49:06.068 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.068 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.068 alo = 98, ahi = 1101
2025-07-01 17:49:06.068 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.068 blo = 98, bhi = 1101
2025-07-01 17:49:06.068
2025-07-01 17:49:06.068 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.068 g = []
2025-07-01 17:49:06.068 if alo < ahi:
2025-07-01 17:49:06.068 if blo < bhi:
2025-07-01 17:49:06.068 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.068 else:
2025-07-01 17:49:06.068 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.069 elif blo < bhi:
2025-07-01 17:49:06.069 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.069
2025-07-01 17:49:06.069 > yield from g
2025-07-01 17:49:06.069
2025-07-01 17:49:06.069 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.069 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.069
2025-07-01 17:49:06.069 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.069 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.069 alo = 98, ahi = 1101
2025-07-01 17:49:06.069 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.069 blo = 98, bhi = 1101
2025-07-01 17:49:06.069
2025-07-01 17:49:06.069 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.069 r"""
2025-07-01 17:49:06.069 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.069 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.069 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.069 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.070
2025-07-01 17:49:06.070 Example:
2025-07-01 17:49:06.070
2025-07-01 17:49:06.070 >>> d = Differ()
2025-07-01 17:49:06.070 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.070 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.070 >>> print(''.join(results), end="")
2025-07-01 17:49:06.070 - abcDefghiJkl
2025-07-01 17:49:06.070 + abcdefGhijkl
2025-07-01 17:49:06.070 """
2025-07-01 17:49:06.070
2025-07-01 17:49:06.070 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.070 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.070 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.070 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.070 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.070
2025-07-01 17:49:06.070 # search for the pair that matches best without being identical
2025-07-01 17:49:06.070 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.071 # on junk -- unless we have to)
2025-07-01 17:49:06.075 for j in range(blo, bhi):
2025-07-01 17:49:06.075 bj = b[j]
2025-07-01 17:49:06.075 cruncher.set_seq2(bj)
2025-07-01 17:49:06.075 for i in range(alo, ahi):
2025-07-01 17:49:06.075 ai = a[i]
2025-07-01 17:49:06.076 if ai == bj:
2025-07-01 17:49:06.076 if eqi is None:
2025-07-01 17:49:06.076 eqi, eqj = i, j
2025-07-01 17:49:06.076 continue
2025-07-01 17:49:06.076 cruncher.set_seq1(ai)
2025-07-01 17:49:06.076 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.076 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.076 # compares by a factor of 3.
2025-07-01 17:49:06.076 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.076 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.076 # of the computation is cached by cruncher
2025-07-01 17:49:06.076 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.076 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.076 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.076 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.076 if best_ratio < cutoff:
2025-07-01 17:49:06.076 # no non-identical "pretty close" pair
2025-07-01 17:49:06.076 if eqi is None:
2025-07-01 17:49:06.076 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.076 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.077 return
2025-07-01 17:49:06.077 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.077 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.077 else:
2025-07-01 17:49:06.077 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.077 eqi = None
2025-07-01 17:49:06.077
2025-07-01 17:49:06.077 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.077 # identical
2025-07-01 17:49:06.077
2025-07-01 17:49:06.077 # pump out diffs from before the synch point
2025-07-01 17:49:06.077 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.077
2025-07-01 17:49:06.077 # do intraline marking on the synch pair
2025-07-01 17:49:06.077 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.077 if eqi is None:
2025-07-01 17:49:06.077 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.077 atags = btags = ""
2025-07-01 17:49:06.077 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.077 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.077 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.078 if tag == 'replace':
2025-07-01 17:49:06.078 atags += '^' * la
2025-07-01 17:49:06.078 btags += '^' * lb
2025-07-01 17:49:06.078 elif tag == 'delete':
2025-07-01 17:49:06.078 atags += '-' * la
2025-07-01 17:49:06.078 elif tag == 'insert':
2025-07-01 17:49:06.078 btags += '+' * lb
2025-07-01 17:49:06.078 elif tag == 'equal':
2025-07-01 17:49:06.078 atags += ' ' * la
2025-07-01 17:49:06.078 btags += ' ' * lb
2025-07-01 17:49:06.078 else:
2025-07-01 17:49:06.078 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.078 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.078 else:
2025-07-01 17:49:06.078 # the synch pair is identical
2025-07-01 17:49:06.078 yield ' ' + aelt
2025-07-01 17:49:06.078
2025-07-01 17:49:06.078 # pump out diffs from after the synch point
2025-07-01 17:49:06.078 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.078
2025-07-01 17:49:06.079 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.079 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.079
2025-07-01 17:49:06.079 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.079 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.079 alo = 99, ahi = 1101
2025-07-01 17:49:06.079 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.079 blo = 99, bhi = 1101
2025-07-01 17:49:06.079
2025-07-01 17:49:06.079 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.079 g = []
2025-07-01 17:49:06.079 if alo < ahi:
2025-07-01 17:49:06.079 if blo < bhi:
2025-07-01 17:49:06.079 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.079 else:
2025-07-01 17:49:06.079 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.079 elif blo < bhi:
2025-07-01 17:49:06.079 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.079
2025-07-01 17:49:06.079 > yield from g
2025-07-01 17:49:06.079
2025-07-01 17:49:06.080 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.080 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.080
2025-07-01 17:49:06.080 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.080 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.080 alo = 99, ahi = 1101
2025-07-01 17:49:06.080 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.080 blo = 99, bhi = 1101
2025-07-01 17:49:06.080
2025-07-01 17:49:06.080 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.080 r"""
2025-07-01 17:49:06.080 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.080 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.080 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.080 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.080
2025-07-01 17:49:06.080 Example:
2025-07-01 17:49:06.080
2025-07-01 17:49:06.081 >>> d = Differ()
2025-07-01 17:49:06.081 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.081 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.081 >>> print(''.join(results), end="")
2025-07-01 17:49:06.081 - abcDefghiJkl
2025-07-01 17:49:06.081 + abcdefGhijkl
2025-07-01 17:49:06.081 """
2025-07-01 17:49:06.081
2025-07-01 17:49:06.081 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.081 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.081 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.081 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.081 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.081
2025-07-01 17:49:06.081 # search for the pair that matches best without being identical
2025-07-01 17:49:06.081 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.081 # on junk -- unless we have to)
2025-07-01 17:49:06.081 for j in range(blo, bhi):
2025-07-01 17:49:06.081 bj = b[j]
2025-07-01 17:49:06.082 cruncher.set_seq2(bj)
2025-07-01 17:49:06.082 for i in range(alo, ahi):
2025-07-01 17:49:06.082 ai = a[i]
2025-07-01 17:49:06.082 if ai == bj:
2025-07-01 17:49:06.082 if eqi is None:
2025-07-01 17:49:06.082 eqi, eqj = i, j
2025-07-01 17:49:06.082 continue
2025-07-01 17:49:06.082 cruncher.set_seq1(ai)
2025-07-01 17:49:06.082 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.082 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.082 # compares by a factor of 3.
2025-07-01 17:49:06.082 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.082 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.082 # of the computation is cached by cruncher
2025-07-01 17:49:06.082 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.082 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.082 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.082 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.082 if best_ratio < cutoff:
2025-07-01 17:49:06.082 # no non-identical "pretty close" pair
2025-07-01 17:49:06.083 if eqi is None:
2025-07-01 17:49:06.083 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.083 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.083 return
2025-07-01 17:49:06.083 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.083 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.083 else:
2025-07-01 17:49:06.083 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.083 eqi = None
2025-07-01 17:49:06.083
2025-07-01 17:49:06.083 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.083 # identical
2025-07-01 17:49:06.083
2025-07-01 17:49:06.083 # pump out diffs from before the synch point
2025-07-01 17:49:06.083 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.083
2025-07-01 17:49:06.083 # do intraline marking on the synch pair
2025-07-01 17:49:06.083 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.083 if eqi is None:
2025-07-01 17:49:06.083 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.083 atags = btags = ""
2025-07-01 17:49:06.084 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.084 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.084 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.084 if tag == 'replace':
2025-07-01 17:49:06.084 atags += '^' * la
2025-07-01 17:49:06.084 btags += '^' * lb
2025-07-01 17:49:06.084 elif tag == 'delete':
2025-07-01 17:49:06.084 atags += '-' * la
2025-07-01 17:49:06.084 elif tag == 'insert':
2025-07-01 17:49:06.084 btags += '+' * lb
2025-07-01 17:49:06.084 elif tag == 'equal':
2025-07-01 17:49:06.084 atags += ' ' * la
2025-07-01 17:49:06.084 btags += ' ' * lb
2025-07-01 17:49:06.084 else:
2025-07-01 17:49:06.084 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.084 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.084 else:
2025-07-01 17:49:06.084 # the synch pair is identical
2025-07-01 17:49:06.084 yield ' ' + aelt
2025-07-01 17:49:06.084
2025-07-01 17:49:06.084 # pump out diffs from after the synch point
2025-07-01 17:49:06.085 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.085
2025-07-01 17:49:06.085 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.085 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.085
2025-07-01 17:49:06.085 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.085 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.085 alo = 100, ahi = 1101
2025-07-01 17:49:06.085 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.085 blo = 100, bhi = 1101
2025-07-01 17:49:06.085
2025-07-01 17:49:06.085 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.085 g = []
2025-07-01 17:49:06.085 if alo < ahi:
2025-07-01 17:49:06.085 if blo < bhi:
2025-07-01 17:49:06.085 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.085 else:
2025-07-01 17:49:06.085 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.085 elif blo < bhi:
2025-07-01 17:49:06.085 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.086
2025-07-01 17:49:06.089 > yield from g
2025-07-01 17:49:06.089
2025-07-01 17:49:06.089 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.089 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.089
2025-07-01 17:49:06.089 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.089 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.089 alo = 100, ahi = 1101
2025-07-01 17:49:06.089 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.089 blo = 100, bhi = 1101
2025-07-01 17:49:06.089
2025-07-01 17:49:06.089 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.089 r"""
2025-07-01 17:49:06.089 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.089 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.089 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.089 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.089
2025-07-01 17:49:06.089 Example:
2025-07-01 17:49:06.090
2025-07-01 17:49:06.090 >>> d = Differ()
2025-07-01 17:49:06.090 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.090 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.090 >>> print(''.join(results), end="")
2025-07-01 17:49:06.090 - abcDefghiJkl
2025-07-01 17:49:06.090 + abcdefGhijkl
2025-07-01 17:49:06.090 """
2025-07-01 17:49:06.090
2025-07-01 17:49:06.090 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.090 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.090 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.090 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.090 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.090
2025-07-01 17:49:06.090 # search for the pair that matches best without being identical
2025-07-01 17:49:06.090 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.090 # on junk -- unless we have to)
2025-07-01 17:49:06.090 for j in range(blo, bhi):
2025-07-01 17:49:06.091 bj = b[j]
2025-07-01 17:49:06.091 cruncher.set_seq2(bj)
2025-07-01 17:49:06.091 for i in range(alo, ahi):
2025-07-01 17:49:06.091 ai = a[i]
2025-07-01 17:49:06.091 if ai == bj:
2025-07-01 17:49:06.091 if eqi is None:
2025-07-01 17:49:06.091 eqi, eqj = i, j
2025-07-01 17:49:06.091 continue
2025-07-01 17:49:06.091 cruncher.set_seq1(ai)
2025-07-01 17:49:06.091 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.091 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.091 # compares by a factor of 3.
2025-07-01 17:49:06.091 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.091 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.091 # of the computation is cached by cruncher
2025-07-01 17:49:06.091 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.091 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.091 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.091 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.091 if best_ratio < cutoff:
2025-07-01 17:49:06.092 # no non-identical "pretty close" pair
2025-07-01 17:49:06.092 if eqi is None:
2025-07-01 17:49:06.092 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.092 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.092 return
2025-07-01 17:49:06.092 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.092 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.092 else:
2025-07-01 17:49:06.092 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.092 eqi = None
2025-07-01 17:49:06.092
2025-07-01 17:49:06.092 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.092 # identical
2025-07-01 17:49:06.092
2025-07-01 17:49:06.092 # pump out diffs from before the synch point
2025-07-01 17:49:06.092 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.092
2025-07-01 17:49:06.092 # do intraline marking on the synch pair
2025-07-01 17:49:06.092 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.093 if eqi is None:
2025-07-01 17:49:06.093 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.093 atags = btags = ""
2025-07-01 17:49:06.093 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.093 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.093 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.093 if tag == 'replace':
2025-07-01 17:49:06.093 atags += '^' * la
2025-07-01 17:49:06.093 btags += '^' * lb
2025-07-01 17:49:06.093 elif tag == 'delete':
2025-07-01 17:49:06.093 atags += '-' * la
2025-07-01 17:49:06.093 elif tag == 'insert':
2025-07-01 17:49:06.093 btags += '+' * lb
2025-07-01 17:49:06.093 elif tag == 'equal':
2025-07-01 17:49:06.093 atags += ' ' * la
2025-07-01 17:49:06.093 btags += ' ' * lb
2025-07-01 17:49:06.093 else:
2025-07-01 17:49:06.093 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.093 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.093 else:
2025-07-01 17:49:06.093 # the synch pair is identical
2025-07-01 17:49:06.094 yield ' ' + aelt
2025-07-01 17:49:06.094
2025-07-01 17:49:06.094 # pump out diffs from after the synch point
2025-07-01 17:49:06.094 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.094
2025-07-01 17:49:06.094 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.094 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.094
2025-07-01 17:49:06.094 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.094 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.094 alo = 101, ahi = 1101
2025-07-01 17:49:06.094 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.094 blo = 101, bhi = 1101
2025-07-01 17:49:06.094
2025-07-01 17:49:06.094 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.094 g = []
2025-07-01 17:49:06.094 if alo < ahi:
2025-07-01 17:49:06.094 if blo < bhi:
2025-07-01 17:49:06.094 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.094 else:
2025-07-01 17:49:06.095 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.095 elif blo < bhi:
2025-07-01 17:49:06.095 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.095
2025-07-01 17:49:06.095 > yield from g
2025-07-01 17:49:06.095
2025-07-01 17:49:06.095 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.095 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.095
2025-07-01 17:49:06.095 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.095 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.095 alo = 101, ahi = 1101
2025-07-01 17:49:06.095 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.095 blo = 101, bhi = 1101
2025-07-01 17:49:06.095
2025-07-01 17:49:06.095 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.095 r"""
2025-07-01 17:49:06.095 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.095 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.095 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.095 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.096
2025-07-01 17:49:06.096 Example:
2025-07-01 17:49:06.096
2025-07-01 17:49:06.096 >>> d = Differ()
2025-07-01 17:49:06.096 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.096 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.096 >>> print(''.join(results), end="")
2025-07-01 17:49:06.096 - abcDefghiJkl
2025-07-01 17:49:06.096 + abcdefGhijkl
2025-07-01 17:49:06.096 """
2025-07-01 17:49:06.096
2025-07-01 17:49:06.096 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.096 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.096 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.096 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.096 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.096
2025-07-01 17:49:06.096 # search for the pair that matches best without being identical
2025-07-01 17:49:06.097 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.097 # on junk -- unless we have to)
2025-07-01 17:49:06.097 for j in range(blo, bhi):
2025-07-01 17:49:06.097 bj = b[j]
2025-07-01 17:49:06.097 cruncher.set_seq2(bj)
2025-07-01 17:49:06.097 for i in range(alo, ahi):
2025-07-01 17:49:06.097 ai = a[i]
2025-07-01 17:49:06.097 if ai == bj:
2025-07-01 17:49:06.097 if eqi is None:
2025-07-01 17:49:06.097 eqi, eqj = i, j
2025-07-01 17:49:06.097 continue
2025-07-01 17:49:06.097 cruncher.set_seq1(ai)
2025-07-01 17:49:06.097 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.097 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.097 # compares by a factor of 3.
2025-07-01 17:49:06.097 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.097 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.097 # of the computation is cached by cruncher
2025-07-01 17:49:06.097 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.097 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.097 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.098 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.098 if best_ratio < cutoff:
2025-07-01 17:49:06.098 # no non-identical "pretty close" pair
2025-07-01 17:49:06.098 if eqi is None:
2025-07-01 17:49:06.098 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.098 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.098 return
2025-07-01 17:49:06.098 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.098 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.098 else:
2025-07-01 17:49:06.098 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.098 eqi = None
2025-07-01 17:49:06.098
2025-07-01 17:49:06.098 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.098 # identical
2025-07-01 17:49:06.098
2025-07-01 17:49:06.098 # pump out diffs from before the synch point
2025-07-01 17:49:06.098 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.098
2025-07-01 17:49:06.098 # do intraline marking on the synch pair
2025-07-01 17:49:06.098 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.099 if eqi is None:
2025-07-01 17:49:06.099 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.099 atags = btags = ""
2025-07-01 17:49:06.099 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.099 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.099 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.099 if tag == 'replace':
2025-07-01 17:49:06.099 atags += '^' * la
2025-07-01 17:49:06.099 btags += '^' * lb
2025-07-01 17:49:06.099 elif tag == 'delete':
2025-07-01 17:49:06.099 atags += '-' * la
2025-07-01 17:49:06.099 elif tag == 'insert':
2025-07-01 17:49:06.099 btags += '+' * lb
2025-07-01 17:49:06.099 elif tag == 'equal':
2025-07-01 17:49:06.099 atags += ' ' * la
2025-07-01 17:49:06.099 btags += ' ' * lb
2025-07-01 17:49:06.099 else:
2025-07-01 17:49:06.099 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.099 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.099 else:
2025-07-01 17:49:06.099 # the synch pair is identical
2025-07-01 17:49:06.100 yield ' ' + aelt
2025-07-01 17:49:06.100
2025-07-01 17:49:06.100 # pump out diffs from after the synch point
2025-07-01 17:49:06.100 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.100
2025-07-01 17:49:06.100 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.100 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.100
2025-07-01 17:49:06.100 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.100 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.100 alo = 102, ahi = 1101
2025-07-01 17:49:06.100 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.100 blo = 102, bhi = 1101
2025-07-01 17:49:06.100
2025-07-01 17:49:06.100 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.100 g = []
2025-07-01 17:49:06.100 if alo < ahi:
2025-07-01 17:49:06.100 if blo < bhi:
2025-07-01 17:49:06.100 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.100 else:
2025-07-01 17:49:06.106 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.106 elif blo < bhi:
2025-07-01 17:49:06.106 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.106
2025-07-01 17:49:06.107 > yield from g
2025-07-01 17:49:06.107
2025-07-01 17:49:06.107 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.107 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.107
2025-07-01 17:49:06.107 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.107 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.107 alo = 102, ahi = 1101
2025-07-01 17:49:06.107 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.107 blo = 102, bhi = 1101
2025-07-01 17:49:06.107
2025-07-01 17:49:06.107 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.107 r"""
2025-07-01 17:49:06.107 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.107 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.107 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.107 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.107
2025-07-01 17:49:06.107 Example:
2025-07-01 17:49:06.107
2025-07-01 17:49:06.108 >>> d = Differ()
2025-07-01 17:49:06.108 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.108 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.108 >>> print(''.join(results), end="")
2025-07-01 17:49:06.108 - abcDefghiJkl
2025-07-01 17:49:06.108 + abcdefGhijkl
2025-07-01 17:49:06.108 """
2025-07-01 17:49:06.108
2025-07-01 17:49:06.108 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.108 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.108 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.108 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.108 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.108
2025-07-01 17:49:06.109 # search for the pair that matches best without being identical
2025-07-01 17:49:06.109 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.109 # on junk -- unless we have to)
2025-07-01 17:49:06.109 for j in range(blo, bhi):
2025-07-01 17:49:06.109 bj = b[j]
2025-07-01 17:49:06.109 cruncher.set_seq2(bj)
2025-07-01 17:49:06.109 for i in range(alo, ahi):
2025-07-01 17:49:06.109 ai = a[i]
2025-07-01 17:49:06.109 if ai == bj:
2025-07-01 17:49:06.109 if eqi is None:
2025-07-01 17:49:06.109 eqi, eqj = i, j
2025-07-01 17:49:06.109 continue
2025-07-01 17:49:06.109 cruncher.set_seq1(ai)
2025-07-01 17:49:06.109 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.109 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.109 # compares by a factor of 3.
2025-07-01 17:49:06.109 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.110 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.110 # of the computation is cached by cruncher
2025-07-01 17:49:06.110 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.110 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.110 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.110 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.110 if best_ratio < cutoff:
2025-07-01 17:49:06.110 # no non-identical "pretty close" pair
2025-07-01 17:49:06.110 if eqi is None:
2025-07-01 17:49:06.110 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.110 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.110 return
2025-07-01 17:49:06.110 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.110 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.110 else:
2025-07-01 17:49:06.110 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.111 eqi = None
2025-07-01 17:49:06.111
2025-07-01 17:49:06.111 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.111 # identical
2025-07-01 17:49:06.111
2025-07-01 17:49:06.111 # pump out diffs from before the synch point
2025-07-01 17:49:06.111 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.111
2025-07-01 17:49:06.111 # do intraline marking on the synch pair
2025-07-01 17:49:06.111 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.111 if eqi is None:
2025-07-01 17:49:06.111 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.111 atags = btags = ""
2025-07-01 17:49:06.111 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.111 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.111 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.111 if tag == 'replace':
2025-07-01 17:49:06.112 atags += '^' * la
2025-07-01 17:49:06.112 btags += '^' * lb
2025-07-01 17:49:06.112 elif tag == 'delete':
2025-07-01 17:49:06.112 atags += '-' * la
2025-07-01 17:49:06.112 elif tag == 'insert':
2025-07-01 17:49:06.112 btags += '+' * lb
2025-07-01 17:49:06.112 elif tag == 'equal':
2025-07-01 17:49:06.112 atags += ' ' * la
2025-07-01 17:49:06.112 btags += ' ' * lb
2025-07-01 17:49:06.112 else:
2025-07-01 17:49:06.112 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.112 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.112 else:
2025-07-01 17:49:06.112 # the synch pair is identical
2025-07-01 17:49:06.112 yield ' ' + aelt
2025-07-01 17:49:06.112
2025-07-01 17:49:06.112 # pump out diffs from after the synch point
2025-07-01 17:49:06.113 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.113
2025-07-01 17:49:06.113 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.113 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.113
2025-07-01 17:49:06.113 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.113 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.113 alo = 103, ahi = 1101
2025-07-01 17:49:06.113 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.113 blo = 103, bhi = 1101
2025-07-01 17:49:06.113
2025-07-01 17:49:06.113 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.113 g = []
2025-07-01 17:49:06.113 if alo < ahi:
2025-07-01 17:49:06.113 if blo < bhi:
2025-07-01 17:49:06.113 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.113 else:
2025-07-01 17:49:06.114 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.114 elif blo < bhi:
2025-07-01 17:49:06.114 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.114
2025-07-01 17:49:06.114 > yield from g
2025-07-01 17:49:06.114
2025-07-01 17:49:06.114 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.114 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.114
2025-07-01 17:49:06.114 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.114 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.114 alo = 103, ahi = 1101
2025-07-01 17:49:06.114 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.114 blo = 103, bhi = 1101
2025-07-01 17:49:06.114
2025-07-01 17:49:06.114 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.115 r"""
2025-07-01 17:49:06.115 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.115 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.115 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.115 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.115
2025-07-01 17:49:06.115 Example:
2025-07-01 17:49:06.115
2025-07-01 17:49:06.115 >>> d = Differ()
2025-07-01 17:49:06.115 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.115 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.115 >>> print(''.join(results), end="")
2025-07-01 17:49:06.115 - abcDefghiJkl
2025-07-01 17:49:06.115 + abcdefGhijkl
2025-07-01 17:49:06.115 """
2025-07-01 17:49:06.116
2025-07-01 17:49:06.116 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.116 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.116 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.116 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.116 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.116
2025-07-01 17:49:06.116 # search for the pair that matches best without being identical
2025-07-01 17:49:06.116 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.116 # on junk -- unless we have to)
2025-07-01 17:49:06.116 for j in range(blo, bhi):
2025-07-01 17:49:06.116 bj = b[j]
2025-07-01 17:49:06.116 cruncher.set_seq2(bj)
2025-07-01 17:49:06.116 for i in range(alo, ahi):
2025-07-01 17:49:06.120 ai = a[i]
2025-07-01 17:49:06.120 if ai == bj:
2025-07-01 17:49:06.120 if eqi is None:
2025-07-01 17:49:06.120 eqi, eqj = i, j
2025-07-01 17:49:06.120 continue
2025-07-01 17:49:06.120 cruncher.set_seq1(ai)
2025-07-01 17:49:06.120 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.120 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.120 # compares by a factor of 3.
2025-07-01 17:49:06.120 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.121 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.121 # of the computation is cached by cruncher
2025-07-01 17:49:06.121 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.121 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.121 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.121 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.121 if best_ratio < cutoff:
2025-07-01 17:49:06.121 # no non-identical "pretty close" pair
2025-07-01 17:49:06.121 if eqi is None:
2025-07-01 17:49:06.121 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.121 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.121 return
2025-07-01 17:49:06.121 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.121 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.121 else:
2025-07-01 17:49:06.121 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.122 eqi = None
2025-07-01 17:49:06.122
2025-07-01 17:49:06.122 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.122 # identical
2025-07-01 17:49:06.122
2025-07-01 17:49:06.122 # pump out diffs from before the synch point
2025-07-01 17:49:06.122 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.122
2025-07-01 17:49:06.122 # do intraline marking on the synch pair
2025-07-01 17:49:06.122 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.122 if eqi is None:
2025-07-01 17:49:06.122 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.122 atags = btags = ""
2025-07-01 17:49:06.122 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.122 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.122 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.122 if tag == 'replace':
2025-07-01 17:49:06.123 atags += '^' * la
2025-07-01 17:49:06.123 btags += '^' * lb
2025-07-01 17:49:06.123 elif tag == 'delete':
2025-07-01 17:49:06.123 atags += '-' * la
2025-07-01 17:49:06.123 elif tag == 'insert':
2025-07-01 17:49:06.123 btags += '+' * lb
2025-07-01 17:49:06.123 elif tag == 'equal':
2025-07-01 17:49:06.123 atags += ' ' * la
2025-07-01 17:49:06.123 btags += ' ' * lb
2025-07-01 17:49:06.123 else:
2025-07-01 17:49:06.123 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.123 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.123 else:
2025-07-01 17:49:06.123 # the synch pair is identical
2025-07-01 17:49:06.123 yield ' ' + aelt
2025-07-01 17:49:06.124
2025-07-01 17:49:06.124 # pump out diffs from after the synch point
2025-07-01 17:49:06.124 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.124
2025-07-01 17:49:06.124 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.124 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.124
2025-07-01 17:49:06.124 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.124 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.124 alo = 104, ahi = 1101
2025-07-01 17:49:06.124 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.124 blo = 104, bhi = 1101
2025-07-01 17:49:06.124
2025-07-01 17:49:06.124 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.124 g = []
2025-07-01 17:49:06.124 if alo < ahi:
2025-07-01 17:49:06.124 if blo < bhi:
2025-07-01 17:49:06.125 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.125 else:
2025-07-01 17:49:06.125 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.125 elif blo < bhi:
2025-07-01 17:49:06.125 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.125
2025-07-01 17:49:06.125 > yield from g
2025-07-01 17:49:06.125
2025-07-01 17:49:06.125 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.125 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.125
2025-07-01 17:49:06.125 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.125 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.125 alo = 104, ahi = 1101
2025-07-01 17:49:06.125 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.125 blo = 104, bhi = 1101
2025-07-01 17:49:06.126
2025-07-01 17:49:06.126 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.126 r"""
2025-07-01 17:49:06.126 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.126 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.126 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.126 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.126
2025-07-01 17:49:06.126 Example:
2025-07-01 17:49:06.126
2025-07-01 17:49:06.126 >>> d = Differ()
2025-07-01 17:49:06.126 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.126 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.126 >>> print(''.join(results), end="")
2025-07-01 17:49:06.126 - abcDefghiJkl
2025-07-01 17:49:06.126 + abcdefGhijkl
2025-07-01 17:49:06.127 """
2025-07-01 17:49:06.127
2025-07-01 17:49:06.127 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.127 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.127 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.127 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.127 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.127
2025-07-01 17:49:06.127 # search for the pair that matches best without being identical
2025-07-01 17:49:06.127 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.127 # on junk -- unless we have to)
2025-07-01 17:49:06.127 for j in range(blo, bhi):
2025-07-01 17:49:06.127 bj = b[j]
2025-07-01 17:49:06.127 cruncher.set_seq2(bj)
2025-07-01 17:49:06.127 for i in range(alo, ahi):
2025-07-01 17:49:06.128 ai = a[i]
2025-07-01 17:49:06.128 if ai == bj:
2025-07-01 17:49:06.128 if eqi is None:
2025-07-01 17:49:06.128 eqi, eqj = i, j
2025-07-01 17:49:06.128 continue
2025-07-01 17:49:06.128 cruncher.set_seq1(ai)
2025-07-01 17:49:06.128 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.128 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.128 # compares by a factor of 3.
2025-07-01 17:49:06.128 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.128 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.128 # of the computation is cached by cruncher
2025-07-01 17:49:06.128 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.128 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.128 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.128 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.129 if best_ratio < cutoff:
2025-07-01 17:49:06.129 # no non-identical "pretty close" pair
2025-07-01 17:49:06.129 if eqi is None:
2025-07-01 17:49:06.129 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.129 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.129 return
2025-07-01 17:49:06.129 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.129 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.129 else:
2025-07-01 17:49:06.129 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.129 eqi = None
2025-07-01 17:49:06.129
2025-07-01 17:49:06.129 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.129 # identical
2025-07-01 17:49:06.129
2025-07-01 17:49:06.129 # pump out diffs from before the synch point
2025-07-01 17:49:06.129 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.129
2025-07-01 17:49:06.130 # do intraline marking on the synch pair
2025-07-01 17:49:06.130 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.130 if eqi is None:
2025-07-01 17:49:06.130 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.130 atags = btags = ""
2025-07-01 17:49:06.130 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.130 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.130 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.130 if tag == 'replace':
2025-07-01 17:49:06.130 atags += '^' * la
2025-07-01 17:49:06.130 btags += '^' * lb
2025-07-01 17:49:06.130 elif tag == 'delete':
2025-07-01 17:49:06.130 atags += '-' * la
2025-07-01 17:49:06.130 elif tag == 'insert':
2025-07-01 17:49:06.131 btags += '+' * lb
2025-07-01 17:49:06.131 elif tag == 'equal':
2025-07-01 17:49:06.131 atags += ' ' * la
2025-07-01 17:49:06.131 btags += ' ' * lb
2025-07-01 17:49:06.131 else:
2025-07-01 17:49:06.131 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.131 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.131 else:
2025-07-01 17:49:06.131 # the synch pair is identical
2025-07-01 17:49:06.131 yield ' ' + aelt
2025-07-01 17:49:06.131
2025-07-01 17:49:06.131 # pump out diffs from after the synch point
2025-07-01 17:49:06.131 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.131
2025-07-01 17:49:06.131 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.131 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.132
2025-07-01 17:49:06.132 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.132 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.132 alo = 105, ahi = 1101
2025-07-01 17:49:06.132 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.132 blo = 105, bhi = 1101
2025-07-01 17:49:06.132
2025-07-01 17:49:06.132 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.132 g = []
2025-07-01 17:49:06.132 if alo < ahi:
2025-07-01 17:49:06.132 if blo < bhi:
2025-07-01 17:49:06.132 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.132 else:
2025-07-01 17:49:06.132 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.132 elif blo < bhi:
2025-07-01 17:49:06.132 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.133
2025-07-01 17:49:06.137 > yield from g
2025-07-01 17:49:06.137
2025-07-01 17:49:06.138 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.138 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.138
2025-07-01 17:49:06.138 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.138 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.138 alo = 105, ahi = 1101
2025-07-01 17:49:06.138 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.138 blo = 105, bhi = 1101
2025-07-01 17:49:06.138
2025-07-01 17:49:06.138 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.138 r"""
2025-07-01 17:49:06.138 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.138 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.138 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.138 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.138
2025-07-01 17:49:06.139 Example:
2025-07-01 17:49:06.139
2025-07-01 17:49:06.139 >>> d = Differ()
2025-07-01 17:49:06.139 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.139 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.139 >>> print(''.join(results), end="")
2025-07-01 17:49:06.139 - abcDefghiJkl
2025-07-01 17:49:06.139 + abcdefGhijkl
2025-07-01 17:49:06.139 """
2025-07-01 17:49:06.139
2025-07-01 17:49:06.139 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.139 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.139 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.139 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.139 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.140
2025-07-01 17:49:06.140 # search for the pair that matches best without being identical
2025-07-01 17:49:06.140 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.140 # on junk -- unless we have to)
2025-07-01 17:49:06.140 for j in range(blo, bhi):
2025-07-01 17:49:06.140 bj = b[j]
2025-07-01 17:49:06.140 cruncher.set_seq2(bj)
2025-07-01 17:49:06.140 for i in range(alo, ahi):
2025-07-01 17:49:06.140 ai = a[i]
2025-07-01 17:49:06.140 if ai == bj:
2025-07-01 17:49:06.140 if eqi is None:
2025-07-01 17:49:06.140 eqi, eqj = i, j
2025-07-01 17:49:06.140 continue
2025-07-01 17:49:06.140 cruncher.set_seq1(ai)
2025-07-01 17:49:06.140 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.140 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.140 # compares by a factor of 3.
2025-07-01 17:49:06.141 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.141 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.141 # of the computation is cached by cruncher
2025-07-01 17:49:06.141 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.141 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.141 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.141 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.141 if best_ratio < cutoff:
2025-07-01 17:49:06.141 # no non-identical "pretty close" pair
2025-07-01 17:49:06.141 if eqi is None:
2025-07-01 17:49:06.141 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.141 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.141 return
2025-07-01 17:49:06.141 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.141 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.141 else:
2025-07-01 17:49:06.142 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.142 eqi = None
2025-07-01 17:49:06.142
2025-07-01 17:49:06.142 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.142 # identical
2025-07-01 17:49:06.142
2025-07-01 17:49:06.142 # pump out diffs from before the synch point
2025-07-01 17:49:06.142 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.142
2025-07-01 17:49:06.142 # do intraline marking on the synch pair
2025-07-01 17:49:06.142 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.142 if eqi is None:
2025-07-01 17:49:06.142 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.142 atags = btags = ""
2025-07-01 17:49:06.142 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.142 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.142 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.143 if tag == 'replace':
2025-07-01 17:49:06.143 atags += '^' * la
2025-07-01 17:49:06.143 btags += '^' * lb
2025-07-01 17:49:06.143 elif tag == 'delete':
2025-07-01 17:49:06.143 atags += '-' * la
2025-07-01 17:49:06.143 elif tag == 'insert':
2025-07-01 17:49:06.143 btags += '+' * lb
2025-07-01 17:49:06.143 elif tag == 'equal':
2025-07-01 17:49:06.143 atags += ' ' * la
2025-07-01 17:49:06.143 btags += ' ' * lb
2025-07-01 17:49:06.143 else:
2025-07-01 17:49:06.143 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.143 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.143 else:
2025-07-01 17:49:06.143 # the synch pair is identical
2025-07-01 17:49:06.143 yield ' ' + aelt
2025-07-01 17:49:06.143
2025-07-01 17:49:06.144 # pump out diffs from after the synch point
2025-07-01 17:49:06.144 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.144
2025-07-01 17:49:06.144 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.144 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.144
2025-07-01 17:49:06.144 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.144 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.144 alo = 106, ahi = 1101
2025-07-01 17:49:06.144 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.144 blo = 106, bhi = 1101
2025-07-01 17:49:06.144
2025-07-01 17:49:06.144 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.144 g = []
2025-07-01 17:49:06.144 if alo < ahi:
2025-07-01 17:49:06.144 if blo < bhi:
2025-07-01 17:49:06.144 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.145 else:
2025-07-01 17:49:06.145 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.145 elif blo < bhi:
2025-07-01 17:49:06.145 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.145
2025-07-01 17:49:06.145 > yield from g
2025-07-01 17:49:06.145
2025-07-01 17:49:06.145 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.145 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.145
2025-07-01 17:49:06.145 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.145 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.145 alo = 106, ahi = 1101
2025-07-01 17:49:06.145 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.145 blo = 106, bhi = 1101
2025-07-01 17:49:06.145
2025-07-01 17:49:06.146 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.146 r"""
2025-07-01 17:49:06.146 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.146 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.146 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.146 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.146
2025-07-01 17:49:06.146 Example:
2025-07-01 17:49:06.146
2025-07-01 17:49:06.146 >>> d = Differ()
2025-07-01 17:49:06.146 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.146 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.146 >>> print(''.join(results), end="")
2025-07-01 17:49:06.146 - abcDefghiJkl
2025-07-01 17:49:06.146 + abcdefGhijkl
2025-07-01 17:49:06.147 """
2025-07-01 17:49:06.147
2025-07-01 17:49:06.147 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.147 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.147 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.147 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.147 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.147
2025-07-01 17:49:06.147 # search for the pair that matches best without being identical
2025-07-01 17:49:06.147 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.147 # on junk -- unless we have to)
2025-07-01 17:49:06.147 for j in range(blo, bhi):
2025-07-01 17:49:06.147 bj = b[j]
2025-07-01 17:49:06.147 cruncher.set_seq2(bj)
2025-07-01 17:49:06.147 for i in range(alo, ahi):
2025-07-01 17:49:06.147 ai = a[i]
2025-07-01 17:49:06.151 if ai == bj:
2025-07-01 17:49:06.151 if eqi is None:
2025-07-01 17:49:06.151 eqi, eqj = i, j
2025-07-01 17:49:06.151 continue
2025-07-01 17:49:06.151 cruncher.set_seq1(ai)
2025-07-01 17:49:06.151 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.151 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.151 # compares by a factor of 3.
2025-07-01 17:49:06.151 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.151 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.151 # of the computation is cached by cruncher
2025-07-01 17:49:06.152 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.152 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.152 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.152 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.152 if best_ratio < cutoff:
2025-07-01 17:49:06.152 # no non-identical "pretty close" pair
2025-07-01 17:49:06.152 if eqi is None:
2025-07-01 17:49:06.152 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.152 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.152 return
2025-07-01 17:49:06.152 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.152 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.152 else:
2025-07-01 17:49:06.152 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.152 eqi = None
2025-07-01 17:49:06.152
2025-07-01 17:49:06.152 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.153 # identical
2025-07-01 17:49:06.153
2025-07-01 17:49:06.153 # pump out diffs from before the synch point
2025-07-01 17:49:06.153 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.153
2025-07-01 17:49:06.153 # do intraline marking on the synch pair
2025-07-01 17:49:06.153 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.153 if eqi is None:
2025-07-01 17:49:06.153 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.153 atags = btags = ""
2025-07-01 17:49:06.153 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.153 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.153 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.153 if tag == 'replace':
2025-07-01 17:49:06.153 atags += '^' * la
2025-07-01 17:49:06.153 btags += '^' * lb
2025-07-01 17:49:06.154 elif tag == 'delete':
2025-07-01 17:49:06.154 atags += '-' * la
2025-07-01 17:49:06.154 elif tag == 'insert':
2025-07-01 17:49:06.154 btags += '+' * lb
2025-07-01 17:49:06.154 elif tag == 'equal':
2025-07-01 17:49:06.154 atags += ' ' * la
2025-07-01 17:49:06.154 btags += ' ' * lb
2025-07-01 17:49:06.154 else:
2025-07-01 17:49:06.154 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.154 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.154 else:
2025-07-01 17:49:06.154 # the synch pair is identical
2025-07-01 17:49:06.154 yield ' ' + aelt
2025-07-01 17:49:06.154
2025-07-01 17:49:06.154 # pump out diffs from after the synch point
2025-07-01 17:49:06.154 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.155
2025-07-01 17:49:06.155 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.155 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.155
2025-07-01 17:49:06.155 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.155 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.155 alo = 107, ahi = 1101
2025-07-01 17:49:06.155 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.155 blo = 107, bhi = 1101
2025-07-01 17:49:06.155
2025-07-01 17:49:06.155 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.155 g = []
2025-07-01 17:49:06.155 if alo < ahi:
2025-07-01 17:49:06.155 if blo < bhi:
2025-07-01 17:49:06.155 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.155 else:
2025-07-01 17:49:06.155 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.155 elif blo < bhi:
2025-07-01 17:49:06.156 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.156
2025-07-01 17:49:06.156 > yield from g
2025-07-01 17:49:06.156
2025-07-01 17:49:06.156 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.156 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.156
2025-07-01 17:49:06.156 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.156 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.156 alo = 107, ahi = 1101
2025-07-01 17:49:06.156 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.156 blo = 107, bhi = 1101
2025-07-01 17:49:06.156
2025-07-01 17:49:06.156 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.156 r"""
2025-07-01 17:49:06.157 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.157 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.157 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.157 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.157
2025-07-01 17:49:06.157 Example:
2025-07-01 17:49:06.157
2025-07-01 17:49:06.157 >>> d = Differ()
2025-07-01 17:49:06.157 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.157 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.157 >>> print(''.join(results), end="")
2025-07-01 17:49:06.157 - abcDefghiJkl
2025-07-01 17:49:06.157 + abcdefGhijkl
2025-07-01 17:49:06.157 """
2025-07-01 17:49:06.157
2025-07-01 17:49:06.158 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.158 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.158 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.158 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.158 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.158
2025-07-01 17:49:06.158 # search for the pair that matches best without being identical
2025-07-01 17:49:06.158 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.158 # on junk -- unless we have to)
2025-07-01 17:49:06.158 for j in range(blo, bhi):
2025-07-01 17:49:06.158 bj = b[j]
2025-07-01 17:49:06.158 cruncher.set_seq2(bj)
2025-07-01 17:49:06.158 for i in range(alo, ahi):
2025-07-01 17:49:06.158 ai = a[i]
2025-07-01 17:49:06.158 if ai == bj:
2025-07-01 17:49:06.158 if eqi is None:
2025-07-01 17:49:06.158 eqi, eqj = i, j
2025-07-01 17:49:06.159 continue
2025-07-01 17:49:06.159 cruncher.set_seq1(ai)
2025-07-01 17:49:06.159 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.159 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.159 # compares by a factor of 3.
2025-07-01 17:49:06.159 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.159 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.159 # of the computation is cached by cruncher
2025-07-01 17:49:06.159 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.159 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.159 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.159 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.159 if best_ratio < cutoff:
2025-07-01 17:49:06.159 # no non-identical "pretty close" pair
2025-07-01 17:49:06.159 if eqi is None:
2025-07-01 17:49:06.159 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.160 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.160 return
2025-07-01 17:49:06.160 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.160 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.160 else:
2025-07-01 17:49:06.160 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.160 eqi = None
2025-07-01 17:49:06.160
2025-07-01 17:49:06.160 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.160 # identical
2025-07-01 17:49:06.160
2025-07-01 17:49:06.160 # pump out diffs from before the synch point
2025-07-01 17:49:06.160 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.160
2025-07-01 17:49:06.160 # do intraline marking on the synch pair
2025-07-01 17:49:06.160 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.161 if eqi is None:
2025-07-01 17:49:06.161 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.161 atags = btags = ""
2025-07-01 17:49:06.161 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.161 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.161 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.161 if tag == 'replace':
2025-07-01 17:49:06.161 atags += '^' * la
2025-07-01 17:49:06.161 btags += '^' * lb
2025-07-01 17:49:06.161 elif tag == 'delete':
2025-07-01 17:49:06.161 atags += '-' * la
2025-07-01 17:49:06.161 elif tag == 'insert':
2025-07-01 17:49:06.161 btags += '+' * lb
2025-07-01 17:49:06.161 elif tag == 'equal':
2025-07-01 17:49:06.161 atags += ' ' * la
2025-07-01 17:49:06.161 btags += ' ' * lb
2025-07-01 17:49:06.161 else:
2025-07-01 17:49:06.161 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.162 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.162 else:
2025-07-01 17:49:06.162 # the synch pair is identical
2025-07-01 17:49:06.162 yield ' ' + aelt
2025-07-01 17:49:06.162
2025-07-01 17:49:06.162 # pump out diffs from after the synch point
2025-07-01 17:49:06.162 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.162
2025-07-01 17:49:06.162 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.162 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.162
2025-07-01 17:49:06.162 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.162 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.162 alo = 108, ahi = 1101
2025-07-01 17:49:06.162 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.162 blo = 108, bhi = 1101
2025-07-01 17:49:06.163
2025-07-01 17:49:06.163 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.163 g = []
2025-07-01 17:49:06.163 if alo < ahi:
2025-07-01 17:49:06.163 if blo < bhi:
2025-07-01 17:49:06.163 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.163 else:
2025-07-01 17:49:06.163 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.163 elif blo < bhi:
2025-07-01 17:49:06.163 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.163
2025-07-01 17:49:06.163 > yield from g
2025-07-01 17:49:06.163
2025-07-01 17:49:06.163 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.163 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.163
2025-07-01 17:49:06.163 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.163 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.168 alo = 108, ahi = 1101
2025-07-01 17:49:06.168 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.169 blo = 108, bhi = 1101
2025-07-01 17:49:06.169
2025-07-01 17:49:06.169 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.169 r"""
2025-07-01 17:49:06.169 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.169 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.169 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.169 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.169
2025-07-01 17:49:06.169 Example:
2025-07-01 17:49:06.169
2025-07-01 17:49:06.169 >>> d = Differ()
2025-07-01 17:49:06.169 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.169 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.169 >>> print(''.join(results), end="")
2025-07-01 17:49:06.169 - abcDefghiJkl
2025-07-01 17:49:06.170 + abcdefGhijkl
2025-07-01 17:49:06.170 """
2025-07-01 17:49:06.170
2025-07-01 17:49:06.170 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.170 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.170 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.170 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.170 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.170
2025-07-01 17:49:06.170 # search for the pair that matches best without being identical
2025-07-01 17:49:06.170 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.170 # on junk -- unless we have to)
2025-07-01 17:49:06.170 for j in range(blo, bhi):
2025-07-01 17:49:06.170 bj = b[j]
2025-07-01 17:49:06.170 cruncher.set_seq2(bj)
2025-07-01 17:49:06.171 for i in range(alo, ahi):
2025-07-01 17:49:06.171 ai = a[i]
2025-07-01 17:49:06.171 if ai == bj:
2025-07-01 17:49:06.171 if eqi is None:
2025-07-01 17:49:06.171 eqi, eqj = i, j
2025-07-01 17:49:06.171 continue
2025-07-01 17:49:06.171 cruncher.set_seq1(ai)
2025-07-01 17:49:06.171 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.171 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.171 # compares by a factor of 3.
2025-07-01 17:49:06.171 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.171 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.171 # of the computation is cached by cruncher
2025-07-01 17:49:06.171 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.171 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.171 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.172 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.172 if best_ratio < cutoff:
2025-07-01 17:49:06.172 # no non-identical "pretty close" pair
2025-07-01 17:49:06.172 if eqi is None:
2025-07-01 17:49:06.172 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.172 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.172 return
2025-07-01 17:49:06.172 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.172 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.172 else:
2025-07-01 17:49:06.172 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.172 eqi = None
2025-07-01 17:49:06.172
2025-07-01 17:49:06.172 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.172 # identical
2025-07-01 17:49:06.172
2025-07-01 17:49:06.172 # pump out diffs from before the synch point
2025-07-01 17:49:06.172 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.173
2025-07-01 17:49:06.173 # do intraline marking on the synch pair
2025-07-01 17:49:06.173 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.173 if eqi is None:
2025-07-01 17:49:06.173 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.173 atags = btags = ""
2025-07-01 17:49:06.173 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.173 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.173 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.173 if tag == 'replace':
2025-07-01 17:49:06.173 atags += '^' * la
2025-07-01 17:49:06.173 btags += '^' * lb
2025-07-01 17:49:06.173 elif tag == 'delete':
2025-07-01 17:49:06.173 atags += '-' * la
2025-07-01 17:49:06.173 elif tag == 'insert':
2025-07-01 17:49:06.173 btags += '+' * lb
2025-07-01 17:49:06.173 elif tag == 'equal':
2025-07-01 17:49:06.174 atags += ' ' * la
2025-07-01 17:49:06.174 btags += ' ' * lb
2025-07-01 17:49:06.174 else:
2025-07-01 17:49:06.174 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.174 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.174 else:
2025-07-01 17:49:06.174 # the synch pair is identical
2025-07-01 17:49:06.174 yield ' ' + aelt
2025-07-01 17:49:06.174
2025-07-01 17:49:06.174 # pump out diffs from after the synch point
2025-07-01 17:49:06.174 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.174
2025-07-01 17:49:06.174 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.174 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.174
2025-07-01 17:49:06.174 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.175 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.175 alo = 109, ahi = 1101
2025-07-01 17:49:06.175 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.175 blo = 109, bhi = 1101
2025-07-01 17:49:06.175
2025-07-01 17:49:06.175 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.175 g = []
2025-07-01 17:49:06.175 if alo < ahi:
2025-07-01 17:49:06.175 if blo < bhi:
2025-07-01 17:49:06.175 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.175 else:
2025-07-01 17:49:06.175 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.175 elif blo < bhi:
2025-07-01 17:49:06.175 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.175
2025-07-01 17:49:06.176 > yield from g
2025-07-01 17:49:06.176
2025-07-01 17:49:06.176 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.176 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.176
2025-07-01 17:49:06.176 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.176 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.176 alo = 109, ahi = 1101
2025-07-01 17:49:06.176 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.176 blo = 109, bhi = 1101
2025-07-01 17:49:06.176
2025-07-01 17:49:06.176 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.176 r"""
2025-07-01 17:49:06.176 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.176 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.176 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.176 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.177
2025-07-01 17:49:06.177 Example:
2025-07-01 17:49:06.177
2025-07-01 17:49:06.177 >>> d = Differ()
2025-07-01 17:49:06.177 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.177 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.177 >>> print(''.join(results), end="")
2025-07-01 17:49:06.177 - abcDefghiJkl
2025-07-01 17:49:06.177 + abcdefGhijkl
2025-07-01 17:49:06.177 """
2025-07-01 17:49:06.177
2025-07-01 17:49:06.177 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.177 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.177 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.177 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.178 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.178
2025-07-01 17:49:06.178 # search for the pair that matches best without being identical
2025-07-01 17:49:06.178 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.178 # on junk -- unless we have to)
2025-07-01 17:49:06.178 for j in range(blo, bhi):
2025-07-01 17:49:06.178 bj = b[j]
2025-07-01 17:49:06.178 cruncher.set_seq2(bj)
2025-07-01 17:49:06.178 for i in range(alo, ahi):
2025-07-01 17:49:06.178 ai = a[i]
2025-07-01 17:49:06.178 if ai == bj:
2025-07-01 17:49:06.178 if eqi is None:
2025-07-01 17:49:06.178 eqi, eqj = i, j
2025-07-01 17:49:06.178 continue
2025-07-01 17:49:06.178 cruncher.set_seq1(ai)
2025-07-01 17:49:06.178 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.179 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.184 # compares by a factor of 3.
2025-07-01 17:49:06.184 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.184 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.184 # of the computation is cached by cruncher
2025-07-01 17:49:06.184 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.185 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.185 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.185 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.185 if best_ratio < cutoff:
2025-07-01 17:49:06.185 # no non-identical "pretty close" pair
2025-07-01 17:49:06.185 if eqi is None:
2025-07-01 17:49:06.185 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.185 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.185 return
2025-07-01 17:49:06.185 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.185 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.185 else:
2025-07-01 17:49:06.185 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.185 eqi = None
2025-07-01 17:49:06.185
2025-07-01 17:49:06.185 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.186 # identical
2025-07-01 17:49:06.186
2025-07-01 17:49:06.186 # pump out diffs from before the synch point
2025-07-01 17:49:06.186 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.186
2025-07-01 17:49:06.186 # do intraline marking on the synch pair
2025-07-01 17:49:06.186 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.186 if eqi is None:
2025-07-01 17:49:06.186 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.186 atags = btags = ""
2025-07-01 17:49:06.186 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.186 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.186 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.186 if tag == 'replace':
2025-07-01 17:49:06.186 atags += '^' * la
2025-07-01 17:49:06.186 btags += '^' * lb
2025-07-01 17:49:06.186 elif tag == 'delete':
2025-07-01 17:49:06.187 atags += '-' * la
2025-07-01 17:49:06.187 elif tag == 'insert':
2025-07-01 17:49:06.187 btags += '+' * lb
2025-07-01 17:49:06.187 elif tag == 'equal':
2025-07-01 17:49:06.187 atags += ' ' * la
2025-07-01 17:49:06.187 btags += ' ' * lb
2025-07-01 17:49:06.187 else:
2025-07-01 17:49:06.187 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.187 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.187 else:
2025-07-01 17:49:06.187 # the synch pair is identical
2025-07-01 17:49:06.187 yield ' ' + aelt
2025-07-01 17:49:06.187
2025-07-01 17:49:06.187 # pump out diffs from after the synch point
2025-07-01 17:49:06.187 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.188
2025-07-01 17:49:06.188 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.188 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.188
2025-07-01 17:49:06.188 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.188 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.188 alo = 110, ahi = 1101
2025-07-01 17:49:06.188 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.188 blo = 110, bhi = 1101
2025-07-01 17:49:06.188
2025-07-01 17:49:06.188 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.188 g = []
2025-07-01 17:49:06.188 if alo < ahi:
2025-07-01 17:49:06.188 if blo < bhi:
2025-07-01 17:49:06.188 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.188 else:
2025-07-01 17:49:06.189 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.189 elif blo < bhi:
2025-07-01 17:49:06.189 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.189
2025-07-01 17:49:06.189 > yield from g
2025-07-01 17:49:06.189
2025-07-01 17:49:06.189 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.189 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.189
2025-07-01 17:49:06.189 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.189 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.189 alo = 110, ahi = 1101
2025-07-01 17:49:06.189 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.189 blo = 110, bhi = 1101
2025-07-01 17:49:06.189
2025-07-01 17:49:06.189 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.190 r"""
2025-07-01 17:49:06.190 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.190 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.190 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.190 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.190
2025-07-01 17:49:06.190 Example:
2025-07-01 17:49:06.190
2025-07-01 17:49:06.190 >>> d = Differ()
2025-07-01 17:49:06.190 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.190 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.190 >>> print(''.join(results), end="")
2025-07-01 17:49:06.190 - abcDefghiJkl
2025-07-01 17:49:06.190 + abcdefGhijkl
2025-07-01 17:49:06.191 """
2025-07-01 17:49:06.191
2025-07-01 17:49:06.191 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.191 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.191 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.191 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.191 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.191
2025-07-01 17:49:06.191 # search for the pair that matches best without being identical
2025-07-01 17:49:06.191 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.191 # on junk -- unless we have to)
2025-07-01 17:49:06.191 for j in range(blo, bhi):
2025-07-01 17:49:06.191 bj = b[j]
2025-07-01 17:49:06.191 cruncher.set_seq2(bj)
2025-07-01 17:49:06.191 for i in range(alo, ahi):
2025-07-01 17:49:06.191 ai = a[i]
2025-07-01 17:49:06.192 if ai == bj:
2025-07-01 17:49:06.192 if eqi is None:
2025-07-01 17:49:06.192 eqi, eqj = i, j
2025-07-01 17:49:06.192 continue
2025-07-01 17:49:06.192 cruncher.set_seq1(ai)
2025-07-01 17:49:06.192 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.192 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.192 # compares by a factor of 3.
2025-07-01 17:49:06.192 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.192 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.192 # of the computation is cached by cruncher
2025-07-01 17:49:06.192 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.192 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.192 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.192 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.192 if best_ratio < cutoff:
2025-07-01 17:49:06.193 # no non-identical "pretty close" pair
2025-07-01 17:49:06.193 if eqi is None:
2025-07-01 17:49:06.193 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.193 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.193 return
2025-07-01 17:49:06.193 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.193 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.193 else:
2025-07-01 17:49:06.193 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.193 eqi = None
2025-07-01 17:49:06.193
2025-07-01 17:49:06.193 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.193 # identical
2025-07-01 17:49:06.193
2025-07-01 17:49:06.193 # pump out diffs from before the synch point
2025-07-01 17:49:06.193 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.194
2025-07-01 17:49:06.194 # do intraline marking on the synch pair
2025-07-01 17:49:06.194 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.194 if eqi is None:
2025-07-01 17:49:06.194 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.194 atags = btags = ""
2025-07-01 17:49:06.194 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.194 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.194 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.194 if tag == 'replace':
2025-07-01 17:49:06.194 atags += '^' * la
2025-07-01 17:49:06.194 btags += '^' * lb
2025-07-01 17:49:06.194 elif tag == 'delete':
2025-07-01 17:49:06.194 atags += '-' * la
2025-07-01 17:49:06.194 elif tag == 'insert':
2025-07-01 17:49:06.194 btags += '+' * lb
2025-07-01 17:49:06.194 elif tag == 'equal':
2025-07-01 17:49:06.199 atags += ' ' * la
2025-07-01 17:49:06.200 btags += ' ' * lb
2025-07-01 17:49:06.200 else:
2025-07-01 17:49:06.200 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.200 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.200 else:
2025-07-01 17:49:06.200 # the synch pair is identical
2025-07-01 17:49:06.200 yield ' ' + aelt
2025-07-01 17:49:06.200
2025-07-01 17:49:06.200 # pump out diffs from after the synch point
2025-07-01 17:49:06.200 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.200
2025-07-01 17:49:06.200 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.200 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.200
2025-07-01 17:49:06.200 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.200 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.201 alo = 111, ahi = 1101
2025-07-01 17:49:06.201 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.201 blo = 111, bhi = 1101
2025-07-01 17:49:06.201
2025-07-01 17:49:06.201 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.201 g = []
2025-07-01 17:49:06.201 if alo < ahi:
2025-07-01 17:49:06.201 if blo < bhi:
2025-07-01 17:49:06.201 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.201 else:
2025-07-01 17:49:06.201 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.201 elif blo < bhi:
2025-07-01 17:49:06.201 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.201
2025-07-01 17:49:06.201 > yield from g
2025-07-01 17:49:06.201
2025-07-01 17:49:06.202 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.202 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.202
2025-07-01 17:49:06.202 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.202 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.202 alo = 111, ahi = 1101
2025-07-01 17:49:06.202 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.202 blo = 111, bhi = 1101
2025-07-01 17:49:06.202
2025-07-01 17:49:06.202 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.202 r"""
2025-07-01 17:49:06.202 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.202 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.202 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.202 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.202
2025-07-01 17:49:06.203 Example:
2025-07-01 17:49:06.203
2025-07-01 17:49:06.203 >>> d = Differ()
2025-07-01 17:49:06.203 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.203 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.203 >>> print(''.join(results), end="")
2025-07-01 17:49:06.203 - abcDefghiJkl
2025-07-01 17:49:06.203 + abcdefGhijkl
2025-07-01 17:49:06.203 """
2025-07-01 17:49:06.203
2025-07-01 17:49:06.203 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.203 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.203 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.203 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.203 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.204
2025-07-01 17:49:06.204 # search for the pair that matches best without being identical
2025-07-01 17:49:06.204 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.204 # on junk -- unless we have to)
2025-07-01 17:49:06.204 for j in range(blo, bhi):
2025-07-01 17:49:06.204 bj = b[j]
2025-07-01 17:49:06.204 cruncher.set_seq2(bj)
2025-07-01 17:49:06.204 for i in range(alo, ahi):
2025-07-01 17:49:06.204 ai = a[i]
2025-07-01 17:49:06.204 if ai == bj:
2025-07-01 17:49:06.204 if eqi is None:
2025-07-01 17:49:06.204 eqi, eqj = i, j
2025-07-01 17:49:06.204 continue
2025-07-01 17:49:06.204 cruncher.set_seq1(ai)
2025-07-01 17:49:06.204 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.204 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.205 # compares by a factor of 3.
2025-07-01 17:49:06.205 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.205 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.205 # of the computation is cached by cruncher
2025-07-01 17:49:06.205 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.205 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.205 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.205 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.205 if best_ratio < cutoff:
2025-07-01 17:49:06.205 # no non-identical "pretty close" pair
2025-07-01 17:49:06.205 if eqi is None:
2025-07-01 17:49:06.205 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.205 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.205 return
2025-07-01 17:49:06.205 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.205 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.206 else:
2025-07-01 17:49:06.206 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.206 eqi = None
2025-07-01 17:49:06.206
2025-07-01 17:49:06.206 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.206 # identical
2025-07-01 17:49:06.206
2025-07-01 17:49:06.206 # pump out diffs from before the synch point
2025-07-01 17:49:06.206 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.206
2025-07-01 17:49:06.206 # do intraline marking on the synch pair
2025-07-01 17:49:06.206 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.206 if eqi is None:
2025-07-01 17:49:06.206 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.206 atags = btags = ""
2025-07-01 17:49:06.207 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.207 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.207 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.207 if tag == 'replace':
2025-07-01 17:49:06.207 atags += '^' * la
2025-07-01 17:49:06.207 btags += '^' * lb
2025-07-01 17:49:06.207 elif tag == 'delete':
2025-07-01 17:49:06.207 atags += '-' * la
2025-07-01 17:49:06.207 elif tag == 'insert':
2025-07-01 17:49:06.207 btags += '+' * lb
2025-07-01 17:49:06.207 elif tag == 'equal':
2025-07-01 17:49:06.207 atags += ' ' * la
2025-07-01 17:49:06.207 btags += ' ' * lb
2025-07-01 17:49:06.207 else:
2025-07-01 17:49:06.207 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.207 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.208 else:
2025-07-01 17:49:06.208 # the synch pair is identical
2025-07-01 17:49:06.208 yield ' ' + aelt
2025-07-01 17:49:06.208
2025-07-01 17:49:06.208 # pump out diffs from after the synch point
2025-07-01 17:49:06.208 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.208
2025-07-01 17:49:06.208 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.208 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.208
2025-07-01 17:49:06.208 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.208 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.208 alo = 114, ahi = 1101
2025-07-01 17:49:06.208 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.208 blo = 114, bhi = 1101
2025-07-01 17:49:06.208
2025-07-01 17:49:06.209 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.209 g = []
2025-07-01 17:49:06.209 if alo < ahi:
2025-07-01 17:49:06.209 if blo < bhi:
2025-07-01 17:49:06.209 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.209 else:
2025-07-01 17:49:06.209 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.209 elif blo < bhi:
2025-07-01 17:49:06.209 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.209
2025-07-01 17:49:06.209 > yield from g
2025-07-01 17:49:06.209
2025-07-01 17:49:06.209 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.209 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.209
2025-07-01 17:49:06.209 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.209 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.210 alo = 114, ahi = 1101
2025-07-01 17:49:06.210 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.210 blo = 114, bhi = 1101
2025-07-01 17:49:06.210
2025-07-01 17:49:06.210 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.210 r"""
2025-07-01 17:49:06.210 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.210 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.210 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.210 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.210
2025-07-01 17:49:06.210 Example:
2025-07-01 17:49:06.210
2025-07-01 17:49:06.210 >>> d = Differ()
2025-07-01 17:49:06.210 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.210 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.211 >>> print(''.join(results), end="")
2025-07-01 17:49:06.214 - abcDefghiJkl
2025-07-01 17:49:06.214 + abcdefGhijkl
2025-07-01 17:49:06.214 """
2025-07-01 17:49:06.214
2025-07-01 17:49:06.214 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.214 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.214 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.214 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.214 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.214
2025-07-01 17:49:06.215 # search for the pair that matches best without being identical
2025-07-01 17:49:06.215 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.215 # on junk -- unless we have to)
2025-07-01 17:49:06.215 for j in range(blo, bhi):
2025-07-01 17:49:06.215 bj = b[j]
2025-07-01 17:49:06.215 cruncher.set_seq2(bj)
2025-07-01 17:49:06.215 for i in range(alo, ahi):
2025-07-01 17:49:06.215 ai = a[i]
2025-07-01 17:49:06.215 if ai == bj:
2025-07-01 17:49:06.215 if eqi is None:
2025-07-01 17:49:06.215 eqi, eqj = i, j
2025-07-01 17:49:06.215 continue
2025-07-01 17:49:06.215 cruncher.set_seq1(ai)
2025-07-01 17:49:06.215 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.215 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.215 # compares by a factor of 3.
2025-07-01 17:49:06.216 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.216 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.216 # of the computation is cached by cruncher
2025-07-01 17:49:06.216 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.216 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.216 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.216 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.216 if best_ratio < cutoff:
2025-07-01 17:49:06.216 # no non-identical "pretty close" pair
2025-07-01 17:49:06.216 if eqi is None:
2025-07-01 17:49:06.216 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.216 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.216 return
2025-07-01 17:49:06.216 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.216 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.216 else:
2025-07-01 17:49:06.217 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.217 eqi = None
2025-07-01 17:49:06.217
2025-07-01 17:49:06.217 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.217 # identical
2025-07-01 17:49:06.217
2025-07-01 17:49:06.217 # pump out diffs from before the synch point
2025-07-01 17:49:06.217 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.217
2025-07-01 17:49:06.217 # do intraline marking on the synch pair
2025-07-01 17:49:06.217 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.217 if eqi is None:
2025-07-01 17:49:06.217 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.217 atags = btags = ""
2025-07-01 17:49:06.217 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.217 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.217 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.218 if tag == 'replace':
2025-07-01 17:49:06.218 atags += '^' * la
2025-07-01 17:49:06.218 btags += '^' * lb
2025-07-01 17:49:06.218 elif tag == 'delete':
2025-07-01 17:49:06.218 atags += '-' * la
2025-07-01 17:49:06.218 elif tag == 'insert':
2025-07-01 17:49:06.218 btags += '+' * lb
2025-07-01 17:49:06.218 elif tag == 'equal':
2025-07-01 17:49:06.218 atags += ' ' * la
2025-07-01 17:49:06.218 btags += ' ' * lb
2025-07-01 17:49:06.218 else:
2025-07-01 17:49:06.218 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.218 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.218 else:
2025-07-01 17:49:06.218 # the synch pair is identical
2025-07-01 17:49:06.218 yield ' ' + aelt
2025-07-01 17:49:06.218
2025-07-01 17:49:06.219 # pump out diffs from after the synch point
2025-07-01 17:49:06.219 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.219
2025-07-01 17:49:06.219 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.219 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.219
2025-07-01 17:49:06.219 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.219 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.219 alo = 115, ahi = 1101
2025-07-01 17:49:06.219 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.219 blo = 115, bhi = 1101
2025-07-01 17:49:06.219
2025-07-01 17:49:06.219 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.219 g = []
2025-07-01 17:49:06.219 if alo < ahi:
2025-07-01 17:49:06.220 if blo < bhi:
2025-07-01 17:49:06.220 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.220 else:
2025-07-01 17:49:06.220 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.220 elif blo < bhi:
2025-07-01 17:49:06.220 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.220
2025-07-01 17:49:06.220 > yield from g
2025-07-01 17:49:06.220
2025-07-01 17:49:06.220 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.220 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.220
2025-07-01 17:49:06.220 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.220 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.220 alo = 115, ahi = 1101
2025-07-01 17:49:06.221 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.221 blo = 115, bhi = 1101
2025-07-01 17:49:06.221
2025-07-01 17:49:06.221 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.221 r"""
2025-07-01 17:49:06.221 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.221 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.221 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.221 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.221
2025-07-01 17:49:06.221 Example:
2025-07-01 17:49:06.221
2025-07-01 17:49:06.221 >>> d = Differ()
2025-07-01 17:49:06.221 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.221 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.221 >>> print(''.join(results), end="")
2025-07-01 17:49:06.222 - abcDefghiJkl
2025-07-01 17:49:06.222 + abcdefGhijkl
2025-07-01 17:49:06.222 """
2025-07-01 17:49:06.222
2025-07-01 17:49:06.222 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.222 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.222 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.222 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.222 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.222
2025-07-01 17:49:06.222 # search for the pair that matches best without being identical
2025-07-01 17:49:06.222 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.222 # on junk -- unless we have to)
2025-07-01 17:49:06.222 for j in range(blo, bhi):
2025-07-01 17:49:06.223 bj = b[j]
2025-07-01 17:49:06.223 cruncher.set_seq2(bj)
2025-07-01 17:49:06.223 for i in range(alo, ahi):
2025-07-01 17:49:06.223 ai = a[i]
2025-07-01 17:49:06.223 if ai == bj:
2025-07-01 17:49:06.223 if eqi is None:
2025-07-01 17:49:06.223 eqi, eqj = i, j
2025-07-01 17:49:06.223 continue
2025-07-01 17:49:06.223 cruncher.set_seq1(ai)
2025-07-01 17:49:06.223 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.223 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.223 # compares by a factor of 3.
2025-07-01 17:49:06.223 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.223 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.223 # of the computation is cached by cruncher
2025-07-01 17:49:06.223 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.223 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.224 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.224 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.224 if best_ratio < cutoff:
2025-07-01 17:49:06.224 # no non-identical "pretty close" pair
2025-07-01 17:49:06.224 if eqi is None:
2025-07-01 17:49:06.224 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.224 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.224 return
2025-07-01 17:49:06.224 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.224 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.224 else:
2025-07-01 17:49:06.224 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.224 eqi = None
2025-07-01 17:49:06.224
2025-07-01 17:49:06.224 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.224 # identical
2025-07-01 17:49:06.225
2025-07-01 17:49:06.225 # pump out diffs from before the synch point
2025-07-01 17:49:06.225 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.225
2025-07-01 17:49:06.225 # do intraline marking on the synch pair
2025-07-01 17:49:06.225 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.225 if eqi is None:
2025-07-01 17:49:06.225 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.225 atags = btags = ""
2025-07-01 17:49:06.225 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.225 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.225 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.225 if tag == 'replace':
2025-07-01 17:49:06.225 atags += '^' * la
2025-07-01 17:49:06.225 btags += '^' * lb
2025-07-01 17:49:06.225 elif tag == 'delete':
2025-07-01 17:49:06.226 atags += '-' * la
2025-07-01 17:49:06.231 elif tag == 'insert':
2025-07-01 17:49:06.231 btags += '+' * lb
2025-07-01 17:49:06.231 elif tag == 'equal':
2025-07-01 17:49:06.231 atags += ' ' * la
2025-07-01 17:49:06.231 btags += ' ' * lb
2025-07-01 17:49:06.231 else:
2025-07-01 17:49:06.231 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.231 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.231 else:
2025-07-01 17:49:06.231 # the synch pair is identical
2025-07-01 17:49:06.231 yield ' ' + aelt
2025-07-01 17:49:06.231
2025-07-01 17:49:06.231 # pump out diffs from after the synch point
2025-07-01 17:49:06.231 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.232
2025-07-01 17:49:06.232 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.232 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.232
2025-07-01 17:49:06.232 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.232 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.232 alo = 116, ahi = 1101
2025-07-01 17:49:06.232 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.232 blo = 116, bhi = 1101
2025-07-01 17:49:06.232
2025-07-01 17:49:06.232 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.232 g = []
2025-07-01 17:49:06.232 if alo < ahi:
2025-07-01 17:49:06.232 if blo < bhi:
2025-07-01 17:49:06.232 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.232 else:
2025-07-01 17:49:06.233 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.233 elif blo < bhi:
2025-07-01 17:49:06.233 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.233
2025-07-01 17:49:06.233 > yield from g
2025-07-01 17:49:06.233
2025-07-01 17:49:06.233 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.233 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.233
2025-07-01 17:49:06.233 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.233 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.233 alo = 116, ahi = 1101
2025-07-01 17:49:06.233 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.233 blo = 116, bhi = 1101
2025-07-01 17:49:06.233
2025-07-01 17:49:06.233 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.234 r"""
2025-07-01 17:49:06.234 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.234 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.234 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.234 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.234
2025-07-01 17:49:06.234 Example:
2025-07-01 17:49:06.234
2025-07-01 17:49:06.234 >>> d = Differ()
2025-07-01 17:49:06.234 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.234 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.234 >>> print(''.join(results), end="")
2025-07-01 17:49:06.234 - abcDefghiJkl
2025-07-01 17:49:06.234 + abcdefGhijkl
2025-07-01 17:49:06.235 """
2025-07-01 17:49:06.235
2025-07-01 17:49:06.235 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.235 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.235 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.235 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.235 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.235
2025-07-01 17:49:06.235 # search for the pair that matches best without being identical
2025-07-01 17:49:06.235 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.235 # on junk -- unless we have to)
2025-07-01 17:49:06.235 for j in range(blo, bhi):
2025-07-01 17:49:06.235 bj = b[j]
2025-07-01 17:49:06.235 cruncher.set_seq2(bj)
2025-07-01 17:49:06.235 for i in range(alo, ahi):
2025-07-01 17:49:06.236 ai = a[i]
2025-07-01 17:49:06.236 if ai == bj:
2025-07-01 17:49:06.236 if eqi is None:
2025-07-01 17:49:06.236 eqi, eqj = i, j
2025-07-01 17:49:06.236 continue
2025-07-01 17:49:06.236 cruncher.set_seq1(ai)
2025-07-01 17:49:06.236 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.236 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.236 # compares by a factor of 3.
2025-07-01 17:49:06.236 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.236 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.236 # of the computation is cached by cruncher
2025-07-01 17:49:06.236 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.236 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.236 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.236 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.237 if best_ratio < cutoff:
2025-07-01 17:49:06.237 # no non-identical "pretty close" pair
2025-07-01 17:49:06.237 if eqi is None:
2025-07-01 17:49:06.237 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.237 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.237 return
2025-07-01 17:49:06.237 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.237 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.237 else:
2025-07-01 17:49:06.237 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.237 eqi = None
2025-07-01 17:49:06.237
2025-07-01 17:49:06.237 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.237 # identical
2025-07-01 17:49:06.237
2025-07-01 17:49:06.238 # pump out diffs from before the synch point
2025-07-01 17:49:06.238 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.238
2025-07-01 17:49:06.238 # do intraline marking on the synch pair
2025-07-01 17:49:06.238 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.238 if eqi is None:
2025-07-01 17:49:06.238 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.238 atags = btags = ""
2025-07-01 17:49:06.238 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.238 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.238 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.238 if tag == 'replace':
2025-07-01 17:49:06.238 atags += '^' * la
2025-07-01 17:49:06.238 btags += '^' * lb
2025-07-01 17:49:06.238 elif tag == 'delete':
2025-07-01 17:49:06.238 atags += '-' * la
2025-07-01 17:49:06.238 elif tag == 'insert':
2025-07-01 17:49:06.239 btags += '+' * lb
2025-07-01 17:49:06.239 elif tag == 'equal':
2025-07-01 17:49:06.239 atags += ' ' * la
2025-07-01 17:49:06.239 btags += ' ' * lb
2025-07-01 17:49:06.239 else:
2025-07-01 17:49:06.239 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.239 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.239 else:
2025-07-01 17:49:06.239 # the synch pair is identical
2025-07-01 17:49:06.239 yield ' ' + aelt
2025-07-01 17:49:06.239
2025-07-01 17:49:06.239 # pump out diffs from after the synch point
2025-07-01 17:49:06.239 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.239
2025-07-01 17:49:06.239 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.239 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.239
2025-07-01 17:49:06.240 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.240 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.240 alo = 117, ahi = 1101
2025-07-01 17:49:06.240 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.240 blo = 117, bhi = 1101
2025-07-01 17:49:06.240
2025-07-01 17:49:06.240 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.240 g = []
2025-07-01 17:49:06.240 if alo < ahi:
2025-07-01 17:49:06.240 if blo < bhi:
2025-07-01 17:49:06.240 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.240 else:
2025-07-01 17:49:06.240 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.240 elif blo < bhi:
2025-07-01 17:49:06.240 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.240
2025-07-01 17:49:06.241 > yield from g
2025-07-01 17:49:06.241
2025-07-01 17:49:06.241 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.241 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.241
2025-07-01 17:49:06.241 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.241 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.241 alo = 117, ahi = 1101
2025-07-01 17:49:06.241 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.241 blo = 117, bhi = 1101
2025-07-01 17:49:06.241
2025-07-01 17:49:06.241 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.241 r"""
2025-07-01 17:49:06.241 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.241 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.241 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.241 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.247
2025-07-01 17:49:06.247 Example:
2025-07-01 17:49:06.247
2025-07-01 17:49:06.247 >>> d = Differ()
2025-07-01 17:49:06.247 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.247 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.247 >>> print(''.join(results), end="")
2025-07-01 17:49:06.247 - abcDefghiJkl
2025-07-01 17:49:06.247 + abcdefGhijkl
2025-07-01 17:49:06.247 """
2025-07-01 17:49:06.247
2025-07-01 17:49:06.247 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.248 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.248 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.248 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.248 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.248
2025-07-01 17:49:06.248 # search for the pair that matches best without being identical
2025-07-01 17:49:06.248 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.248 # on junk -- unless we have to)
2025-07-01 17:49:06.248 for j in range(blo, bhi):
2025-07-01 17:49:06.248 bj = b[j]
2025-07-01 17:49:06.248 cruncher.set_seq2(bj)
2025-07-01 17:49:06.248 for i in range(alo, ahi):
2025-07-01 17:49:06.248 ai = a[i]
2025-07-01 17:49:06.248 if ai == bj:
2025-07-01 17:49:06.248 if eqi is None:
2025-07-01 17:49:06.248 eqi, eqj = i, j
2025-07-01 17:49:06.249 continue
2025-07-01 17:49:06.249 cruncher.set_seq1(ai)
2025-07-01 17:49:06.249 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.249 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.249 # compares by a factor of 3.
2025-07-01 17:49:06.249 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.249 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.249 # of the computation is cached by cruncher
2025-07-01 17:49:06.249 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.249 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.249 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.249 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.249 if best_ratio < cutoff:
2025-07-01 17:49:06.249 # no non-identical "pretty close" pair
2025-07-01 17:49:06.249 if eqi is None:
2025-07-01 17:49:06.249 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.250 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.250 return
2025-07-01 17:49:06.250 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.250 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.250 else:
2025-07-01 17:49:06.250 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.250 eqi = None
2025-07-01 17:49:06.250
2025-07-01 17:49:06.250 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.250 # identical
2025-07-01 17:49:06.250
2025-07-01 17:49:06.250 # pump out diffs from before the synch point
2025-07-01 17:49:06.250 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.250
2025-07-01 17:49:06.250 # do intraline marking on the synch pair
2025-07-01 17:49:06.251 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.251 if eqi is None:
2025-07-01 17:49:06.251 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.251 atags = btags = ""
2025-07-01 17:49:06.251 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.251 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.251 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.251 if tag == 'replace':
2025-07-01 17:49:06.251 atags += '^' * la
2025-07-01 17:49:06.251 btags += '^' * lb
2025-07-01 17:49:06.251 elif tag == 'delete':
2025-07-01 17:49:06.251 atags += '-' * la
2025-07-01 17:49:06.251 elif tag == 'insert':
2025-07-01 17:49:06.251 btags += '+' * lb
2025-07-01 17:49:06.251 elif tag == 'equal':
2025-07-01 17:49:06.251 atags += ' ' * la
2025-07-01 17:49:06.252 btags += ' ' * lb
2025-07-01 17:49:06.252 else:
2025-07-01 17:49:06.252 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.252 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.252 else:
2025-07-01 17:49:06.252 # the synch pair is identical
2025-07-01 17:49:06.252 yield ' ' + aelt
2025-07-01 17:49:06.252
2025-07-01 17:49:06.252 # pump out diffs from after the synch point
2025-07-01 17:49:06.252 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.252
2025-07-01 17:49:06.252 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.252 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.252
2025-07-01 17:49:06.252 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.252 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.253 alo = 118, ahi = 1101
2025-07-01 17:49:06.253 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.253 blo = 118, bhi = 1101
2025-07-01 17:49:06.253
2025-07-01 17:49:06.253 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.253 g = []
2025-07-01 17:49:06.253 if alo < ahi:
2025-07-01 17:49:06.253 if blo < bhi:
2025-07-01 17:49:06.253 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.253 else:
2025-07-01 17:49:06.253 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.253 elif blo < bhi:
2025-07-01 17:49:06.253 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.253
2025-07-01 17:49:06.253 > yield from g
2025-07-01 17:49:06.253
2025-07-01 17:49:06.254 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.254 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.254
2025-07-01 17:49:06.254 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.254 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.254 alo = 118, ahi = 1101
2025-07-01 17:49:06.254 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.254 blo = 118, bhi = 1101
2025-07-01 17:49:06.254
2025-07-01 17:49:06.254 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.254 r"""
2025-07-01 17:49:06.254 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.254 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.254 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.254 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.255
2025-07-01 17:49:06.255 Example:
2025-07-01 17:49:06.255
2025-07-01 17:49:06.255 >>> d = Differ()
2025-07-01 17:49:06.255 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.255 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.255 >>> print(''.join(results), end="")
2025-07-01 17:49:06.255 - abcDefghiJkl
2025-07-01 17:49:06.255 + abcdefGhijkl
2025-07-01 17:49:06.255 """
2025-07-01 17:49:06.255
2025-07-01 17:49:06.255 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.255 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.255 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.255 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.256 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.256
2025-07-01 17:49:06.256 # search for the pair that matches best without being identical
2025-07-01 17:49:06.256 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.256 # on junk -- unless we have to)
2025-07-01 17:49:06.256 for j in range(blo, bhi):
2025-07-01 17:49:06.256 bj = b[j]
2025-07-01 17:49:06.256 cruncher.set_seq2(bj)
2025-07-01 17:49:06.256 for i in range(alo, ahi):
2025-07-01 17:49:06.256 ai = a[i]
2025-07-01 17:49:06.256 if ai == bj:
2025-07-01 17:49:06.256 if eqi is None:
2025-07-01 17:49:06.256 eqi, eqj = i, j
2025-07-01 17:49:06.256 continue
2025-07-01 17:49:06.256 cruncher.set_seq1(ai)
2025-07-01 17:49:06.256 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.257 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.262 # compares by a factor of 3.
2025-07-01 17:49:06.262 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.262 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.262 # of the computation is cached by cruncher
2025-07-01 17:49:06.262 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.262 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.262 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.262 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.262 if best_ratio < cutoff:
2025-07-01 17:49:06.262 # no non-identical "pretty close" pair
2025-07-01 17:49:06.262 if eqi is None:
2025-07-01 17:49:06.262 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.262 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.263 return
2025-07-01 17:49:06.263 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.263 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.263 else:
2025-07-01 17:49:06.263 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.263 eqi = None
2025-07-01 17:49:06.263
2025-07-01 17:49:06.263 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.263 # identical
2025-07-01 17:49:06.263
2025-07-01 17:49:06.263 # pump out diffs from before the synch point
2025-07-01 17:49:06.263 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.263
2025-07-01 17:49:06.263 # do intraline marking on the synch pair
2025-07-01 17:49:06.263 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.263 if eqi is None:
2025-07-01 17:49:06.264 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.264 atags = btags = ""
2025-07-01 17:49:06.264 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.264 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.264 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.264 if tag == 'replace':
2025-07-01 17:49:06.264 atags += '^' * la
2025-07-01 17:49:06.264 btags += '^' * lb
2025-07-01 17:49:06.264 elif tag == 'delete':
2025-07-01 17:49:06.264 atags += '-' * la
2025-07-01 17:49:06.264 elif tag == 'insert':
2025-07-01 17:49:06.264 btags += '+' * lb
2025-07-01 17:49:06.264 elif tag == 'equal':
2025-07-01 17:49:06.264 atags += ' ' * la
2025-07-01 17:49:06.264 btags += ' ' * lb
2025-07-01 17:49:06.264 else:
2025-07-01 17:49:06.265 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.265 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.265 else:
2025-07-01 17:49:06.265 # the synch pair is identical
2025-07-01 17:49:06.265 yield ' ' + aelt
2025-07-01 17:49:06.265
2025-07-01 17:49:06.265 # pump out diffs from after the synch point
2025-07-01 17:49:06.265 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.265
2025-07-01 17:49:06.265 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.265 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.265
2025-07-01 17:49:06.265 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.265 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.265 alo = 119, ahi = 1101
2025-07-01 17:49:06.265 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.266 blo = 119, bhi = 1101
2025-07-01 17:49:06.266
2025-07-01 17:49:06.266 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.266 g = []
2025-07-01 17:49:06.266 if alo < ahi:
2025-07-01 17:49:06.266 if blo < bhi:
2025-07-01 17:49:06.266 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.266 else:
2025-07-01 17:49:06.266 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.266 elif blo < bhi:
2025-07-01 17:49:06.266 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.266
2025-07-01 17:49:06.266 > yield from g
2025-07-01 17:49:06.266
2025-07-01 17:49:06.266 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.266 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.267
2025-07-01 17:49:06.267 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.267 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.267 alo = 119, ahi = 1101
2025-07-01 17:49:06.267 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.267 blo = 119, bhi = 1101
2025-07-01 17:49:06.267
2025-07-01 17:49:06.267 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.267 r"""
2025-07-01 17:49:06.267 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.267 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.267 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.267 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.267
2025-07-01 17:49:06.268 Example:
2025-07-01 17:49:06.268
2025-07-01 17:49:06.268 >>> d = Differ()
2025-07-01 17:49:06.268 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.268 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.268 >>> print(''.join(results), end="")
2025-07-01 17:49:06.268 - abcDefghiJkl
2025-07-01 17:49:06.268 + abcdefGhijkl
2025-07-01 17:49:06.268 """
2025-07-01 17:49:06.268
2025-07-01 17:49:06.268 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.268 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.268 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.268 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.269 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.269
2025-07-01 17:49:06.269 # search for the pair that matches best without being identical
2025-07-01 17:49:06.269 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.269 # on junk -- unless we have to)
2025-07-01 17:49:06.269 for j in range(blo, bhi):
2025-07-01 17:49:06.269 bj = b[j]
2025-07-01 17:49:06.269 cruncher.set_seq2(bj)
2025-07-01 17:49:06.269 for i in range(alo, ahi):
2025-07-01 17:49:06.269 ai = a[i]
2025-07-01 17:49:06.269 if ai == bj:
2025-07-01 17:49:06.269 if eqi is None:
2025-07-01 17:49:06.269 eqi, eqj = i, j
2025-07-01 17:49:06.269 continue
2025-07-01 17:49:06.269 cruncher.set_seq1(ai)
2025-07-01 17:49:06.269 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.270 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.270 # compares by a factor of 3.
2025-07-01 17:49:06.270 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.270 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.270 # of the computation is cached by cruncher
2025-07-01 17:49:06.270 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.270 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.270 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.270 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.270 if best_ratio < cutoff:
2025-07-01 17:49:06.270 # no non-identical "pretty close" pair
2025-07-01 17:49:06.270 if eqi is None:
2025-07-01 17:49:06.270 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.270 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.270 return
2025-07-01 17:49:06.270 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.271 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.271 else:
2025-07-01 17:49:06.271 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.271 eqi = None
2025-07-01 17:49:06.271
2025-07-01 17:49:06.271 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.271 # identical
2025-07-01 17:49:06.271
2025-07-01 17:49:06.271 # pump out diffs from before the synch point
2025-07-01 17:49:06.271 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.271
2025-07-01 17:49:06.271 # do intraline marking on the synch pair
2025-07-01 17:49:06.271 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.271 if eqi is None:
2025-07-01 17:49:06.271 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.271 atags = btags = ""
2025-07-01 17:49:06.274 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.275 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.275 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.275 if tag == 'replace':
2025-07-01 17:49:06.275 atags += '^' * la
2025-07-01 17:49:06.275 btags += '^' * lb
2025-07-01 17:49:06.275 elif tag == 'delete':
2025-07-01 17:49:06.275 atags += '-' * la
2025-07-01 17:49:06.275 elif tag == 'insert':
2025-07-01 17:49:06.275 btags += '+' * lb
2025-07-01 17:49:06.275 elif tag == 'equal':
2025-07-01 17:49:06.275 atags += ' ' * la
2025-07-01 17:49:06.275 btags += ' ' * lb
2025-07-01 17:49:06.275 else:
2025-07-01 17:49:06.275 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.275 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.275 else:
2025-07-01 17:49:06.275 # the synch pair is identical
2025-07-01 17:49:06.275 yield ' ' + aelt
2025-07-01 17:49:06.275
2025-07-01 17:49:06.275 # pump out diffs from after the synch point
2025-07-01 17:49:06.275 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.276
2025-07-01 17:49:06.276 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.276 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.276
2025-07-01 17:49:06.276 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.276 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.276 alo = 120, ahi = 1101
2025-07-01 17:49:06.276 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.276 blo = 120, bhi = 1101
2025-07-01 17:49:06.276
2025-07-01 17:49:06.276 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.276 g = []
2025-07-01 17:49:06.276 if alo < ahi:
2025-07-01 17:49:06.276 if blo < bhi:
2025-07-01 17:49:06.276 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.276 else:
2025-07-01 17:49:06.276 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.276 elif blo < bhi:
2025-07-01 17:49:06.276 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.276
2025-07-01 17:49:06.277 > yield from g
2025-07-01 17:49:06.277
2025-07-01 17:49:06.277 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.277 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.277
2025-07-01 17:49:06.277 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.277 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.277 alo = 120, ahi = 1101
2025-07-01 17:49:06.277 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.277 blo = 120, bhi = 1101
2025-07-01 17:49:06.277
2025-07-01 17:49:06.277 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.277 r"""
2025-07-01 17:49:06.277 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.277 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.277 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.277 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.277
2025-07-01 17:49:06.277 Example:
2025-07-01 17:49:06.278
2025-07-01 17:49:06.278 >>> d = Differ()
2025-07-01 17:49:06.278 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.278 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.278 >>> print(''.join(results), end="")
2025-07-01 17:49:06.278 - abcDefghiJkl
2025-07-01 17:49:06.278 + abcdefGhijkl
2025-07-01 17:49:06.278 """
2025-07-01 17:49:06.278
2025-07-01 17:49:06.278 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.278 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.278 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.278 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.278 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.278
2025-07-01 17:49:06.278 # search for the pair that matches best without being identical
2025-07-01 17:49:06.278 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.278 # on junk -- unless we have to)
2025-07-01 17:49:06.278 for j in range(blo, bhi):
2025-07-01 17:49:06.279 bj = b[j]
2025-07-01 17:49:06.279 cruncher.set_seq2(bj)
2025-07-01 17:49:06.279 for i in range(alo, ahi):
2025-07-01 17:49:06.279 ai = a[i]
2025-07-01 17:49:06.279 if ai == bj:
2025-07-01 17:49:06.279 if eqi is None:
2025-07-01 17:49:06.279 eqi, eqj = i, j
2025-07-01 17:49:06.279 continue
2025-07-01 17:49:06.279 cruncher.set_seq1(ai)
2025-07-01 17:49:06.279 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.279 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.279 # compares by a factor of 3.
2025-07-01 17:49:06.279 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.279 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.279 # of the computation is cached by cruncher
2025-07-01 17:49:06.279 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.279 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.279 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.279 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.279 if best_ratio < cutoff:
2025-07-01 17:49:06.280 # no non-identical "pretty close" pair
2025-07-01 17:49:06.280 if eqi is None:
2025-07-01 17:49:06.280 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.280 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.280 return
2025-07-01 17:49:06.280 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.280 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.280 else:
2025-07-01 17:49:06.280 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.280 eqi = None
2025-07-01 17:49:06.280
2025-07-01 17:49:06.280 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.280 # identical
2025-07-01 17:49:06.280
2025-07-01 17:49:06.280 # pump out diffs from before the synch point
2025-07-01 17:49:06.280 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.280
2025-07-01 17:49:06.280 # do intraline marking on the synch pair
2025-07-01 17:49:06.280 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.281 if eqi is None:
2025-07-01 17:49:06.281 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.281 atags = btags = ""
2025-07-01 17:49:06.281 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.281 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.281 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.281 if tag == 'replace':
2025-07-01 17:49:06.281 atags += '^' * la
2025-07-01 17:49:06.281 btags += '^' * lb
2025-07-01 17:49:06.281 elif tag == 'delete':
2025-07-01 17:49:06.281 atags += '-' * la
2025-07-01 17:49:06.281 elif tag == 'insert':
2025-07-01 17:49:06.281 btags += '+' * lb
2025-07-01 17:49:06.281 elif tag == 'equal':
2025-07-01 17:49:06.281 atags += ' ' * la
2025-07-01 17:49:06.281 btags += ' ' * lb
2025-07-01 17:49:06.281 else:
2025-07-01 17:49:06.281 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.281 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.281 else:
2025-07-01 17:49:06.282 # the synch pair is identical
2025-07-01 17:49:06.282 yield ' ' + aelt
2025-07-01 17:49:06.282
2025-07-01 17:49:06.282 # pump out diffs from after the synch point
2025-07-01 17:49:06.282 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.282
2025-07-01 17:49:06.282 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.282 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.282
2025-07-01 17:49:06.282 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.282 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.282 alo = 121, ahi = 1101
2025-07-01 17:49:06.282 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.282 blo = 121, bhi = 1101
2025-07-01 17:49:06.282
2025-07-01 17:49:06.282 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.282 g = []
2025-07-01 17:49:06.282 if alo < ahi:
2025-07-01 17:49:06.282 if blo < bhi:
2025-07-01 17:49:06.282 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.282 else:
2025-07-01 17:49:06.283 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.283 elif blo < bhi:
2025-07-01 17:49:06.283 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.283
2025-07-01 17:49:06.283 > yield from g
2025-07-01 17:49:06.283
2025-07-01 17:49:06.283 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.283 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.283
2025-07-01 17:49:06.283 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.283 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.283 alo = 121, ahi = 1101
2025-07-01 17:49:06.283 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.283 blo = 121, bhi = 1101
2025-07-01 17:49:06.283
2025-07-01 17:49:06.283 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.283 r"""
2025-07-01 17:49:06.283 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.283 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.283 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.284 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.284
2025-07-01 17:49:06.284 Example:
2025-07-01 17:49:06.284
2025-07-01 17:49:06.284 >>> d = Differ()
2025-07-01 17:49:06.284 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.284 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.284 >>> print(''.join(results), end="")
2025-07-01 17:49:06.284 - abcDefghiJkl
2025-07-01 17:49:06.284 + abcdefGhijkl
2025-07-01 17:49:06.284 """
2025-07-01 17:49:06.284
2025-07-01 17:49:06.284 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.284 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.284 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.284 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.284 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.284
2025-07-01 17:49:06.284 # search for the pair that matches best without being identical
2025-07-01 17:49:06.284 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.285 # on junk -- unless we have to)
2025-07-01 17:49:06.285 for j in range(blo, bhi):
2025-07-01 17:49:06.285 bj = b[j]
2025-07-01 17:49:06.285 cruncher.set_seq2(bj)
2025-07-01 17:49:06.285 for i in range(alo, ahi):
2025-07-01 17:49:06.285 ai = a[i]
2025-07-01 17:49:06.285 if ai == bj:
2025-07-01 17:49:06.285 if eqi is None:
2025-07-01 17:49:06.285 eqi, eqj = i, j
2025-07-01 17:49:06.285 continue
2025-07-01 17:49:06.285 cruncher.set_seq1(ai)
2025-07-01 17:49:06.285 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.285 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.285 # compares by a factor of 3.
2025-07-01 17:49:06.285 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.285 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.285 # of the computation is cached by cruncher
2025-07-01 17:49:06.285 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.285 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.285 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.285 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.286 if best_ratio < cutoff:
2025-07-01 17:49:06.286 # no non-identical "pretty close" pair
2025-07-01 17:49:06.286 if eqi is None:
2025-07-01 17:49:06.286 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.286 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.286 return
2025-07-01 17:49:06.286 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.286 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.286 else:
2025-07-01 17:49:06.286 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.286 eqi = None
2025-07-01 17:49:06.286
2025-07-01 17:49:06.286 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.286 # identical
2025-07-01 17:49:06.286
2025-07-01 17:49:06.286 # pump out diffs from before the synch point
2025-07-01 17:49:06.286 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.286
2025-07-01 17:49:06.286 # do intraline marking on the synch pair
2025-07-01 17:49:06.286 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.286 if eqi is None:
2025-07-01 17:49:06.287 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.287 atags = btags = ""
2025-07-01 17:49:06.287 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.287 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.287 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.287 if tag == 'replace':
2025-07-01 17:49:06.287 atags += '^' * la
2025-07-01 17:49:06.287 btags += '^' * lb
2025-07-01 17:49:06.287 elif tag == 'delete':
2025-07-01 17:49:06.287 atags += '-' * la
2025-07-01 17:49:06.287 elif tag == 'insert':
2025-07-01 17:49:06.287 btags += '+' * lb
2025-07-01 17:49:06.287 elif tag == 'equal':
2025-07-01 17:49:06.287 atags += ' ' * la
2025-07-01 17:49:06.287 btags += ' ' * lb
2025-07-01 17:49:06.287 else:
2025-07-01 17:49:06.287 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.287 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.287 else:
2025-07-01 17:49:06.287 # the synch pair is identical
2025-07-01 17:49:06.287 yield ' ' + aelt
2025-07-01 17:49:06.287
2025-07-01 17:49:06.291 # pump out diffs from after the synch point
2025-07-01 17:49:06.291 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.291
2025-07-01 17:49:06.291 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.291 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.291
2025-07-01 17:49:06.291 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.291 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.291 alo = 122, ahi = 1101
2025-07-01 17:49:06.291 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.291 blo = 122, bhi = 1101
2025-07-01 17:49:06.291
2025-07-01 17:49:06.291 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.291 g = []
2025-07-01 17:49:06.291 if alo < ahi:
2025-07-01 17:49:06.291 if blo < bhi:
2025-07-01 17:49:06.291 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.291 else:
2025-07-01 17:49:06.292 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.292 elif blo < bhi:
2025-07-01 17:49:06.292 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.292
2025-07-01 17:49:06.292 > yield from g
2025-07-01 17:49:06.292
2025-07-01 17:49:06.292 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.292 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.292
2025-07-01 17:49:06.292 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.292 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.292 alo = 122, ahi = 1101
2025-07-01 17:49:06.292 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.292 blo = 122, bhi = 1101
2025-07-01 17:49:06.292
2025-07-01 17:49:06.292 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.292 r"""
2025-07-01 17:49:06.292 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.293 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.293 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.293 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.293
2025-07-01 17:49:06.293 Example:
2025-07-01 17:49:06.293
2025-07-01 17:49:06.293 >>> d = Differ()
2025-07-01 17:49:06.293 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.293 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.293 >>> print(''.join(results), end="")
2025-07-01 17:49:06.293 - abcDefghiJkl
2025-07-01 17:49:06.293 + abcdefGhijkl
2025-07-01 17:49:06.293 """
2025-07-01 17:49:06.293
2025-07-01 17:49:06.293 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.293 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.293 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.293 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.293 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.294
2025-07-01 17:49:06.294 # search for the pair that matches best without being identical
2025-07-01 17:49:06.294 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.294 # on junk -- unless we have to)
2025-07-01 17:49:06.294 for j in range(blo, bhi):
2025-07-01 17:49:06.294 bj = b[j]
2025-07-01 17:49:06.294 cruncher.set_seq2(bj)
2025-07-01 17:49:06.294 for i in range(alo, ahi):
2025-07-01 17:49:06.294 ai = a[i]
2025-07-01 17:49:06.294 if ai == bj:
2025-07-01 17:49:06.294 if eqi is None:
2025-07-01 17:49:06.294 eqi, eqj = i, j
2025-07-01 17:49:06.294 continue
2025-07-01 17:49:06.294 cruncher.set_seq1(ai)
2025-07-01 17:49:06.294 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.294 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.294 # compares by a factor of 3.
2025-07-01 17:49:06.294 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.294 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.294 # of the computation is cached by cruncher
2025-07-01 17:49:06.295 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.295 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.295 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.295 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.295 if best_ratio < cutoff:
2025-07-01 17:49:06.295 # no non-identical "pretty close" pair
2025-07-01 17:49:06.295 if eqi is None:
2025-07-01 17:49:06.295 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.295 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.295 return
2025-07-01 17:49:06.295 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.295 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.295 else:
2025-07-01 17:49:06.295 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.295 eqi = None
2025-07-01 17:49:06.295
2025-07-01 17:49:06.295 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.295 # identical
2025-07-01 17:49:06.295
2025-07-01 17:49:06.295 # pump out diffs from before the synch point
2025-07-01 17:49:06.296 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.296
2025-07-01 17:49:06.296 # do intraline marking on the synch pair
2025-07-01 17:49:06.296 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.296 if eqi is None:
2025-07-01 17:49:06.296 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.296 atags = btags = ""
2025-07-01 17:49:06.296 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.296 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.296 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.296 if tag == 'replace':
2025-07-01 17:49:06.296 atags += '^' * la
2025-07-01 17:49:06.296 btags += '^' * lb
2025-07-01 17:49:06.296 elif tag == 'delete':
2025-07-01 17:49:06.296 atags += '-' * la
2025-07-01 17:49:06.296 elif tag == 'insert':
2025-07-01 17:49:06.296 btags += '+' * lb
2025-07-01 17:49:06.296 elif tag == 'equal':
2025-07-01 17:49:06.296 atags += ' ' * la
2025-07-01 17:49:06.296 btags += ' ' * lb
2025-07-01 17:49:06.296 else:
2025-07-01 17:49:06.297 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.297 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.297 else:
2025-07-01 17:49:06.297 # the synch pair is identical
2025-07-01 17:49:06.297 yield ' ' + aelt
2025-07-01 17:49:06.297
2025-07-01 17:49:06.297 # pump out diffs from after the synch point
2025-07-01 17:49:06.297 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.297
2025-07-01 17:49:06.297 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.297 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.297
2025-07-01 17:49:06.297 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.297 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.297 alo = 123, ahi = 1101
2025-07-01 17:49:06.297 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.297 blo = 123, bhi = 1101
2025-07-01 17:49:06.297
2025-07-01 17:49:06.297 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.297 g = []
2025-07-01 17:49:06.298 if alo < ahi:
2025-07-01 17:49:06.298 if blo < bhi:
2025-07-01 17:49:06.298 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.298 else:
2025-07-01 17:49:06.298 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.298 elif blo < bhi:
2025-07-01 17:49:06.298 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.298
2025-07-01 17:49:06.298 > yield from g
2025-07-01 17:49:06.298
2025-07-01 17:49:06.298 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.298 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.298
2025-07-01 17:49:06.298 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.298 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.298 alo = 123, ahi = 1101
2025-07-01 17:49:06.298 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.298 blo = 123, bhi = 1101
2025-07-01 17:49:06.298
2025-07-01 17:49:06.298 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.299 r"""
2025-07-01 17:49:06.299 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.299 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.299 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.299 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.299
2025-07-01 17:49:06.299 Example:
2025-07-01 17:49:06.299
2025-07-01 17:49:06.299 >>> d = Differ()
2025-07-01 17:49:06.299 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.299 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.299 >>> print(''.join(results), end="")
2025-07-01 17:49:06.299 - abcDefghiJkl
2025-07-01 17:49:06.299 + abcdefGhijkl
2025-07-01 17:49:06.299 """
2025-07-01 17:49:06.299
2025-07-01 17:49:06.299 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.299 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.299 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.300 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.300 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.300
2025-07-01 17:49:06.300 # search for the pair that matches best without being identical
2025-07-01 17:49:06.300 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.300 # on junk -- unless we have to)
2025-07-01 17:49:06.300 for j in range(blo, bhi):
2025-07-01 17:49:06.300 bj = b[j]
2025-07-01 17:49:06.300 cruncher.set_seq2(bj)
2025-07-01 17:49:06.300 for i in range(alo, ahi):
2025-07-01 17:49:06.300 ai = a[i]
2025-07-01 17:49:06.300 if ai == bj:
2025-07-01 17:49:06.300 if eqi is None:
2025-07-01 17:49:06.300 eqi, eqj = i, j
2025-07-01 17:49:06.300 continue
2025-07-01 17:49:06.300 cruncher.set_seq1(ai)
2025-07-01 17:49:06.300 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.300 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.300 # compares by a factor of 3.
2025-07-01 17:49:06.300 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.301 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.301 # of the computation is cached by cruncher
2025-07-01 17:49:06.301 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.301 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.301 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.301 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.301 if best_ratio < cutoff:
2025-07-01 17:49:06.301 # no non-identical "pretty close" pair
2025-07-01 17:49:06.301 if eqi is None:
2025-07-01 17:49:06.301 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.301 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.301 return
2025-07-01 17:49:06.301 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.301 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.301 else:
2025-07-01 17:49:06.301 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.301 eqi = None
2025-07-01 17:49:06.301
2025-07-01 17:49:06.301 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.301 # identical
2025-07-01 17:49:06.302
2025-07-01 17:49:06.302 # pump out diffs from before the synch point
2025-07-01 17:49:06.302 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.302
2025-07-01 17:49:06.302 # do intraline marking on the synch pair
2025-07-01 17:49:06.302 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.302 if eqi is None:
2025-07-01 17:49:06.302 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.302 atags = btags = ""
2025-07-01 17:49:06.302 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.302 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.302 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.302 if tag == 'replace':
2025-07-01 17:49:06.302 atags += '^' * la
2025-07-01 17:49:06.302 btags += '^' * lb
2025-07-01 17:49:06.302 elif tag == 'delete':
2025-07-01 17:49:06.302 atags += '-' * la
2025-07-01 17:49:06.302 elif tag == 'insert':
2025-07-01 17:49:06.302 btags += '+' * lb
2025-07-01 17:49:06.302 elif tag == 'equal':
2025-07-01 17:49:06.302 atags += ' ' * la
2025-07-01 17:49:06.303 btags += ' ' * lb
2025-07-01 17:49:06.305 else:
2025-07-01 17:49:06.306 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.306 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.306 else:
2025-07-01 17:49:06.306 # the synch pair is identical
2025-07-01 17:49:06.306 yield ' ' + aelt
2025-07-01 17:49:06.306
2025-07-01 17:49:06.306 # pump out diffs from after the synch point
2025-07-01 17:49:06.306 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.306
2025-07-01 17:49:06.306 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.306 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.306
2025-07-01 17:49:06.306 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.306 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.306 alo = 124, ahi = 1101
2025-07-01 17:49:06.306 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.306 blo = 124, bhi = 1101
2025-07-01 17:49:06.306
2025-07-01 17:49:06.306 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.306 g = []
2025-07-01 17:49:06.307 if alo < ahi:
2025-07-01 17:49:06.307 if blo < bhi:
2025-07-01 17:49:06.307 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.307 else:
2025-07-01 17:49:06.307 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.307 elif blo < bhi:
2025-07-01 17:49:06.307 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.307
2025-07-01 17:49:06.307 > yield from g
2025-07-01 17:49:06.307
2025-07-01 17:49:06.307 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.307 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.307
2025-07-01 17:49:06.307 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.307 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.307 alo = 124, ahi = 1101
2025-07-01 17:49:06.307 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.307 blo = 124, bhi = 1101
2025-07-01 17:49:06.307
2025-07-01 17:49:06.307 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.308 r"""
2025-07-01 17:49:06.308 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.308 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.308 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.308 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.308
2025-07-01 17:49:06.308 Example:
2025-07-01 17:49:06.308
2025-07-01 17:49:06.308 >>> d = Differ()
2025-07-01 17:49:06.308 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.308 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.308 >>> print(''.join(results), end="")
2025-07-01 17:49:06.308 - abcDefghiJkl
2025-07-01 17:49:06.308 + abcdefGhijkl
2025-07-01 17:49:06.308 """
2025-07-01 17:49:06.308
2025-07-01 17:49:06.308 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.308 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.308 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.309 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.309 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.309
2025-07-01 17:49:06.309 # search for the pair that matches best without being identical
2025-07-01 17:49:06.309 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.309 # on junk -- unless we have to)
2025-07-01 17:49:06.309 for j in range(blo, bhi):
2025-07-01 17:49:06.309 bj = b[j]
2025-07-01 17:49:06.309 cruncher.set_seq2(bj)
2025-07-01 17:49:06.309 for i in range(alo, ahi):
2025-07-01 17:49:06.309 ai = a[i]
2025-07-01 17:49:06.309 if ai == bj:
2025-07-01 17:49:06.309 if eqi is None:
2025-07-01 17:49:06.309 eqi, eqj = i, j
2025-07-01 17:49:06.309 continue
2025-07-01 17:49:06.309 cruncher.set_seq1(ai)
2025-07-01 17:49:06.309 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.309 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.309 # compares by a factor of 3.
2025-07-01 17:49:06.309 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.310 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.310 # of the computation is cached by cruncher
2025-07-01 17:49:06.310 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.310 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.310 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.310 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.310 if best_ratio < cutoff:
2025-07-01 17:49:06.310 # no non-identical "pretty close" pair
2025-07-01 17:49:06.310 if eqi is None:
2025-07-01 17:49:06.310 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.310 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.310 return
2025-07-01 17:49:06.310 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.310 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.310 else:
2025-07-01 17:49:06.310 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.310 eqi = None
2025-07-01 17:49:06.310
2025-07-01 17:49:06.310 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.310 # identical
2025-07-01 17:49:06.311
2025-07-01 17:49:06.311 # pump out diffs from before the synch point
2025-07-01 17:49:06.311 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.311
2025-07-01 17:49:06.311 # do intraline marking on the synch pair
2025-07-01 17:49:06.311 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.311 if eqi is None:
2025-07-01 17:49:06.311 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.311 atags = btags = ""
2025-07-01 17:49:06.311 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.311 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.311 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.311 if tag == 'replace':
2025-07-01 17:49:06.311 atags += '^' * la
2025-07-01 17:49:06.311 btags += '^' * lb
2025-07-01 17:49:06.311 elif tag == 'delete':
2025-07-01 17:49:06.311 atags += '-' * la
2025-07-01 17:49:06.311 elif tag == 'insert':
2025-07-01 17:49:06.311 btags += '+' * lb
2025-07-01 17:49:06.312 elif tag == 'equal':
2025-07-01 17:49:06.312 atags += ' ' * la
2025-07-01 17:49:06.312 btags += ' ' * lb
2025-07-01 17:49:06.312 else:
2025-07-01 17:49:06.312 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.312 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.312 else:
2025-07-01 17:49:06.312 # the synch pair is identical
2025-07-01 17:49:06.312 yield ' ' + aelt
2025-07-01 17:49:06.312
2025-07-01 17:49:06.312 # pump out diffs from after the synch point
2025-07-01 17:49:06.312 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.312
2025-07-01 17:49:06.312 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.312 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.312
2025-07-01 17:49:06.312 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.312 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.312 alo = 125, ahi = 1101
2025-07-01 17:49:06.312 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.313 blo = 125, bhi = 1101
2025-07-01 17:49:06.313
2025-07-01 17:49:06.313 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.313 g = []
2025-07-01 17:49:06.313 if alo < ahi:
2025-07-01 17:49:06.313 if blo < bhi:
2025-07-01 17:49:06.313 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.313 else:
2025-07-01 17:49:06.313 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.313 elif blo < bhi:
2025-07-01 17:49:06.313 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.313
2025-07-01 17:49:06.313 > yield from g
2025-07-01 17:49:06.313
2025-07-01 17:49:06.313 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.313 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.313
2025-07-01 17:49:06.313 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.313 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.313 alo = 125, ahi = 1101
2025-07-01 17:49:06.314 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.314 blo = 125, bhi = 1101
2025-07-01 17:49:06.314
2025-07-01 17:49:06.314 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.314 r"""
2025-07-01 17:49:06.314 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.314 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.314 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.314 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.314
2025-07-01 17:49:06.314 Example:
2025-07-01 17:49:06.314
2025-07-01 17:49:06.314 >>> d = Differ()
2025-07-01 17:49:06.314 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.314 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.314 >>> print(''.join(results), end="")
2025-07-01 17:49:06.314 - abcDefghiJkl
2025-07-01 17:49:06.314 + abcdefGhijkl
2025-07-01 17:49:06.314 """
2025-07-01 17:49:06.315
2025-07-01 17:49:06.315 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.315 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.315 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.315 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.315 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.315
2025-07-01 17:49:06.315 # search for the pair that matches best without being identical
2025-07-01 17:49:06.315 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.315 # on junk -- unless we have to)
2025-07-01 17:49:06.315 for j in range(blo, bhi):
2025-07-01 17:49:06.315 bj = b[j]
2025-07-01 17:49:06.315 cruncher.set_seq2(bj)
2025-07-01 17:49:06.315 for i in range(alo, ahi):
2025-07-01 17:49:06.315 ai = a[i]
2025-07-01 17:49:06.315 if ai == bj:
2025-07-01 17:49:06.315 if eqi is None:
2025-07-01 17:49:06.315 eqi, eqj = i, j
2025-07-01 17:49:06.315 continue
2025-07-01 17:49:06.315 cruncher.set_seq1(ai)
2025-07-01 17:49:06.316 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.316 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.316 # compares by a factor of 3.
2025-07-01 17:49:06.316 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.316 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.316 # of the computation is cached by cruncher
2025-07-01 17:49:06.316 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.316 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.316 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.316 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.316 if best_ratio < cutoff:
2025-07-01 17:49:06.316 # no non-identical "pretty close" pair
2025-07-01 17:49:06.316 if eqi is None:
2025-07-01 17:49:06.316 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.316 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.316 return
2025-07-01 17:49:06.316 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.316 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.316 else:
2025-07-01 17:49:06.316 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.316 eqi = None
2025-07-01 17:49:06.317
2025-07-01 17:49:06.317 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.317 # identical
2025-07-01 17:49:06.317
2025-07-01 17:49:06.317 # pump out diffs from before the synch point
2025-07-01 17:49:06.317 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.317
2025-07-01 17:49:06.317 # do intraline marking on the synch pair
2025-07-01 17:49:06.317 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.317 if eqi is None:
2025-07-01 17:49:06.317 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.317 atags = btags = ""
2025-07-01 17:49:06.317 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.317 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.317 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.317 if tag == 'replace':
2025-07-01 17:49:06.317 atags += '^' * la
2025-07-01 17:49:06.317 btags += '^' * lb
2025-07-01 17:49:06.317 elif tag == 'delete':
2025-07-01 17:49:06.317 atags += '-' * la
2025-07-01 17:49:06.317 elif tag == 'insert':
2025-07-01 17:49:06.318 btags += '+' * lb
2025-07-01 17:49:06.318 elif tag == 'equal':
2025-07-01 17:49:06.318 atags += ' ' * la
2025-07-01 17:49:06.318 btags += ' ' * lb
2025-07-01 17:49:06.318 else:
2025-07-01 17:49:06.318 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.318 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.318 else:
2025-07-01 17:49:06.318 # the synch pair is identical
2025-07-01 17:49:06.318 yield ' ' + aelt
2025-07-01 17:49:06.318
2025-07-01 17:49:06.318 # pump out diffs from after the synch point
2025-07-01 17:49:06.318 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.318
2025-07-01 17:49:06.318 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.318 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.318
2025-07-01 17:49:06.318 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.318 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.318 alo = 126, ahi = 1101
2025-07-01 17:49:06.319 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.322 blo = 126, bhi = 1101
2025-07-01 17:49:06.322
2025-07-01 17:49:06.322 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.322 g = []
2025-07-01 17:49:06.322 if alo < ahi:
2025-07-01 17:49:06.322 if blo < bhi:
2025-07-01 17:49:06.322 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.322 else:
2025-07-01 17:49:06.322 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.322 elif blo < bhi:
2025-07-01 17:49:06.322 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.322
2025-07-01 17:49:06.322 > yield from g
2025-07-01 17:49:06.322
2025-07-01 17:49:06.322 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.322 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.322
2025-07-01 17:49:06.322 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.322 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.323 alo = 126, ahi = 1101
2025-07-01 17:49:06.323 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.323 blo = 126, bhi = 1101
2025-07-01 17:49:06.323
2025-07-01 17:49:06.323 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.323 r"""
2025-07-01 17:49:06.323 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.323 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.323 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.323 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.323
2025-07-01 17:49:06.323 Example:
2025-07-01 17:49:06.323
2025-07-01 17:49:06.323 >>> d = Differ()
2025-07-01 17:49:06.323 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.323 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.323 >>> print(''.join(results), end="")
2025-07-01 17:49:06.323 - abcDefghiJkl
2025-07-01 17:49:06.323 + abcdefGhijkl
2025-07-01 17:49:06.324 """
2025-07-01 17:49:06.324
2025-07-01 17:49:06.324 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.324 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.324 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.324 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.324 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.324
2025-07-01 17:49:06.324 # search for the pair that matches best without being identical
2025-07-01 17:49:06.324 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.324 # on junk -- unless we have to)
2025-07-01 17:49:06.324 for j in range(blo, bhi):
2025-07-01 17:49:06.324 bj = b[j]
2025-07-01 17:49:06.324 cruncher.set_seq2(bj)
2025-07-01 17:49:06.324 for i in range(alo, ahi):
2025-07-01 17:49:06.324 ai = a[i]
2025-07-01 17:49:06.324 if ai == bj:
2025-07-01 17:49:06.324 if eqi is None:
2025-07-01 17:49:06.324 eqi, eqj = i, j
2025-07-01 17:49:06.325 continue
2025-07-01 17:49:06.325 cruncher.set_seq1(ai)
2025-07-01 17:49:06.325 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.325 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.325 # compares by a factor of 3.
2025-07-01 17:49:06.325 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.325 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.325 # of the computation is cached by cruncher
2025-07-01 17:49:06.325 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.325 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.325 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.325 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.325 if best_ratio < cutoff:
2025-07-01 17:49:06.325 # no non-identical "pretty close" pair
2025-07-01 17:49:06.325 if eqi is None:
2025-07-01 17:49:06.325 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.325 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.325 return
2025-07-01 17:49:06.325 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.325 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.326 else:
2025-07-01 17:49:06.326 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.326 eqi = None
2025-07-01 17:49:06.326
2025-07-01 17:49:06.326 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.326 # identical
2025-07-01 17:49:06.326
2025-07-01 17:49:06.326 # pump out diffs from before the synch point
2025-07-01 17:49:06.326 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.326
2025-07-01 17:49:06.326 # do intraline marking on the synch pair
2025-07-01 17:49:06.326 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.326 if eqi is None:
2025-07-01 17:49:06.326 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.326 atags = btags = ""
2025-07-01 17:49:06.326 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.326 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.326 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.326 if tag == 'replace':
2025-07-01 17:49:06.326 atags += '^' * la
2025-07-01 17:49:06.326 btags += '^' * lb
2025-07-01 17:49:06.327 elif tag == 'delete':
2025-07-01 17:49:06.327 atags += '-' * la
2025-07-01 17:49:06.327 elif tag == 'insert':
2025-07-01 17:49:06.327 btags += '+' * lb
2025-07-01 17:49:06.327 elif tag == 'equal':
2025-07-01 17:49:06.327 atags += ' ' * la
2025-07-01 17:49:06.327 btags += ' ' * lb
2025-07-01 17:49:06.327 else:
2025-07-01 17:49:06.327 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.327 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.327 else:
2025-07-01 17:49:06.327 # the synch pair is identical
2025-07-01 17:49:06.327 yield ' ' + aelt
2025-07-01 17:49:06.327
2025-07-01 17:49:06.327 # pump out diffs from after the synch point
2025-07-01 17:49:06.327 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.327
2025-07-01 17:49:06.327 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.327 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.327
2025-07-01 17:49:06.327 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.328 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.328 alo = 127, ahi = 1101
2025-07-01 17:49:06.328 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.328 blo = 127, bhi = 1101
2025-07-01 17:49:06.328
2025-07-01 17:49:06.328 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.328 g = []
2025-07-01 17:49:06.328 if alo < ahi:
2025-07-01 17:49:06.328 if blo < bhi:
2025-07-01 17:49:06.328 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.328 else:
2025-07-01 17:49:06.328 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.328 elif blo < bhi:
2025-07-01 17:49:06.328 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.328
2025-07-01 17:49:06.328 > yield from g
2025-07-01 17:49:06.328
2025-07-01 17:49:06.328 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.328 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.328
2025-07-01 17:49:06.328 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.328 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.329 alo = 127, ahi = 1101
2025-07-01 17:49:06.329 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.329 blo = 127, bhi = 1101
2025-07-01 17:49:06.329
2025-07-01 17:49:06.329 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.329 r"""
2025-07-01 17:49:06.329 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.329 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.329 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.329 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.329
2025-07-01 17:49:06.329 Example:
2025-07-01 17:49:06.329
2025-07-01 17:49:06.329 >>> d = Differ()
2025-07-01 17:49:06.329 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.329 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.329 >>> print(''.join(results), end="")
2025-07-01 17:49:06.329 - abcDefghiJkl
2025-07-01 17:49:06.329 + abcdefGhijkl
2025-07-01 17:49:06.330 """
2025-07-01 17:49:06.330
2025-07-01 17:49:06.330 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.330 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.330 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.330 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.330 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.330
2025-07-01 17:49:06.330 # search for the pair that matches best without being identical
2025-07-01 17:49:06.330 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.330 # on junk -- unless we have to)
2025-07-01 17:49:06.330 for j in range(blo, bhi):
2025-07-01 17:49:06.330 bj = b[j]
2025-07-01 17:49:06.330 cruncher.set_seq2(bj)
2025-07-01 17:49:06.330 for i in range(alo, ahi):
2025-07-01 17:49:06.330 ai = a[i]
2025-07-01 17:49:06.330 if ai == bj:
2025-07-01 17:49:06.330 if eqi is None:
2025-07-01 17:49:06.330 eqi, eqj = i, j
2025-07-01 17:49:06.330 continue
2025-07-01 17:49:06.330 cruncher.set_seq1(ai)
2025-07-01 17:49:06.330 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.331 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.331 # compares by a factor of 3.
2025-07-01 17:49:06.331 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.331 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.331 # of the computation is cached by cruncher
2025-07-01 17:49:06.331 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.331 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.331 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.331 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.331 if best_ratio < cutoff:
2025-07-01 17:49:06.331 # no non-identical "pretty close" pair
2025-07-01 17:49:06.331 if eqi is None:
2025-07-01 17:49:06.331 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.331 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.331 return
2025-07-01 17:49:06.331 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.331 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.331 else:
2025-07-01 17:49:06.331 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.332 eqi = None
2025-07-01 17:49:06.332
2025-07-01 17:49:06.332 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.332 # identical
2025-07-01 17:49:06.332
2025-07-01 17:49:06.332 # pump out diffs from before the synch point
2025-07-01 17:49:06.332 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.332
2025-07-01 17:49:06.332 # do intraline marking on the synch pair
2025-07-01 17:49:06.332 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.332 if eqi is None:
2025-07-01 17:49:06.332 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.332 atags = btags = ""
2025-07-01 17:49:06.332 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.332 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.332 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.332 if tag == 'replace':
2025-07-01 17:49:06.332 atags += '^' * la
2025-07-01 17:49:06.332 btags += '^' * lb
2025-07-01 17:49:06.332 elif tag == 'delete':
2025-07-01 17:49:06.332 atags += '-' * la
2025-07-01 17:49:06.333 elif tag == 'insert':
2025-07-01 17:49:06.333 btags += '+' * lb
2025-07-01 17:49:06.333 elif tag == 'equal':
2025-07-01 17:49:06.333 atags += ' ' * la
2025-07-01 17:49:06.333 btags += ' ' * lb
2025-07-01 17:49:06.333 else:
2025-07-01 17:49:06.333 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.333 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.333 else:
2025-07-01 17:49:06.333 # the synch pair is identical
2025-07-01 17:49:06.333 yield ' ' + aelt
2025-07-01 17:49:06.333
2025-07-01 17:49:06.333 # pump out diffs from after the synch point
2025-07-01 17:49:06.333 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.333
2025-07-01 17:49:06.333 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.333 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.333
2025-07-01 17:49:06.333 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.333 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.333 alo = 128, ahi = 1101
2025-07-01 17:49:06.334 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.334 blo = 128, bhi = 1101
2025-07-01 17:49:06.334
2025-07-01 17:49:06.334 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.334 g = []
2025-07-01 17:49:06.334 if alo < ahi:
2025-07-01 17:49:06.334 if blo < bhi:
2025-07-01 17:49:06.334 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.334 else:
2025-07-01 17:49:06.334 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.334 elif blo < bhi:
2025-07-01 17:49:06.334 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.334
2025-07-01 17:49:06.334 > yield from g
2025-07-01 17:49:06.334
2025-07-01 17:49:06.334 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.334 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.334
2025-07-01 17:49:06.334 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.334 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.334 alo = 128, ahi = 1101
2025-07-01 17:49:06.334 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.338 blo = 128, bhi = 1101
2025-07-01 17:49:06.338
2025-07-01 17:49:06.338 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.338 r"""
2025-07-01 17:49:06.338 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.338 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.338 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.338 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.338
2025-07-01 17:49:06.338 Example:
2025-07-01 17:49:06.338
2025-07-01 17:49:06.338 >>> d = Differ()
2025-07-01 17:49:06.338 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.338 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.338 >>> print(''.join(results), end="")
2025-07-01 17:49:06.338 - abcDefghiJkl
2025-07-01 17:49:06.338 + abcdefGhijkl
2025-07-01 17:49:06.338 """
2025-07-01 17:49:06.339
2025-07-01 17:49:06.339 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.339 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.339 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.339 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.339 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.339
2025-07-01 17:49:06.339 # search for the pair that matches best without being identical
2025-07-01 17:49:06.339 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.339 # on junk -- unless we have to)
2025-07-01 17:49:06.339 for j in range(blo, bhi):
2025-07-01 17:49:06.339 bj = b[j]
2025-07-01 17:49:06.339 cruncher.set_seq2(bj)
2025-07-01 17:49:06.339 for i in range(alo, ahi):
2025-07-01 17:49:06.339 ai = a[i]
2025-07-01 17:49:06.339 if ai == bj:
2025-07-01 17:49:06.339 if eqi is None:
2025-07-01 17:49:06.339 eqi, eqj = i, j
2025-07-01 17:49:06.339 continue
2025-07-01 17:49:06.339 cruncher.set_seq1(ai)
2025-07-01 17:49:06.339 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.339 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.340 # compares by a factor of 3.
2025-07-01 17:49:06.340 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.340 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.340 # of the computation is cached by cruncher
2025-07-01 17:49:06.340 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.340 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.340 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.340 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.340 if best_ratio < cutoff:
2025-07-01 17:49:06.340 # no non-identical "pretty close" pair
2025-07-01 17:49:06.340 if eqi is None:
2025-07-01 17:49:06.340 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.340 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.340 return
2025-07-01 17:49:06.340 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.340 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.340 else:
2025-07-01 17:49:06.340 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.340 eqi = None
2025-07-01 17:49:06.340
2025-07-01 17:49:06.340 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.341 # identical
2025-07-01 17:49:06.341
2025-07-01 17:49:06.341 # pump out diffs from before the synch point
2025-07-01 17:49:06.341 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.341
2025-07-01 17:49:06.341 # do intraline marking on the synch pair
2025-07-01 17:49:06.341 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.341 if eqi is None:
2025-07-01 17:49:06.341 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.341 atags = btags = ""
2025-07-01 17:49:06.341 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.341 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.341 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.341 if tag == 'replace':
2025-07-01 17:49:06.341 atags += '^' * la
2025-07-01 17:49:06.341 btags += '^' * lb
2025-07-01 17:49:06.341 elif tag == 'delete':
2025-07-01 17:49:06.341 atags += '-' * la
2025-07-01 17:49:06.341 elif tag == 'insert':
2025-07-01 17:49:06.341 btags += '+' * lb
2025-07-01 17:49:06.341 elif tag == 'equal':
2025-07-01 17:49:06.342 atags += ' ' * la
2025-07-01 17:49:06.342 btags += ' ' * lb
2025-07-01 17:49:06.342 else:
2025-07-01 17:49:06.342 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.342 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.342 else:
2025-07-01 17:49:06.342 # the synch pair is identical
2025-07-01 17:49:06.342 yield ' ' + aelt
2025-07-01 17:49:06.342
2025-07-01 17:49:06.342 # pump out diffs from after the synch point
2025-07-01 17:49:06.342 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.342
2025-07-01 17:49:06.342 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.342 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.342
2025-07-01 17:49:06.342 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.342 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.342 alo = 129, ahi = 1101
2025-07-01 17:49:06.343 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.343 blo = 129, bhi = 1101
2025-07-01 17:49:06.343
2025-07-01 17:49:06.343 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.343 g = []
2025-07-01 17:49:06.343 if alo < ahi:
2025-07-01 17:49:06.343 if blo < bhi:
2025-07-01 17:49:06.343 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.343 else:
2025-07-01 17:49:06.343 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.343 elif blo < bhi:
2025-07-01 17:49:06.343 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.343
2025-07-01 17:49:06.343 > yield from g
2025-07-01 17:49:06.343
2025-07-01 17:49:06.343 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.343 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.343
2025-07-01 17:49:06.343 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.343 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.344 alo = 129, ahi = 1101
2025-07-01 17:49:06.344 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.344 blo = 129, bhi = 1101
2025-07-01 17:49:06.344
2025-07-01 17:49:06.344 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.344 r"""
2025-07-01 17:49:06.344 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.344 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.344 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.344 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.344
2025-07-01 17:49:06.344 Example:
2025-07-01 17:49:06.344
2025-07-01 17:49:06.344 >>> d = Differ()
2025-07-01 17:49:06.344 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.344 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.344 >>> print(''.join(results), end="")
2025-07-01 17:49:06.344 - abcDefghiJkl
2025-07-01 17:49:06.344 + abcdefGhijkl
2025-07-01 17:49:06.345 """
2025-07-01 17:49:06.345
2025-07-01 17:49:06.345 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.345 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.345 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.345 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.345 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.345
2025-07-01 17:49:06.345 # search for the pair that matches best without being identical
2025-07-01 17:49:06.345 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.345 # on junk -- unless we have to)
2025-07-01 17:49:06.345 for j in range(blo, bhi):
2025-07-01 17:49:06.345 bj = b[j]
2025-07-01 17:49:06.345 cruncher.set_seq2(bj)
2025-07-01 17:49:06.345 for i in range(alo, ahi):
2025-07-01 17:49:06.345 ai = a[i]
2025-07-01 17:49:06.345 if ai == bj:
2025-07-01 17:49:06.345 if eqi is None:
2025-07-01 17:49:06.345 eqi, eqj = i, j
2025-07-01 17:49:06.345 continue
2025-07-01 17:49:06.346 cruncher.set_seq1(ai)
2025-07-01 17:49:06.346 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.346 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.346 # compares by a factor of 3.
2025-07-01 17:49:06.346 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.346 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.346 # of the computation is cached by cruncher
2025-07-01 17:49:06.346 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.346 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.346 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.346 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.346 if best_ratio < cutoff:
2025-07-01 17:49:06.346 # no non-identical "pretty close" pair
2025-07-01 17:49:06.346 if eqi is None:
2025-07-01 17:49:06.346 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.346 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.346 return
2025-07-01 17:49:06.346 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.346 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.346 else:
2025-07-01 17:49:06.346 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.347 eqi = None
2025-07-01 17:49:06.347
2025-07-01 17:49:06.347 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.347 # identical
2025-07-01 17:49:06.347
2025-07-01 17:49:06.347 # pump out diffs from before the synch point
2025-07-01 17:49:06.347 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.347
2025-07-01 17:49:06.347 # do intraline marking on the synch pair
2025-07-01 17:49:06.347 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.347 if eqi is None:
2025-07-01 17:49:06.347 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.347 atags = btags = ""
2025-07-01 17:49:06.347 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.347 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.347 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.347 if tag == 'replace':
2025-07-01 17:49:06.347 atags += '^' * la
2025-07-01 17:49:06.347 btags += '^' * lb
2025-07-01 17:49:06.347 elif tag == 'delete':
2025-07-01 17:49:06.347 atags += '-' * la
2025-07-01 17:49:06.348 elif tag == 'insert':
2025-07-01 17:49:06.348 btags += '+' * lb
2025-07-01 17:49:06.348 elif tag == 'equal':
2025-07-01 17:49:06.348 atags += ' ' * la
2025-07-01 17:49:06.348 btags += ' ' * lb
2025-07-01 17:49:06.348 else:
2025-07-01 17:49:06.348 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.348 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.348 else:
2025-07-01 17:49:06.348 # the synch pair is identical
2025-07-01 17:49:06.348 yield ' ' + aelt
2025-07-01 17:49:06.348
2025-07-01 17:49:06.348 # pump out diffs from after the synch point
2025-07-01 17:49:06.348 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.348
2025-07-01 17:49:06.348 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.348 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.348
2025-07-01 17:49:06.348 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.348 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.348 alo = 130, ahi = 1101
2025-07-01 17:49:06.349 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.349 blo = 130, bhi = 1101
2025-07-01 17:49:06.349
2025-07-01 17:49:06.349 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.349 g = []
2025-07-01 17:49:06.349 if alo < ahi:
2025-07-01 17:49:06.349 if blo < bhi:
2025-07-01 17:49:06.349 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.349 else:
2025-07-01 17:49:06.349 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.349 elif blo < bhi:
2025-07-01 17:49:06.349 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.349
2025-07-01 17:49:06.349 > yield from g
2025-07-01 17:49:06.349
2025-07-01 17:49:06.349 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.349 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.349
2025-07-01 17:49:06.349 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.349 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.350 alo = 130, ahi = 1101
2025-07-01 17:49:06.350 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.350 blo = 130, bhi = 1101
2025-07-01 17:49:06.350
2025-07-01 17:49:06.350 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.350 r"""
2025-07-01 17:49:06.350 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.350 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.350 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.350 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.350
2025-07-01 17:49:06.350 Example:
2025-07-01 17:49:06.350
2025-07-01 17:49:06.350 >>> d = Differ()
2025-07-01 17:49:06.350 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.350 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.350 >>> print(''.join(results), end="")
2025-07-01 17:49:06.350 - abcDefghiJkl
2025-07-01 17:49:06.350 + abcdefGhijkl
2025-07-01 17:49:06.353 """
2025-07-01 17:49:06.354
2025-07-01 17:49:06.354 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.354 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.354 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.354 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.354 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.354
2025-07-01 17:49:06.354 # search for the pair that matches best without being identical
2025-07-01 17:49:06.354 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.354 # on junk -- unless we have to)
2025-07-01 17:49:06.354 for j in range(blo, bhi):
2025-07-01 17:49:06.354 bj = b[j]
2025-07-01 17:49:06.354 cruncher.set_seq2(bj)
2025-07-01 17:49:06.354 for i in range(alo, ahi):
2025-07-01 17:49:06.354 ai = a[i]
2025-07-01 17:49:06.354 if ai == bj:
2025-07-01 17:49:06.354 if eqi is None:
2025-07-01 17:49:06.354 eqi, eqj = i, j
2025-07-01 17:49:06.354 continue
2025-07-01 17:49:06.354 cruncher.set_seq1(ai)
2025-07-01 17:49:06.354 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.355 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.355 # compares by a factor of 3.
2025-07-01 17:49:06.355 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.355 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.355 # of the computation is cached by cruncher
2025-07-01 17:49:06.355 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.355 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.355 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.355 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.355 if best_ratio < cutoff:
2025-07-01 17:49:06.355 # no non-identical "pretty close" pair
2025-07-01 17:49:06.355 if eqi is None:
2025-07-01 17:49:06.355 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.355 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.355 return
2025-07-01 17:49:06.355 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.355 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.355 else:
2025-07-01 17:49:06.355 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.355 eqi = None
2025-07-01 17:49:06.356
2025-07-01 17:49:06.356 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.356 # identical
2025-07-01 17:49:06.356
2025-07-01 17:49:06.356 # pump out diffs from before the synch point
2025-07-01 17:49:06.356 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.356
2025-07-01 17:49:06.356 # do intraline marking on the synch pair
2025-07-01 17:49:06.356 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.356 if eqi is None:
2025-07-01 17:49:06.356 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.356 atags = btags = ""
2025-07-01 17:49:06.356 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.356 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.356 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.356 if tag == 'replace':
2025-07-01 17:49:06.356 atags += '^' * la
2025-07-01 17:49:06.356 btags += '^' * lb
2025-07-01 17:49:06.356 elif tag == 'delete':
2025-07-01 17:49:06.356 atags += '-' * la
2025-07-01 17:49:06.356 elif tag == 'insert':
2025-07-01 17:49:06.357 btags += '+' * lb
2025-07-01 17:49:06.357 elif tag == 'equal':
2025-07-01 17:49:06.357 atags += ' ' * la
2025-07-01 17:49:06.357 btags += ' ' * lb
2025-07-01 17:49:06.357 else:
2025-07-01 17:49:06.357 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.357 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.357 else:
2025-07-01 17:49:06.357 # the synch pair is identical
2025-07-01 17:49:06.357 yield ' ' + aelt
2025-07-01 17:49:06.357
2025-07-01 17:49:06.357 # pump out diffs from after the synch point
2025-07-01 17:49:06.357 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.357
2025-07-01 17:49:06.357 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.357 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.357
2025-07-01 17:49:06.357 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.357 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.357 alo = 131, ahi = 1101
2025-07-01 17:49:06.358 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.358 blo = 131, bhi = 1101
2025-07-01 17:49:06.358
2025-07-01 17:49:06.358 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.358 g = []
2025-07-01 17:49:06.358 if alo < ahi:
2025-07-01 17:49:06.358 if blo < bhi:
2025-07-01 17:49:06.358 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.358 else:
2025-07-01 17:49:06.358 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.358 elif blo < bhi:
2025-07-01 17:49:06.358 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.358
2025-07-01 17:49:06.358 > yield from g
2025-07-01 17:49:06.358
2025-07-01 17:49:06.358 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.358 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.358
2025-07-01 17:49:06.358 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.358 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.359 alo = 131, ahi = 1101
2025-07-01 17:49:06.359 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.359 blo = 131, bhi = 1101
2025-07-01 17:49:06.359
2025-07-01 17:49:06.359 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.359 r"""
2025-07-01 17:49:06.359 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.359 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.359 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.359 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.359
2025-07-01 17:49:06.359 Example:
2025-07-01 17:49:06.359
2025-07-01 17:49:06.359 >>> d = Differ()
2025-07-01 17:49:06.359 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.359 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.359 >>> print(''.join(results), end="")
2025-07-01 17:49:06.359 - abcDefghiJkl
2025-07-01 17:49:06.359 + abcdefGhijkl
2025-07-01 17:49:06.360 """
2025-07-01 17:49:06.360
2025-07-01 17:49:06.360 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.360 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.360 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.360 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.360 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.360
2025-07-01 17:49:06.360 # search for the pair that matches best without being identical
2025-07-01 17:49:06.360 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.360 # on junk -- unless we have to)
2025-07-01 17:49:06.360 for j in range(blo, bhi):
2025-07-01 17:49:06.360 bj = b[j]
2025-07-01 17:49:06.360 cruncher.set_seq2(bj)
2025-07-01 17:49:06.360 for i in range(alo, ahi):
2025-07-01 17:49:06.360 ai = a[i]
2025-07-01 17:49:06.360 if ai == bj:
2025-07-01 17:49:06.360 if eqi is None:
2025-07-01 17:49:06.360 eqi, eqj = i, j
2025-07-01 17:49:06.360 continue
2025-07-01 17:49:06.360 cruncher.set_seq1(ai)
2025-07-01 17:49:06.361 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.361 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.361 # compares by a factor of 3.
2025-07-01 17:49:06.361 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.361 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.361 # of the computation is cached by cruncher
2025-07-01 17:49:06.361 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.361 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.361 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.361 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.361 if best_ratio < cutoff:
2025-07-01 17:49:06.361 # no non-identical "pretty close" pair
2025-07-01 17:49:06.361 if eqi is None:
2025-07-01 17:49:06.361 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.361 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.361 return
2025-07-01 17:49:06.361 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.361 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.361 else:
2025-07-01 17:49:06.361 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.362 eqi = None
2025-07-01 17:49:06.362
2025-07-01 17:49:06.362 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.362 # identical
2025-07-01 17:49:06.362
2025-07-01 17:49:06.362 # pump out diffs from before the synch point
2025-07-01 17:49:06.362 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.362
2025-07-01 17:49:06.362 # do intraline marking on the synch pair
2025-07-01 17:49:06.362 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.362 if eqi is None:
2025-07-01 17:49:06.362 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.362 atags = btags = ""
2025-07-01 17:49:06.362 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.362 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.362 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.362 if tag == 'replace':
2025-07-01 17:49:06.362 atags += '^' * la
2025-07-01 17:49:06.362 btags += '^' * lb
2025-07-01 17:49:06.362 elif tag == 'delete':
2025-07-01 17:49:06.362 atags += '-' * la
2025-07-01 17:49:06.363 elif tag == 'insert':
2025-07-01 17:49:06.363 btags += '+' * lb
2025-07-01 17:49:06.363 elif tag == 'equal':
2025-07-01 17:49:06.363 atags += ' ' * la
2025-07-01 17:49:06.363 btags += ' ' * lb
2025-07-01 17:49:06.363 else:
2025-07-01 17:49:06.363 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.363 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.363 else:
2025-07-01 17:49:06.363 # the synch pair is identical
2025-07-01 17:49:06.363 yield ' ' + aelt
2025-07-01 17:49:06.363
2025-07-01 17:49:06.363 # pump out diffs from after the synch point
2025-07-01 17:49:06.363 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.363
2025-07-01 17:49:06.363 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.363 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.363
2025-07-01 17:49:06.363 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.363 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.363 alo = 132, ahi = 1101
2025-07-01 17:49:06.364 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.364 blo = 132, bhi = 1101
2025-07-01 17:49:06.364
2025-07-01 17:49:06.364 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.364 g = []
2025-07-01 17:49:06.364 if alo < ahi:
2025-07-01 17:49:06.364 if blo < bhi:
2025-07-01 17:49:06.364 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.364 else:
2025-07-01 17:49:06.364 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.364 elif blo < bhi:
2025-07-01 17:49:06.364 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.364
2025-07-01 17:49:06.364 > yield from g
2025-07-01 17:49:06.364
2025-07-01 17:49:06.364 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.364 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.364
2025-07-01 17:49:06.364 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.364 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.364 alo = 132, ahi = 1101
2025-07-01 17:49:06.365 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.365 blo = 132, bhi = 1101
2025-07-01 17:49:06.365
2025-07-01 17:49:06.365 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.365 r"""
2025-07-01 17:49:06.365 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.365 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.365 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.365 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.365
2025-07-01 17:49:06.365 Example:
2025-07-01 17:49:06.365
2025-07-01 17:49:06.365 >>> d = Differ()
2025-07-01 17:49:06.365 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.365 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.365 >>> print(''.join(results), end="")
2025-07-01 17:49:06.365 - abcDefghiJkl
2025-07-01 17:49:06.365 + abcdefGhijkl
2025-07-01 17:49:06.366 """
2025-07-01 17:49:06.366
2025-07-01 17:49:06.368 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.368 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.368 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.368 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.368 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.368
2025-07-01 17:49:06.368 # search for the pair that matches best without being identical
2025-07-01 17:49:06.368 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.368 # on junk -- unless we have to)
2025-07-01 17:49:06.368 for j in range(blo, bhi):
2025-07-01 17:49:06.368 bj = b[j]
2025-07-01 17:49:06.368 cruncher.set_seq2(bj)
2025-07-01 17:49:06.368 for i in range(alo, ahi):
2025-07-01 17:49:06.368 ai = a[i]
2025-07-01 17:49:06.368 if ai == bj:
2025-07-01 17:49:06.368 if eqi is None:
2025-07-01 17:49:06.368 eqi, eqj = i, j
2025-07-01 17:49:06.368 continue
2025-07-01 17:49:06.368 cruncher.set_seq1(ai)
2025-07-01 17:49:06.369 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.369 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.369 # compares by a factor of 3.
2025-07-01 17:49:06.369 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.369 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.369 # of the computation is cached by cruncher
2025-07-01 17:49:06.369 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.369 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.369 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.369 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.369 if best_ratio < cutoff:
2025-07-01 17:49:06.369 # no non-identical "pretty close" pair
2025-07-01 17:49:06.369 if eqi is None:
2025-07-01 17:49:06.369 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.369 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.369 return
2025-07-01 17:49:06.369 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.369 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.369 else:
2025-07-01 17:49:06.369 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.370 eqi = None
2025-07-01 17:49:06.370
2025-07-01 17:49:06.370 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.370 # identical
2025-07-01 17:49:06.370
2025-07-01 17:49:06.370 # pump out diffs from before the synch point
2025-07-01 17:49:06.370 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.370
2025-07-01 17:49:06.370 # do intraline marking on the synch pair
2025-07-01 17:49:06.370 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.370 if eqi is None:
2025-07-01 17:49:06.370 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.370 atags = btags = ""
2025-07-01 17:49:06.370 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.370 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.370 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.370 if tag == 'replace':
2025-07-01 17:49:06.370 atags += '^' * la
2025-07-01 17:49:06.370 btags += '^' * lb
2025-07-01 17:49:06.370 elif tag == 'delete':
2025-07-01 17:49:06.370 atags += '-' * la
2025-07-01 17:49:06.371 elif tag == 'insert':
2025-07-01 17:49:06.371 btags += '+' * lb
2025-07-01 17:49:06.371 elif tag == 'equal':
2025-07-01 17:49:06.371 atags += ' ' * la
2025-07-01 17:49:06.371 btags += ' ' * lb
2025-07-01 17:49:06.371 else:
2025-07-01 17:49:06.371 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.371 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.371 else:
2025-07-01 17:49:06.371 # the synch pair is identical
2025-07-01 17:49:06.371 yield ' ' + aelt
2025-07-01 17:49:06.371
2025-07-01 17:49:06.371 # pump out diffs from after the synch point
2025-07-01 17:49:06.371 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.371
2025-07-01 17:49:06.371 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.371 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.371
2025-07-01 17:49:06.371 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.371 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.372 alo = 133, ahi = 1101
2025-07-01 17:49:06.372 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.372 blo = 133, bhi = 1101
2025-07-01 17:49:06.372
2025-07-01 17:49:06.372 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.372 g = []
2025-07-01 17:49:06.372 if alo < ahi:
2025-07-01 17:49:06.372 if blo < bhi:
2025-07-01 17:49:06.372 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.372 else:
2025-07-01 17:49:06.372 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.372 elif blo < bhi:
2025-07-01 17:49:06.372 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.372
2025-07-01 17:49:06.372 > yield from g
2025-07-01 17:49:06.372
2025-07-01 17:49:06.372 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.372 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.372
2025-07-01 17:49:06.372 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.373 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.373 alo = 133, ahi = 1101
2025-07-01 17:49:06.373 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.373 blo = 133, bhi = 1101
2025-07-01 17:49:06.373
2025-07-01 17:49:06.373 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.373 r"""
2025-07-01 17:49:06.373 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.373 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.373 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.373 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.373
2025-07-01 17:49:06.373 Example:
2025-07-01 17:49:06.373
2025-07-01 17:49:06.373 >>> d = Differ()
2025-07-01 17:49:06.373 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.373 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.373 >>> print(''.join(results), end="")
2025-07-01 17:49:06.373 - abcDefghiJkl
2025-07-01 17:49:06.374 + abcdefGhijkl
2025-07-01 17:49:06.374 """
2025-07-01 17:49:06.374
2025-07-01 17:49:06.374 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.374 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.374 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.374 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.374 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.374
2025-07-01 17:49:06.374 # search for the pair that matches best without being identical
2025-07-01 17:49:06.374 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.374 # on junk -- unless we have to)
2025-07-01 17:49:06.374 for j in range(blo, bhi):
2025-07-01 17:49:06.374 bj = b[j]
2025-07-01 17:49:06.374 cruncher.set_seq2(bj)
2025-07-01 17:49:06.374 for i in range(alo, ahi):
2025-07-01 17:49:06.374 ai = a[i]
2025-07-01 17:49:06.374 if ai == bj:
2025-07-01 17:49:06.374 if eqi is None:
2025-07-01 17:49:06.375 eqi, eqj = i, j
2025-07-01 17:49:06.375 continue
2025-07-01 17:49:06.375 cruncher.set_seq1(ai)
2025-07-01 17:49:06.375 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.375 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.375 # compares by a factor of 3.
2025-07-01 17:49:06.375 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.375 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.375 # of the computation is cached by cruncher
2025-07-01 17:49:06.375 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.375 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.375 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.375 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.375 if best_ratio < cutoff:
2025-07-01 17:49:06.375 # no non-identical "pretty close" pair
2025-07-01 17:49:06.375 if eqi is None:
2025-07-01 17:49:06.375 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.375 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.375 return
2025-07-01 17:49:06.375 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.375 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.376 else:
2025-07-01 17:49:06.376 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.376 eqi = None
2025-07-01 17:49:06.376
2025-07-01 17:49:06.376 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.376 # identical
2025-07-01 17:49:06.376
2025-07-01 17:49:06.376 # pump out diffs from before the synch point
2025-07-01 17:49:06.376 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.376
2025-07-01 17:49:06.376 # do intraline marking on the synch pair
2025-07-01 17:49:06.376 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.376 if eqi is None:
2025-07-01 17:49:06.376 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.376 atags = btags = ""
2025-07-01 17:49:06.376 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.376 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.376 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.376 if tag == 'replace':
2025-07-01 17:49:06.376 atags += '^' * la
2025-07-01 17:49:06.376 btags += '^' * lb
2025-07-01 17:49:06.376 elif tag == 'delete':
2025-07-01 17:49:06.377 atags += '-' * la
2025-07-01 17:49:06.377 elif tag == 'insert':
2025-07-01 17:49:06.377 btags += '+' * lb
2025-07-01 17:49:06.377 elif tag == 'equal':
2025-07-01 17:49:06.377 atags += ' ' * la
2025-07-01 17:49:06.377 btags += ' ' * lb
2025-07-01 17:49:06.377 else:
2025-07-01 17:49:06.377 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.377 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.377 else:
2025-07-01 17:49:06.377 # the synch pair is identical
2025-07-01 17:49:06.377 yield ' ' + aelt
2025-07-01 17:49:06.377
2025-07-01 17:49:06.377 # pump out diffs from after the synch point
2025-07-01 17:49:06.377 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.377
2025-07-01 17:49:06.377 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.377 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.377
2025-07-01 17:49:06.377 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.377 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.378 alo = 136, ahi = 1101
2025-07-01 17:49:06.378 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.378 blo = 136, bhi = 1101
2025-07-01 17:49:06.378
2025-07-01 17:49:06.378 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.378 g = []
2025-07-01 17:49:06.378 if alo < ahi:
2025-07-01 17:49:06.378 if blo < bhi:
2025-07-01 17:49:06.378 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.378 else:
2025-07-01 17:49:06.378 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.378 elif blo < bhi:
2025-07-01 17:49:06.378 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.378
2025-07-01 17:49:06.378 > yield from g
2025-07-01 17:49:06.378
2025-07-01 17:49:06.378 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.378 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.378
2025-07-01 17:49:06.378 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.378 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.378 alo = 136, ahi = 1101
2025-07-01 17:49:06.379 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.379 blo = 136, bhi = 1101
2025-07-01 17:49:06.379
2025-07-01 17:49:06.379 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.379 r"""
2025-07-01 17:49:06.379 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.379 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.379 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.379 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.379
2025-07-01 17:49:06.379 Example:
2025-07-01 17:49:06.379
2025-07-01 17:49:06.379 >>> d = Differ()
2025-07-01 17:49:06.379 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.379 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.379 >>> print(''.join(results), end="")
2025-07-01 17:49:06.379 - abcDefghiJkl
2025-07-01 17:49:06.379 + abcdefGhijkl
2025-07-01 17:49:06.379 """
2025-07-01 17:49:06.380
2025-07-01 17:49:06.380 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.380 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.380 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.380 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.380 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.380
2025-07-01 17:49:06.380 # search for the pair that matches best without being identical
2025-07-01 17:49:06.380 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.380 # on junk -- unless we have to)
2025-07-01 17:49:06.380 for j in range(blo, bhi):
2025-07-01 17:49:06.380 bj = b[j]
2025-07-01 17:49:06.380 cruncher.set_seq2(bj)
2025-07-01 17:49:06.380 for i in range(alo, ahi):
2025-07-01 17:49:06.380 ai = a[i]
2025-07-01 17:49:06.380 if ai == bj:
2025-07-01 17:49:06.380 if eqi is None:
2025-07-01 17:49:06.380 eqi, eqj = i, j
2025-07-01 17:49:06.380 continue
2025-07-01 17:49:06.380 cruncher.set_seq1(ai)
2025-07-01 17:49:06.380 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.381 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.384 # compares by a factor of 3.
2025-07-01 17:49:06.384 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.384 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.384 # of the computation is cached by cruncher
2025-07-01 17:49:06.384 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.384 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.384 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.384 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.384 if best_ratio < cutoff:
2025-07-01 17:49:06.384 # no non-identical "pretty close" pair
2025-07-01 17:49:06.384 if eqi is None:
2025-07-01 17:49:06.384 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.384 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.384 return
2025-07-01 17:49:06.384 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.384 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.384 else:
2025-07-01 17:49:06.384 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.384 eqi = None
2025-07-01 17:49:06.384
2025-07-01 17:49:06.385 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.385 # identical
2025-07-01 17:49:06.385
2025-07-01 17:49:06.385 # pump out diffs from before the synch point
2025-07-01 17:49:06.385 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.385
2025-07-01 17:49:06.385 # do intraline marking on the synch pair
2025-07-01 17:49:06.385 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.385 if eqi is None:
2025-07-01 17:49:06.385 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.385 atags = btags = ""
2025-07-01 17:49:06.385 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.385 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.385 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.385 if tag == 'replace':
2025-07-01 17:49:06.385 atags += '^' * la
2025-07-01 17:49:06.385 btags += '^' * lb
2025-07-01 17:49:06.385 elif tag == 'delete':
2025-07-01 17:49:06.385 atags += '-' * la
2025-07-01 17:49:06.385 elif tag == 'insert':
2025-07-01 17:49:06.385 btags += '+' * lb
2025-07-01 17:49:06.386 elif tag == 'equal':
2025-07-01 17:49:06.386 atags += ' ' * la
2025-07-01 17:49:06.386 btags += ' ' * lb
2025-07-01 17:49:06.386 else:
2025-07-01 17:49:06.386 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.386 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.386 else:
2025-07-01 17:49:06.386 # the synch pair is identical
2025-07-01 17:49:06.386 yield ' ' + aelt
2025-07-01 17:49:06.386
2025-07-01 17:49:06.386 # pump out diffs from after the synch point
2025-07-01 17:49:06.386 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.386
2025-07-01 17:49:06.386 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.386 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.386
2025-07-01 17:49:06.386 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.386 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.386 alo = 137, ahi = 1101
2025-07-01 17:49:06.386 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.387 blo = 137, bhi = 1101
2025-07-01 17:49:06.387
2025-07-01 17:49:06.387 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.387 g = []
2025-07-01 17:49:06.387 if alo < ahi:
2025-07-01 17:49:06.387 if blo < bhi:
2025-07-01 17:49:06.387 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.387 else:
2025-07-01 17:49:06.387 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.387 elif blo < bhi:
2025-07-01 17:49:06.387 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.387
2025-07-01 17:49:06.387 > yield from g
2025-07-01 17:49:06.387
2025-07-01 17:49:06.387 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.387 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.387
2025-07-01 17:49:06.387 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.387 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.388 alo = 137, ahi = 1101
2025-07-01 17:49:06.388 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.388 blo = 137, bhi = 1101
2025-07-01 17:49:06.388
2025-07-01 17:49:06.388 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.388 r"""
2025-07-01 17:49:06.388 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.388 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.388 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.388 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.388
2025-07-01 17:49:06.388 Example:
2025-07-01 17:49:06.388
2025-07-01 17:49:06.388 >>> d = Differ()
2025-07-01 17:49:06.388 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.388 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.388 >>> print(''.join(results), end="")
2025-07-01 17:49:06.388 - abcDefghiJkl
2025-07-01 17:49:06.388 + abcdefGhijkl
2025-07-01 17:49:06.389 """
2025-07-01 17:49:06.389
2025-07-01 17:49:06.389 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.389 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.389 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.389 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.389 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.389
2025-07-01 17:49:06.389 # search for the pair that matches best without being identical
2025-07-01 17:49:06.389 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.389 # on junk -- unless we have to)
2025-07-01 17:49:06.389 for j in range(blo, bhi):
2025-07-01 17:49:06.389 bj = b[j]
2025-07-01 17:49:06.389 cruncher.set_seq2(bj)
2025-07-01 17:49:06.389 for i in range(alo, ahi):
2025-07-01 17:49:06.389 ai = a[i]
2025-07-01 17:49:06.389 if ai == bj:
2025-07-01 17:49:06.389 if eqi is None:
2025-07-01 17:49:06.389 eqi, eqj = i, j
2025-07-01 17:49:06.389 continue
2025-07-01 17:49:06.390 cruncher.set_seq1(ai)
2025-07-01 17:49:06.390 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.390 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.390 # compares by a factor of 3.
2025-07-01 17:49:06.390 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.390 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.390 # of the computation is cached by cruncher
2025-07-01 17:49:06.390 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.390 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.390 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.390 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.390 if best_ratio < cutoff:
2025-07-01 17:49:06.390 # no non-identical "pretty close" pair
2025-07-01 17:49:06.390 if eqi is None:
2025-07-01 17:49:06.390 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.390 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.390 return
2025-07-01 17:49:06.390 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.390 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.390 else:
2025-07-01 17:49:06.390 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.391 eqi = None
2025-07-01 17:49:06.391
2025-07-01 17:49:06.391 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.391 # identical
2025-07-01 17:49:06.391
2025-07-01 17:49:06.391 # pump out diffs from before the synch point
2025-07-01 17:49:06.391 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.391
2025-07-01 17:49:06.391 # do intraline marking on the synch pair
2025-07-01 17:49:06.391 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.391 if eqi is None:
2025-07-01 17:49:06.391 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.391 atags = btags = ""
2025-07-01 17:49:06.391 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.391 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.391 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.391 if tag == 'replace':
2025-07-01 17:49:06.391 atags += '^' * la
2025-07-01 17:49:06.391 btags += '^' * lb
2025-07-01 17:49:06.392 elif tag == 'delete':
2025-07-01 17:49:06.392 atags += '-' * la
2025-07-01 17:49:06.392 elif tag == 'insert':
2025-07-01 17:49:06.392 btags += '+' * lb
2025-07-01 17:49:06.392 elif tag == 'equal':
2025-07-01 17:49:06.392 atags += ' ' * la
2025-07-01 17:49:06.392 btags += ' ' * lb
2025-07-01 17:49:06.392 else:
2025-07-01 17:49:06.392 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.392 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.392 else:
2025-07-01 17:49:06.392 # the synch pair is identical
2025-07-01 17:49:06.392 yield ' ' + aelt
2025-07-01 17:49:06.392
2025-07-01 17:49:06.392 # pump out diffs from after the synch point
2025-07-01 17:49:06.392 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.392
2025-07-01 17:49:06.392 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.392 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.393
2025-07-01 17:49:06.393 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.393 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.393 alo = 138, ahi = 1101
2025-07-01 17:49:06.393 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.393 blo = 138, bhi = 1101
2025-07-01 17:49:06.393
2025-07-01 17:49:06.393 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.393 g = []
2025-07-01 17:49:06.393 if alo < ahi:
2025-07-01 17:49:06.393 if blo < bhi:
2025-07-01 17:49:06.393 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.393 else:
2025-07-01 17:49:06.393 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.393 elif blo < bhi:
2025-07-01 17:49:06.393 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.393
2025-07-01 17:49:06.393 > yield from g
2025-07-01 17:49:06.393
2025-07-01 17:49:06.393 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.394 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.394
2025-07-01 17:49:06.394 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.394 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.394 alo = 138, ahi = 1101
2025-07-01 17:49:06.394 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.394 blo = 138, bhi = 1101
2025-07-01 17:49:06.394
2025-07-01 17:49:06.394 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.394 r"""
2025-07-01 17:49:06.394 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.394 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.394 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.394 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.394
2025-07-01 17:49:06.394 Example:
2025-07-01 17:49:06.394
2025-07-01 17:49:06.394 >>> d = Differ()
2025-07-01 17:49:06.394 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.394 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.394 >>> print(''.join(results), end="")
2025-07-01 17:49:06.395 - abcDefghiJkl
2025-07-01 17:49:06.395 + abcdefGhijkl
2025-07-01 17:49:06.395 """
2025-07-01 17:49:06.395
2025-07-01 17:49:06.395 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.395 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.395 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.395 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.395 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.395
2025-07-01 17:49:06.395 # search for the pair that matches best without being identical
2025-07-01 17:49:06.395 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.395 # on junk -- unless we have to)
2025-07-01 17:49:06.395 for j in range(blo, bhi):
2025-07-01 17:49:06.395 bj = b[j]
2025-07-01 17:49:06.395 cruncher.set_seq2(bj)
2025-07-01 17:49:06.395 for i in range(alo, ahi):
2025-07-01 17:49:06.395 ai = a[i]
2025-07-01 17:49:06.395 if ai == bj:
2025-07-01 17:49:06.395 if eqi is None:
2025-07-01 17:49:06.396 eqi, eqj = i, j
2025-07-01 17:49:06.396 continue
2025-07-01 17:49:06.396 cruncher.set_seq1(ai)
2025-07-01 17:49:06.396 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.396 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.396 # compares by a factor of 3.
2025-07-01 17:49:06.396 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.396 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.396 # of the computation is cached by cruncher
2025-07-01 17:49:06.396 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.396 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.396 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.396 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.396 if best_ratio < cutoff:
2025-07-01 17:49:06.396 # no non-identical "pretty close" pair
2025-07-01 17:49:06.396 if eqi is None:
2025-07-01 17:49:06.396 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.396 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.396 return
2025-07-01 17:49:06.396 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.396 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.399 else:
2025-07-01 17:49:06.400 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.400 eqi = None
2025-07-01 17:49:06.400
2025-07-01 17:49:06.400 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.400 # identical
2025-07-01 17:49:06.400
2025-07-01 17:49:06.400 # pump out diffs from before the synch point
2025-07-01 17:49:06.400 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.400
2025-07-01 17:49:06.400 # do intraline marking on the synch pair
2025-07-01 17:49:06.400 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.400 if eqi is None:
2025-07-01 17:49:06.400 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.400 atags = btags = ""
2025-07-01 17:49:06.400 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.400 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.400 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.400 if tag == 'replace':
2025-07-01 17:49:06.400 atags += '^' * la
2025-07-01 17:49:06.400 btags += '^' * lb
2025-07-01 17:49:06.400 elif tag == 'delete':
2025-07-01 17:49:06.401 atags += '-' * la
2025-07-01 17:49:06.401 elif tag == 'insert':
2025-07-01 17:49:06.401 btags += '+' * lb
2025-07-01 17:49:06.401 elif tag == 'equal':
2025-07-01 17:49:06.401 atags += ' ' * la
2025-07-01 17:49:06.401 btags += ' ' * lb
2025-07-01 17:49:06.401 else:
2025-07-01 17:49:06.401 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.401 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.401 else:
2025-07-01 17:49:06.401 # the synch pair is identical
2025-07-01 17:49:06.401 yield ' ' + aelt
2025-07-01 17:49:06.401
2025-07-01 17:49:06.401 # pump out diffs from after the synch point
2025-07-01 17:49:06.401 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.401
2025-07-01 17:49:06.401 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.401 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.401
2025-07-01 17:49:06.402 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.402 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.402 alo = 139, ahi = 1101
2025-07-01 17:49:06.402 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.402 blo = 139, bhi = 1101
2025-07-01 17:49:06.402
2025-07-01 17:49:06.402 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.402 g = []
2025-07-01 17:49:06.402 if alo < ahi:
2025-07-01 17:49:06.402 if blo < bhi:
2025-07-01 17:49:06.402 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.402 else:
2025-07-01 17:49:06.402 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.402 elif blo < bhi:
2025-07-01 17:49:06.402 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.402
2025-07-01 17:49:06.402 > yield from g
2025-07-01 17:49:06.402
2025-07-01 17:49:06.402 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.402 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.402
2025-07-01 17:49:06.402 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.403 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.403 alo = 139, ahi = 1101
2025-07-01 17:49:06.403 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.403 blo = 139, bhi = 1101
2025-07-01 17:49:06.403
2025-07-01 17:49:06.403 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.403 r"""
2025-07-01 17:49:06.403 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.403 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.403 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.403 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.403
2025-07-01 17:49:06.403 Example:
2025-07-01 17:49:06.403
2025-07-01 17:49:06.403 >>> d = Differ()
2025-07-01 17:49:06.403 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.403 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.403 >>> print(''.join(results), end="")
2025-07-01 17:49:06.403 - abcDefghiJkl
2025-07-01 17:49:06.404 + abcdefGhijkl
2025-07-01 17:49:06.404 """
2025-07-01 17:49:06.404
2025-07-01 17:49:06.404 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.404 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.404 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.404 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.404 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.404
2025-07-01 17:49:06.404 # search for the pair that matches best without being identical
2025-07-01 17:49:06.404 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.404 # on junk -- unless we have to)
2025-07-01 17:49:06.404 for j in range(blo, bhi):
2025-07-01 17:49:06.404 bj = b[j]
2025-07-01 17:49:06.404 cruncher.set_seq2(bj)
2025-07-01 17:49:06.404 for i in range(alo, ahi):
2025-07-01 17:49:06.404 ai = a[i]
2025-07-01 17:49:06.405 if ai == bj:
2025-07-01 17:49:06.405 if eqi is None:
2025-07-01 17:49:06.405 eqi, eqj = i, j
2025-07-01 17:49:06.405 continue
2025-07-01 17:49:06.405 cruncher.set_seq1(ai)
2025-07-01 17:49:06.405 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.405 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.405 # compares by a factor of 3.
2025-07-01 17:49:06.405 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.405 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.405 # of the computation is cached by cruncher
2025-07-01 17:49:06.405 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.405 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.405 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.405 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.405 if best_ratio < cutoff:
2025-07-01 17:49:06.405 # no non-identical "pretty close" pair
2025-07-01 17:49:06.405 if eqi is None:
2025-07-01 17:49:06.405 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.405 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.405 return
2025-07-01 17:49:06.406 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.406 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.406 else:
2025-07-01 17:49:06.406 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.406 eqi = None
2025-07-01 17:49:06.406
2025-07-01 17:49:06.406 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.406 # identical
2025-07-01 17:49:06.406
2025-07-01 17:49:06.406 # pump out diffs from before the synch point
2025-07-01 17:49:06.406 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.406
2025-07-01 17:49:06.406 # do intraline marking on the synch pair
2025-07-01 17:49:06.406 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.406 if eqi is None:
2025-07-01 17:49:06.406 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.406 atags = btags = ""
2025-07-01 17:49:06.406 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.406 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.406 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.406 if tag == 'replace':
2025-07-01 17:49:06.406 atags += '^' * la
2025-07-01 17:49:06.407 btags += '^' * lb
2025-07-01 17:49:06.407 elif tag == 'delete':
2025-07-01 17:49:06.407 atags += '-' * la
2025-07-01 17:49:06.407 elif tag == 'insert':
2025-07-01 17:49:06.407 btags += '+' * lb
2025-07-01 17:49:06.407 elif tag == 'equal':
2025-07-01 17:49:06.407 atags += ' ' * la
2025-07-01 17:49:06.407 btags += ' ' * lb
2025-07-01 17:49:06.407 else:
2025-07-01 17:49:06.407 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.407 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.407 else:
2025-07-01 17:49:06.407 # the synch pair is identical
2025-07-01 17:49:06.407 yield ' ' + aelt
2025-07-01 17:49:06.407
2025-07-01 17:49:06.407 # pump out diffs from after the synch point
2025-07-01 17:49:06.407 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.407
2025-07-01 17:49:06.407 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.407 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.407
2025-07-01 17:49:06.407 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.408 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.408 alo = 140, ahi = 1101
2025-07-01 17:49:06.408 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.408 blo = 140, bhi = 1101
2025-07-01 17:49:06.408
2025-07-01 17:49:06.408 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.408 g = []
2025-07-01 17:49:06.408 if alo < ahi:
2025-07-01 17:49:06.408 if blo < bhi:
2025-07-01 17:49:06.408 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.408 else:
2025-07-01 17:49:06.408 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.408 elif blo < bhi:
2025-07-01 17:49:06.408 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.408
2025-07-01 17:49:06.408 > yield from g
2025-07-01 17:49:06.408
2025-07-01 17:49:06.408 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.408 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.408
2025-07-01 17:49:06.409 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.409 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.409 alo = 140, ahi = 1101
2025-07-01 17:49:06.409 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.409 blo = 140, bhi = 1101
2025-07-01 17:49:06.409
2025-07-01 17:49:06.409 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.409 r"""
2025-07-01 17:49:06.409 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.409 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.409 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.409 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.409
2025-07-01 17:49:06.409 Example:
2025-07-01 17:49:06.409
2025-07-01 17:49:06.409 >>> d = Differ()
2025-07-01 17:49:06.409 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.409 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.409 >>> print(''.join(results), end="")
2025-07-01 17:49:06.409 - abcDefghiJkl
2025-07-01 17:49:06.410 + abcdefGhijkl
2025-07-01 17:49:06.410 """
2025-07-01 17:49:06.410
2025-07-01 17:49:06.410 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.410 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.410 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.410 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.410 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.410
2025-07-01 17:49:06.410 # search for the pair that matches best without being identical
2025-07-01 17:49:06.410 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.410 # on junk -- unless we have to)
2025-07-01 17:49:06.410 for j in range(blo, bhi):
2025-07-01 17:49:06.410 bj = b[j]
2025-07-01 17:49:06.410 cruncher.set_seq2(bj)
2025-07-01 17:49:06.410 for i in range(alo, ahi):
2025-07-01 17:49:06.410 ai = a[i]
2025-07-01 17:49:06.410 if ai == bj:
2025-07-01 17:49:06.410 if eqi is None:
2025-07-01 17:49:06.410 eqi, eqj = i, j
2025-07-01 17:49:06.411 continue
2025-07-01 17:49:06.411 cruncher.set_seq1(ai)
2025-07-01 17:49:06.411 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.411 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.411 # compares by a factor of 3.
2025-07-01 17:49:06.411 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.411 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.411 # of the computation is cached by cruncher
2025-07-01 17:49:06.411 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.411 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.411 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.411 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.411 if best_ratio < cutoff:
2025-07-01 17:49:06.411 # no non-identical "pretty close" pair
2025-07-01 17:49:06.411 if eqi is None:
2025-07-01 17:49:06.411 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.411 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.411 return
2025-07-01 17:49:06.411 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.411 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.412 else:
2025-07-01 17:49:06.412 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.412 eqi = None
2025-07-01 17:49:06.412
2025-07-01 17:49:06.412 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.412 # identical
2025-07-01 17:49:06.412
2025-07-01 17:49:06.412 # pump out diffs from before the synch point
2025-07-01 17:49:06.412 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.412
2025-07-01 17:49:06.412 # do intraline marking on the synch pair
2025-07-01 17:49:06.412 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.412 if eqi is None:
2025-07-01 17:49:06.412 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.412 atags = btags = ""
2025-07-01 17:49:06.412 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.412 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.412 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.412 if tag == 'replace':
2025-07-01 17:49:06.412 atags += '^' * la
2025-07-01 17:49:06.412 btags += '^' * lb
2025-07-01 17:49:06.419 elif tag == 'delete':
2025-07-01 17:49:06.419 atags += '-' * la
2025-07-01 17:49:06.419 elif tag == 'insert':
2025-07-01 17:49:06.419 btags += '+' * lb
2025-07-01 17:49:06.419 elif tag == 'equal':
2025-07-01 17:49:06.419 atags += ' ' * la
2025-07-01 17:49:06.419 btags += ' ' * lb
2025-07-01 17:49:06.419 else:
2025-07-01 17:49:06.419 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.419 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.419 else:
2025-07-01 17:49:06.419 # the synch pair is identical
2025-07-01 17:49:06.419 yield ' ' + aelt
2025-07-01 17:49:06.419
2025-07-01 17:49:06.419 # pump out diffs from after the synch point
2025-07-01 17:49:06.419 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.419
2025-07-01 17:49:06.419 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.419 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.419
2025-07-01 17:49:06.420 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.420 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.420 alo = 141, ahi = 1101
2025-07-01 17:49:06.420 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.420 blo = 141, bhi = 1101
2025-07-01 17:49:06.420
2025-07-01 17:49:06.420 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.420 g = []
2025-07-01 17:49:06.420 if alo < ahi:
2025-07-01 17:49:06.420 if blo < bhi:
2025-07-01 17:49:06.420 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.420 else:
2025-07-01 17:49:06.420 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.420 elif blo < bhi:
2025-07-01 17:49:06.420 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.420
2025-07-01 17:49:06.420 > yield from g
2025-07-01 17:49:06.420
2025-07-01 17:49:06.420 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.420 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.421
2025-07-01 17:49:06.421 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.421 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.421 alo = 141, ahi = 1101
2025-07-01 17:49:06.421 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.421 blo = 141, bhi = 1101
2025-07-01 17:49:06.421
2025-07-01 17:49:06.421 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.421 r"""
2025-07-01 17:49:06.421 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.421 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.421 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.421 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.421
2025-07-01 17:49:06.421 Example:
2025-07-01 17:49:06.421
2025-07-01 17:49:06.421 >>> d = Differ()
2025-07-01 17:49:06.421 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.421 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.421 >>> print(''.join(results), end="")
2025-07-01 17:49:06.421 - abcDefghiJkl
2025-07-01 17:49:06.422 + abcdefGhijkl
2025-07-01 17:49:06.422 """
2025-07-01 17:49:06.422
2025-07-01 17:49:06.422 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.422 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.422 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.422 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.422 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.422
2025-07-01 17:49:06.422 # search for the pair that matches best without being identical
2025-07-01 17:49:06.422 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.422 # on junk -- unless we have to)
2025-07-01 17:49:06.422 for j in range(blo, bhi):
2025-07-01 17:49:06.422 bj = b[j]
2025-07-01 17:49:06.422 cruncher.set_seq2(bj)
2025-07-01 17:49:06.422 for i in range(alo, ahi):
2025-07-01 17:49:06.422 ai = a[i]
2025-07-01 17:49:06.422 if ai == bj:
2025-07-01 17:49:06.422 if eqi is None:
2025-07-01 17:49:06.422 eqi, eqj = i, j
2025-07-01 17:49:06.423 continue
2025-07-01 17:49:06.423 cruncher.set_seq1(ai)
2025-07-01 17:49:06.423 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.423 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.423 # compares by a factor of 3.
2025-07-01 17:49:06.423 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.423 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.423 # of the computation is cached by cruncher
2025-07-01 17:49:06.423 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.423 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.423 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.423 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.423 if best_ratio < cutoff:
2025-07-01 17:49:06.423 # no non-identical "pretty close" pair
2025-07-01 17:49:06.423 if eqi is None:
2025-07-01 17:49:06.423 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.423 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.423 return
2025-07-01 17:49:06.423 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.423 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.423 else:
2025-07-01 17:49:06.424 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.424 eqi = None
2025-07-01 17:49:06.424
2025-07-01 17:49:06.424 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.424 # identical
2025-07-01 17:49:06.424
2025-07-01 17:49:06.424 # pump out diffs from before the synch point
2025-07-01 17:49:06.424 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.424
2025-07-01 17:49:06.424 # do intraline marking on the synch pair
2025-07-01 17:49:06.424 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.424 if eqi is None:
2025-07-01 17:49:06.424 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.424 atags = btags = ""
2025-07-01 17:49:06.424 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.424 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.424 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.424 if tag == 'replace':
2025-07-01 17:49:06.424 atags += '^' * la
2025-07-01 17:49:06.424 btags += '^' * lb
2025-07-01 17:49:06.424 elif tag == 'delete':
2025-07-01 17:49:06.424 atags += '-' * la
2025-07-01 17:49:06.425 elif tag == 'insert':
2025-07-01 17:49:06.425 btags += '+' * lb
2025-07-01 17:49:06.425 elif tag == 'equal':
2025-07-01 17:49:06.425 atags += ' ' * la
2025-07-01 17:49:06.425 btags += ' ' * lb
2025-07-01 17:49:06.425 else:
2025-07-01 17:49:06.425 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.425 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.425 else:
2025-07-01 17:49:06.425 # the synch pair is identical
2025-07-01 17:49:06.425 yield ' ' + aelt
2025-07-01 17:49:06.425
2025-07-01 17:49:06.425 # pump out diffs from after the synch point
2025-07-01 17:49:06.425 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.425
2025-07-01 17:49:06.425 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.425 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.425
2025-07-01 17:49:06.425 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.425 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.425 alo = 142, ahi = 1101
2025-07-01 17:49:06.426 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.426 blo = 142, bhi = 1101
2025-07-01 17:49:06.426
2025-07-01 17:49:06.426 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.426 g = []
2025-07-01 17:49:06.426 if alo < ahi:
2025-07-01 17:49:06.426 if blo < bhi:
2025-07-01 17:49:06.426 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.426 else:
2025-07-01 17:49:06.426 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.426 elif blo < bhi:
2025-07-01 17:49:06.426 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.426
2025-07-01 17:49:06.426 > yield from g
2025-07-01 17:49:06.426
2025-07-01 17:49:06.426 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.426 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.426
2025-07-01 17:49:06.426 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.427 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.427 alo = 142, ahi = 1101
2025-07-01 17:49:06.427 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.427 blo = 142, bhi = 1101
2025-07-01 17:49:06.427
2025-07-01 17:49:06.427 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.427 r"""
2025-07-01 17:49:06.427 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.427 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.427 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.427 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.427
2025-07-01 17:49:06.427 Example:
2025-07-01 17:49:06.427
2025-07-01 17:49:06.427 >>> d = Differ()
2025-07-01 17:49:06.427 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.427 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.427 >>> print(''.join(results), end="")
2025-07-01 17:49:06.427 - abcDefghiJkl
2025-07-01 17:49:06.428 + abcdefGhijkl
2025-07-01 17:49:06.428 """
2025-07-01 17:49:06.428
2025-07-01 17:49:06.428 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.428 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.428 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.428 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.428 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.428
2025-07-01 17:49:06.428 # search for the pair that matches best without being identical
2025-07-01 17:49:06.428 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.428 # on junk -- unless we have to)
2025-07-01 17:49:06.428 for j in range(blo, bhi):
2025-07-01 17:49:06.428 bj = b[j]
2025-07-01 17:49:06.428 cruncher.set_seq2(bj)
2025-07-01 17:49:06.428 for i in range(alo, ahi):
2025-07-01 17:49:06.428 ai = a[i]
2025-07-01 17:49:06.428 if ai == bj:
2025-07-01 17:49:06.428 if eqi is None:
2025-07-01 17:49:06.428 eqi, eqj = i, j
2025-07-01 17:49:06.429 continue
2025-07-01 17:49:06.429 cruncher.set_seq1(ai)
2025-07-01 17:49:06.429 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.429 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.429 # compares by a factor of 3.
2025-07-01 17:49:06.429 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.429 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.429 # of the computation is cached by cruncher
2025-07-01 17:49:06.429 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.429 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.429 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.429 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.429 if best_ratio < cutoff:
2025-07-01 17:49:06.429 # no non-identical "pretty close" pair
2025-07-01 17:49:06.429 if eqi is None:
2025-07-01 17:49:06.429 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.429 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.429 return
2025-07-01 17:49:06.429 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.429 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.430 else:
2025-07-01 17:49:06.432 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.432 eqi = None
2025-07-01 17:49:06.433
2025-07-01 17:49:06.433 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.433 # identical
2025-07-01 17:49:06.433
2025-07-01 17:49:06.433 # pump out diffs from before the synch point
2025-07-01 17:49:06.433 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.433
2025-07-01 17:49:06.433 # do intraline marking on the synch pair
2025-07-01 17:49:06.433 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.433 if eqi is None:
2025-07-01 17:49:06.433 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.433 atags = btags = ""
2025-07-01 17:49:06.433 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.433 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.433 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.433 if tag == 'replace':
2025-07-01 17:49:06.433 atags += '^' * la
2025-07-01 17:49:06.433 btags += '^' * lb
2025-07-01 17:49:06.433 elif tag == 'delete':
2025-07-01 17:49:06.433 atags += '-' * la
2025-07-01 17:49:06.434 elif tag == 'insert':
2025-07-01 17:49:06.434 btags += '+' * lb
2025-07-01 17:49:06.434 elif tag == 'equal':
2025-07-01 17:49:06.434 atags += ' ' * la
2025-07-01 17:49:06.434 btags += ' ' * lb
2025-07-01 17:49:06.434 else:
2025-07-01 17:49:06.434 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.434 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.434 else:
2025-07-01 17:49:06.434 # the synch pair is identical
2025-07-01 17:49:06.434 yield ' ' + aelt
2025-07-01 17:49:06.434
2025-07-01 17:49:06.434 # pump out diffs from after the synch point
2025-07-01 17:49:06.434 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.434
2025-07-01 17:49:06.434 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.434 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.434
2025-07-01 17:49:06.434 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.434 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.434 alo = 143, ahi = 1101
2025-07-01 17:49:06.435 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.435 blo = 143, bhi = 1101
2025-07-01 17:49:06.435
2025-07-01 17:49:06.435 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.435 g = []
2025-07-01 17:49:06.435 if alo < ahi:
2025-07-01 17:49:06.435 if blo < bhi:
2025-07-01 17:49:06.435 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.435 else:
2025-07-01 17:49:06.435 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.435 elif blo < bhi:
2025-07-01 17:49:06.435 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.435
2025-07-01 17:49:06.435 > yield from g
2025-07-01 17:49:06.435
2025-07-01 17:49:06.435 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.435 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.435
2025-07-01 17:49:06.435 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.435 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.435 alo = 143, ahi = 1101
2025-07-01 17:49:06.435 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.436 blo = 143, bhi = 1101
2025-07-01 17:49:06.436
2025-07-01 17:49:06.436 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.436 r"""
2025-07-01 17:49:06.436 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.436 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.436 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.436 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.436
2025-07-01 17:49:06.436 Example:
2025-07-01 17:49:06.436
2025-07-01 17:49:06.436 >>> d = Differ()
2025-07-01 17:49:06.436 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.436 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.436 >>> print(''.join(results), end="")
2025-07-01 17:49:06.436 - abcDefghiJkl
2025-07-01 17:49:06.436 + abcdefGhijkl
2025-07-01 17:49:06.436 """
2025-07-01 17:49:06.437
2025-07-01 17:49:06.437 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.437 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.437 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.437 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.437 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.437
2025-07-01 17:49:06.437 # search for the pair that matches best without being identical
2025-07-01 17:49:06.437 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.437 # on junk -- unless we have to)
2025-07-01 17:49:06.437 for j in range(blo, bhi):
2025-07-01 17:49:06.437 bj = b[j]
2025-07-01 17:49:06.437 cruncher.set_seq2(bj)
2025-07-01 17:49:06.437 for i in range(alo, ahi):
2025-07-01 17:49:06.437 ai = a[i]
2025-07-01 17:49:06.437 if ai == bj:
2025-07-01 17:49:06.437 if eqi is None:
2025-07-01 17:49:06.437 eqi, eqj = i, j
2025-07-01 17:49:06.437 continue
2025-07-01 17:49:06.437 cruncher.set_seq1(ai)
2025-07-01 17:49:06.438 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.438 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.438 # compares by a factor of 3.
2025-07-01 17:49:06.438 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.438 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.438 # of the computation is cached by cruncher
2025-07-01 17:49:06.438 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.438 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.438 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.438 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.438 if best_ratio < cutoff:
2025-07-01 17:49:06.438 # no non-identical "pretty close" pair
2025-07-01 17:49:06.438 if eqi is None:
2025-07-01 17:49:06.438 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.438 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.438 return
2025-07-01 17:49:06.438 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.438 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.438 else:
2025-07-01 17:49:06.438 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.439 eqi = None
2025-07-01 17:49:06.439
2025-07-01 17:49:06.439 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.439 # identical
2025-07-01 17:49:06.439
2025-07-01 17:49:06.439 # pump out diffs from before the synch point
2025-07-01 17:49:06.439 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.439
2025-07-01 17:49:06.439 # do intraline marking on the synch pair
2025-07-01 17:49:06.439 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.439 if eqi is None:
2025-07-01 17:49:06.439 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.439 atags = btags = ""
2025-07-01 17:49:06.439 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.439 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.439 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.439 if tag == 'replace':
2025-07-01 17:49:06.439 atags += '^' * la
2025-07-01 17:49:06.439 btags += '^' * lb
2025-07-01 17:49:06.439 elif tag == 'delete':
2025-07-01 17:49:06.440 atags += '-' * la
2025-07-01 17:49:06.440 elif tag == 'insert':
2025-07-01 17:49:06.440 btags += '+' * lb
2025-07-01 17:49:06.440 elif tag == 'equal':
2025-07-01 17:49:06.440 atags += ' ' * la
2025-07-01 17:49:06.440 btags += ' ' * lb
2025-07-01 17:49:06.440 else:
2025-07-01 17:49:06.440 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.440 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.440 else:
2025-07-01 17:49:06.440 # the synch pair is identical
2025-07-01 17:49:06.440 yield ' ' + aelt
2025-07-01 17:49:06.440
2025-07-01 17:49:06.440 # pump out diffs from after the synch point
2025-07-01 17:49:06.440 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.440
2025-07-01 17:49:06.440 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.440 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.440
2025-07-01 17:49:06.440 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.440 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.441 alo = 144, ahi = 1101
2025-07-01 17:49:06.441 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.441 blo = 144, bhi = 1101
2025-07-01 17:49:06.441
2025-07-01 17:49:06.441 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.441 g = []
2025-07-01 17:49:06.441 if alo < ahi:
2025-07-01 17:49:06.441 if blo < bhi:
2025-07-01 17:49:06.441 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.441 else:
2025-07-01 17:49:06.441 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.441 elif blo < bhi:
2025-07-01 17:49:06.441 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.441
2025-07-01 17:49:06.441 > yield from g
2025-07-01 17:49:06.441
2025-07-01 17:49:06.441 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.441 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.441
2025-07-01 17:49:06.441 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.441 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.442 alo = 144, ahi = 1101
2025-07-01 17:49:06.442 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.442 blo = 144, bhi = 1101
2025-07-01 17:49:06.442
2025-07-01 17:49:06.442 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.442 r"""
2025-07-01 17:49:06.442 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.442 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.442 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.442 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.442
2025-07-01 17:49:06.442 Example:
2025-07-01 17:49:06.442
2025-07-01 17:49:06.442 >>> d = Differ()
2025-07-01 17:49:06.442 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.442 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.442 >>> print(''.join(results), end="")
2025-07-01 17:49:06.442 - abcDefghiJkl
2025-07-01 17:49:06.442 + abcdefGhijkl
2025-07-01 17:49:06.443 """
2025-07-01 17:49:06.443
2025-07-01 17:49:06.443 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.443 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.443 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.443 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.443 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.443
2025-07-01 17:49:06.443 # search for the pair that matches best without being identical
2025-07-01 17:49:06.443 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.443 # on junk -- unless we have to)
2025-07-01 17:49:06.443 for j in range(blo, bhi):
2025-07-01 17:49:06.443 bj = b[j]
2025-07-01 17:49:06.443 cruncher.set_seq2(bj)
2025-07-01 17:49:06.443 for i in range(alo, ahi):
2025-07-01 17:49:06.443 ai = a[i]
2025-07-01 17:49:06.443 if ai == bj:
2025-07-01 17:49:06.443 if eqi is None:
2025-07-01 17:49:06.443 eqi, eqj = i, j
2025-07-01 17:49:06.443 continue
2025-07-01 17:49:06.444 cruncher.set_seq1(ai)
2025-07-01 17:49:06.444 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.444 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.444 # compares by a factor of 3.
2025-07-01 17:49:06.444 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.444 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.444 # of the computation is cached by cruncher
2025-07-01 17:49:06.444 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.444 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.444 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.444 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.444 if best_ratio < cutoff:
2025-07-01 17:49:06.444 # no non-identical "pretty close" pair
2025-07-01 17:49:06.444 if eqi is None:
2025-07-01 17:49:06.444 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.444 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.444 return
2025-07-01 17:49:06.444 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.444 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.444 else:
2025-07-01 17:49:06.445 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.445 eqi = None
2025-07-01 17:49:06.445
2025-07-01 17:49:06.445 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.445 # identical
2025-07-01 17:49:06.445
2025-07-01 17:49:06.445 # pump out diffs from before the synch point
2025-07-01 17:49:06.445 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.445
2025-07-01 17:49:06.445 # do intraline marking on the synch pair
2025-07-01 17:49:06.445 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.445 if eqi is None:
2025-07-01 17:49:06.445 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.445 atags = btags = ""
2025-07-01 17:49:06.445 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.445 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.445 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.445 if tag == 'replace':
2025-07-01 17:49:06.445 atags += '^' * la
2025-07-01 17:49:06.445 btags += '^' * lb
2025-07-01 17:49:06.446 elif tag == 'delete':
2025-07-01 17:49:06.451 atags += '-' * la
2025-07-01 17:49:06.451 elif tag == 'insert':
2025-07-01 17:49:06.451 btags += '+' * lb
2025-07-01 17:49:06.451 elif tag == 'equal':
2025-07-01 17:49:06.451 atags += ' ' * la
2025-07-01 17:49:06.451 btags += ' ' * lb
2025-07-01 17:49:06.451 else:
2025-07-01 17:49:06.451 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.451 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.451 else:
2025-07-01 17:49:06.451 # the synch pair is identical
2025-07-01 17:49:06.451 yield ' ' + aelt
2025-07-01 17:49:06.451
2025-07-01 17:49:06.451 # pump out diffs from after the synch point
2025-07-01 17:49:06.451 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.451
2025-07-01 17:49:06.451 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.451 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.452
2025-07-01 17:49:06.452 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.452 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.452 alo = 145, ahi = 1101
2025-07-01 17:49:06.452 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.452 blo = 145, bhi = 1101
2025-07-01 17:49:06.452
2025-07-01 17:49:06.452 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.452 g = []
2025-07-01 17:49:06.452 if alo < ahi:
2025-07-01 17:49:06.452 if blo < bhi:
2025-07-01 17:49:06.452 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.452 else:
2025-07-01 17:49:06.452 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.452 elif blo < bhi:
2025-07-01 17:49:06.452 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.452
2025-07-01 17:49:06.452 > yield from g
2025-07-01 17:49:06.452
2025-07-01 17:49:06.452 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.452 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.453
2025-07-01 17:49:06.453 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.453 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.453 alo = 145, ahi = 1101
2025-07-01 17:49:06.453 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.453 blo = 145, bhi = 1101
2025-07-01 17:49:06.453
2025-07-01 17:49:06.453 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.453 r"""
2025-07-01 17:49:06.453 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.453 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.453 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.453 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.453
2025-07-01 17:49:06.453 Example:
2025-07-01 17:49:06.453
2025-07-01 17:49:06.453 >>> d = Differ()
2025-07-01 17:49:06.453 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.453 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.453 >>> print(''.join(results), end="")
2025-07-01 17:49:06.454 - abcDefghiJkl
2025-07-01 17:49:06.454 + abcdefGhijkl
2025-07-01 17:49:06.454 """
2025-07-01 17:49:06.454
2025-07-01 17:49:06.454 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.454 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.454 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.454 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.454 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.454
2025-07-01 17:49:06.454 # search for the pair that matches best without being identical
2025-07-01 17:49:06.454 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.454 # on junk -- unless we have to)
2025-07-01 17:49:06.454 for j in range(blo, bhi):
2025-07-01 17:49:06.454 bj = b[j]
2025-07-01 17:49:06.454 cruncher.set_seq2(bj)
2025-07-01 17:49:06.454 for i in range(alo, ahi):
2025-07-01 17:49:06.454 ai = a[i]
2025-07-01 17:49:06.454 if ai == bj:
2025-07-01 17:49:06.455 if eqi is None:
2025-07-01 17:49:06.455 eqi, eqj = i, j
2025-07-01 17:49:06.455 continue
2025-07-01 17:49:06.455 cruncher.set_seq1(ai)
2025-07-01 17:49:06.455 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.455 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.455 # compares by a factor of 3.
2025-07-01 17:49:06.455 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.455 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.455 # of the computation is cached by cruncher
2025-07-01 17:49:06.455 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.455 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.455 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.455 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.455 if best_ratio < cutoff:
2025-07-01 17:49:06.455 # no non-identical "pretty close" pair
2025-07-01 17:49:06.455 if eqi is None:
2025-07-01 17:49:06.455 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.455 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.455 return
2025-07-01 17:49:06.456 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.456 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.456 else:
2025-07-01 17:49:06.456 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.456 eqi = None
2025-07-01 17:49:06.456
2025-07-01 17:49:06.456 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.456 # identical
2025-07-01 17:49:06.456
2025-07-01 17:49:06.456 # pump out diffs from before the synch point
2025-07-01 17:49:06.456 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.456
2025-07-01 17:49:06.456 # do intraline marking on the synch pair
2025-07-01 17:49:06.456 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.456 if eqi is None:
2025-07-01 17:49:06.456 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.456 atags = btags = ""
2025-07-01 17:49:06.456 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.456 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.456 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.456 if tag == 'replace':
2025-07-01 17:49:06.457 atags += '^' * la
2025-07-01 17:49:06.457 btags += '^' * lb
2025-07-01 17:49:06.457 elif tag == 'delete':
2025-07-01 17:49:06.457 atags += '-' * la
2025-07-01 17:49:06.457 elif tag == 'insert':
2025-07-01 17:49:06.457 btags += '+' * lb
2025-07-01 17:49:06.457 elif tag == 'equal':
2025-07-01 17:49:06.457 atags += ' ' * la
2025-07-01 17:49:06.457 btags += ' ' * lb
2025-07-01 17:49:06.457 else:
2025-07-01 17:49:06.457 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.457 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.457 else:
2025-07-01 17:49:06.457 # the synch pair is identical
2025-07-01 17:49:06.457 yield ' ' + aelt
2025-07-01 17:49:06.457
2025-07-01 17:49:06.457 # pump out diffs from after the synch point
2025-07-01 17:49:06.457 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.457
2025-07-01 17:49:06.457 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.457 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.458
2025-07-01 17:49:06.458 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.458 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.458 alo = 146, ahi = 1101
2025-07-01 17:49:06.458 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.458 blo = 146, bhi = 1101
2025-07-01 17:49:06.458
2025-07-01 17:49:06.458 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.458 g = []
2025-07-01 17:49:06.458 if alo < ahi:
2025-07-01 17:49:06.458 if blo < bhi:
2025-07-01 17:49:06.458 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.458 else:
2025-07-01 17:49:06.458 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.458 elif blo < bhi:
2025-07-01 17:49:06.458 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.458
2025-07-01 17:49:06.458 > yield from g
2025-07-01 17:49:06.458
2025-07-01 17:49:06.458 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.458 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.459
2025-07-01 17:49:06.459 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.459 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.459 alo = 146, ahi = 1101
2025-07-01 17:49:06.459 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.459 blo = 146, bhi = 1101
2025-07-01 17:49:06.459
2025-07-01 17:49:06.459 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.459 r"""
2025-07-01 17:49:06.459 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.459 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.459 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.459 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.459
2025-07-01 17:49:06.459 Example:
2025-07-01 17:49:06.459
2025-07-01 17:49:06.459 >>> d = Differ()
2025-07-01 17:49:06.459 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.459 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.460 >>> print(''.join(results), end="")
2025-07-01 17:49:06.460 - abcDefghiJkl
2025-07-01 17:49:06.460 + abcdefGhijkl
2025-07-01 17:49:06.460 """
2025-07-01 17:49:06.460
2025-07-01 17:49:06.460 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.460 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.460 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.460 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.460 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.460
2025-07-01 17:49:06.460 # search for the pair that matches best without being identical
2025-07-01 17:49:06.460 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.460 # on junk -- unless we have to)
2025-07-01 17:49:06.460 for j in range(blo, bhi):
2025-07-01 17:49:06.460 bj = b[j]
2025-07-01 17:49:06.460 cruncher.set_seq2(bj)
2025-07-01 17:49:06.460 for i in range(alo, ahi):
2025-07-01 17:49:06.460 ai = a[i]
2025-07-01 17:49:06.461 if ai == bj:
2025-07-01 17:49:06.463 if eqi is None:
2025-07-01 17:49:06.463 eqi, eqj = i, j
2025-07-01 17:49:06.464 continue
2025-07-01 17:49:06.464 cruncher.set_seq1(ai)
2025-07-01 17:49:06.464 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.464 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.464 # compares by a factor of 3.
2025-07-01 17:49:06.464 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.464 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.464 # of the computation is cached by cruncher
2025-07-01 17:49:06.464 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.464 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.464 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.464 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.464 if best_ratio < cutoff:
2025-07-01 17:49:06.464 # no non-identical "pretty close" pair
2025-07-01 17:49:06.464 if eqi is None:
2025-07-01 17:49:06.464 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.464 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.464 return
2025-07-01 17:49:06.464 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.464 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.465 else:
2025-07-01 17:49:06.465 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.465 eqi = None
2025-07-01 17:49:06.465
2025-07-01 17:49:06.465 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.465 # identical
2025-07-01 17:49:06.465
2025-07-01 17:49:06.465 # pump out diffs from before the synch point
2025-07-01 17:49:06.465 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.465
2025-07-01 17:49:06.465 # do intraline marking on the synch pair
2025-07-01 17:49:06.465 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.465 if eqi is None:
2025-07-01 17:49:06.465 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.465 atags = btags = ""
2025-07-01 17:49:06.465 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.465 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.465 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.465 if tag == 'replace':
2025-07-01 17:49:06.465 atags += '^' * la
2025-07-01 17:49:06.466 btags += '^' * lb
2025-07-01 17:49:06.466 elif tag == 'delete':
2025-07-01 17:49:06.466 atags += '-' * la
2025-07-01 17:49:06.466 elif tag == 'insert':
2025-07-01 17:49:06.466 btags += '+' * lb
2025-07-01 17:49:06.466 elif tag == 'equal':
2025-07-01 17:49:06.466 atags += ' ' * la
2025-07-01 17:49:06.466 btags += ' ' * lb
2025-07-01 17:49:06.466 else:
2025-07-01 17:49:06.466 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.466 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.466 else:
2025-07-01 17:49:06.466 # the synch pair is identical
2025-07-01 17:49:06.466 yield ' ' + aelt
2025-07-01 17:49:06.466
2025-07-01 17:49:06.466 # pump out diffs from after the synch point
2025-07-01 17:49:06.466 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.466
2025-07-01 17:49:06.466 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.466 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.466
2025-07-01 17:49:06.467 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.467 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.467 alo = 147, ahi = 1101
2025-07-01 17:49:06.467 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.467 blo = 147, bhi = 1101
2025-07-01 17:49:06.467
2025-07-01 17:49:06.467 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.467 g = []
2025-07-01 17:49:06.467 if alo < ahi:
2025-07-01 17:49:06.467 if blo < bhi:
2025-07-01 17:49:06.467 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.467 else:
2025-07-01 17:49:06.467 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.467 elif blo < bhi:
2025-07-01 17:49:06.467 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.467
2025-07-01 17:49:06.467 > yield from g
2025-07-01 17:49:06.467
2025-07-01 17:49:06.467 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.467 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.468
2025-07-01 17:49:06.468 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.468 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.468 alo = 147, ahi = 1101
2025-07-01 17:49:06.468 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.468 blo = 147, bhi = 1101
2025-07-01 17:49:06.468
2025-07-01 17:49:06.468 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.468 r"""
2025-07-01 17:49:06.468 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.468 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.468 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.468 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.468
2025-07-01 17:49:06.468 Example:
2025-07-01 17:49:06.468
2025-07-01 17:49:06.468 >>> d = Differ()
2025-07-01 17:49:06.468 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.468 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.468 >>> print(''.join(results), end="")
2025-07-01 17:49:06.468 - abcDefghiJkl
2025-07-01 17:49:06.469 + abcdefGhijkl
2025-07-01 17:49:06.469 """
2025-07-01 17:49:06.469
2025-07-01 17:49:06.469 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.469 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.469 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.469 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.469 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.469
2025-07-01 17:49:06.469 # search for the pair that matches best without being identical
2025-07-01 17:49:06.469 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.469 # on junk -- unless we have to)
2025-07-01 17:49:06.469 for j in range(blo, bhi):
2025-07-01 17:49:06.469 bj = b[j]
2025-07-01 17:49:06.469 cruncher.set_seq2(bj)
2025-07-01 17:49:06.469 for i in range(alo, ahi):
2025-07-01 17:49:06.469 ai = a[i]
2025-07-01 17:49:06.469 if ai == bj:
2025-07-01 17:49:06.470 if eqi is None:
2025-07-01 17:49:06.470 eqi, eqj = i, j
2025-07-01 17:49:06.470 continue
2025-07-01 17:49:06.470 cruncher.set_seq1(ai)
2025-07-01 17:49:06.470 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.470 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.470 # compares by a factor of 3.
2025-07-01 17:49:06.470 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.470 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.470 # of the computation is cached by cruncher
2025-07-01 17:49:06.470 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.470 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.470 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.470 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.470 if best_ratio < cutoff:
2025-07-01 17:49:06.470 # no non-identical "pretty close" pair
2025-07-01 17:49:06.470 if eqi is None:
2025-07-01 17:49:06.470 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.470 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.470 return
2025-07-01 17:49:06.471 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.471 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.471 else:
2025-07-01 17:49:06.471 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.471 eqi = None
2025-07-01 17:49:06.471
2025-07-01 17:49:06.471 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.471 # identical
2025-07-01 17:49:06.471
2025-07-01 17:49:06.471 # pump out diffs from before the synch point
2025-07-01 17:49:06.471 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.471
2025-07-01 17:49:06.471 # do intraline marking on the synch pair
2025-07-01 17:49:06.471 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.471 if eqi is None:
2025-07-01 17:49:06.471 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.471 atags = btags = ""
2025-07-01 17:49:06.471 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.471 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.471 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.472 if tag == 'replace':
2025-07-01 17:49:06.472 atags += '^' * la
2025-07-01 17:49:06.472 btags += '^' * lb
2025-07-01 17:49:06.472 elif tag == 'delete':
2025-07-01 17:49:06.472 atags += '-' * la
2025-07-01 17:49:06.472 elif tag == 'insert':
2025-07-01 17:49:06.472 btags += '+' * lb
2025-07-01 17:49:06.472 elif tag == 'equal':
2025-07-01 17:49:06.472 atags += ' ' * la
2025-07-01 17:49:06.472 btags += ' ' * lb
2025-07-01 17:49:06.472 else:
2025-07-01 17:49:06.472 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.472 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.472 else:
2025-07-01 17:49:06.472 # the synch pair is identical
2025-07-01 17:49:06.472 yield ' ' + aelt
2025-07-01 17:49:06.472
2025-07-01 17:49:06.472 # pump out diffs from after the synch point
2025-07-01 17:49:06.472 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.472
2025-07-01 17:49:06.472 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.473 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.473
2025-07-01 17:49:06.473 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.473 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.473 alo = 148, ahi = 1101
2025-07-01 17:49:06.473 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.473 blo = 148, bhi = 1101
2025-07-01 17:49:06.473
2025-07-01 17:49:06.473 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.473 g = []
2025-07-01 17:49:06.473 if alo < ahi:
2025-07-01 17:49:06.473 if blo < bhi:
2025-07-01 17:49:06.473 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.473 else:
2025-07-01 17:49:06.473 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.473 elif blo < bhi:
2025-07-01 17:49:06.473 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.473
2025-07-01 17:49:06.473 > yield from g
2025-07-01 17:49:06.473
2025-07-01 17:49:06.474 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.474 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.474
2025-07-01 17:49:06.474 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.474 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.474 alo = 148, ahi = 1101
2025-07-01 17:49:06.474 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.474 blo = 148, bhi = 1101
2025-07-01 17:49:06.474
2025-07-01 17:49:06.474 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.474 r"""
2025-07-01 17:49:06.474 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.474 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.474 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.474 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.474
2025-07-01 17:49:06.474 Example:
2025-07-01 17:49:06.474
2025-07-01 17:49:06.474 >>> d = Differ()
2025-07-01 17:49:06.474 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.474 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.475 >>> print(''.join(results), end="")
2025-07-01 17:49:06.475 - abcDefghiJkl
2025-07-01 17:49:06.475 + abcdefGhijkl
2025-07-01 17:49:06.475 """
2025-07-01 17:49:06.475
2025-07-01 17:49:06.475 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.475 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.475 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.475 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.475 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.475
2025-07-01 17:49:06.475 # search for the pair that matches best without being identical
2025-07-01 17:49:06.475 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.475 # on junk -- unless we have to)
2025-07-01 17:49:06.475 for j in range(blo, bhi):
2025-07-01 17:49:06.475 bj = b[j]
2025-07-01 17:49:06.475 cruncher.set_seq2(bj)
2025-07-01 17:49:06.475 for i in range(alo, ahi):
2025-07-01 17:49:06.476 ai = a[i]
2025-07-01 17:49:06.476 if ai == bj:
2025-07-01 17:49:06.476 if eqi is None:
2025-07-01 17:49:06.476 eqi, eqj = i, j
2025-07-01 17:49:06.476 continue
2025-07-01 17:49:06.476 cruncher.set_seq1(ai)
2025-07-01 17:49:06.476 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.476 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.476 # compares by a factor of 3.
2025-07-01 17:49:06.476 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.476 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.476 # of the computation is cached by cruncher
2025-07-01 17:49:06.476 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.476 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.476 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.476 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.476 if best_ratio < cutoff:
2025-07-01 17:49:06.476 # no non-identical "pretty close" pair
2025-07-01 17:49:06.476 if eqi is None:
2025-07-01 17:49:06.476 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.476 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.482 return
2025-07-01 17:49:06.482 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.482 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.482 else:
2025-07-01 17:49:06.482 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.482 eqi = None
2025-07-01 17:49:06.482
2025-07-01 17:49:06.482 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.482 # identical
2025-07-01 17:49:06.482
2025-07-01 17:49:06.482 # pump out diffs from before the synch point
2025-07-01 17:49:06.482 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.482
2025-07-01 17:49:06.482 # do intraline marking on the synch pair
2025-07-01 17:49:06.482 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.482 if eqi is None:
2025-07-01 17:49:06.482 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.482 atags = btags = ""
2025-07-01 17:49:06.482 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.482 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.483 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.483 if tag == 'replace':
2025-07-01 17:49:06.483 atags += '^' * la
2025-07-01 17:49:06.483 btags += '^' * lb
2025-07-01 17:49:06.483 elif tag == 'delete':
2025-07-01 17:49:06.483 atags += '-' * la
2025-07-01 17:49:06.483 elif tag == 'insert':
2025-07-01 17:49:06.483 btags += '+' * lb
2025-07-01 17:49:06.483 elif tag == 'equal':
2025-07-01 17:49:06.483 atags += ' ' * la
2025-07-01 17:49:06.483 btags += ' ' * lb
2025-07-01 17:49:06.483 else:
2025-07-01 17:49:06.483 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.483 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.483 else:
2025-07-01 17:49:06.483 # the synch pair is identical
2025-07-01 17:49:06.483 yield ' ' + aelt
2025-07-01 17:49:06.483
2025-07-01 17:49:06.483 # pump out diffs from after the synch point
2025-07-01 17:49:06.483 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.484
2025-07-01 17:49:06.484 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.484 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.484
2025-07-01 17:49:06.484 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.484 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.484 alo = 149, ahi = 1101
2025-07-01 17:49:06.484 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.484 blo = 149, bhi = 1101
2025-07-01 17:49:06.484
2025-07-01 17:49:06.484 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.484 g = []
2025-07-01 17:49:06.484 if alo < ahi:
2025-07-01 17:49:06.484 if blo < bhi:
2025-07-01 17:49:06.484 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.484 else:
2025-07-01 17:49:06.484 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.484 elif blo < bhi:
2025-07-01 17:49:06.484 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.484
2025-07-01 17:49:06.485 > yield from g
2025-07-01 17:49:06.485
2025-07-01 17:49:06.485 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.485 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.485
2025-07-01 17:49:06.485 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.485 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.485 alo = 149, ahi = 1101
2025-07-01 17:49:06.485 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.485 blo = 149, bhi = 1101
2025-07-01 17:49:06.485
2025-07-01 17:49:06.485 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.485 r"""
2025-07-01 17:49:06.485 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.485 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.485 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.485 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.485
2025-07-01 17:49:06.485 Example:
2025-07-01 17:49:06.485
2025-07-01 17:49:06.486 >>> d = Differ()
2025-07-01 17:49:06.486 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.486 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.486 >>> print(''.join(results), end="")
2025-07-01 17:49:06.486 - abcDefghiJkl
2025-07-01 17:49:06.486 + abcdefGhijkl
2025-07-01 17:49:06.486 """
2025-07-01 17:49:06.486
2025-07-01 17:49:06.486 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.486 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.486 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.486 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.486 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.486
2025-07-01 17:49:06.486 # search for the pair that matches best without being identical
2025-07-01 17:49:06.486 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.486 # on junk -- unless we have to)
2025-07-01 17:49:06.486 for j in range(blo, bhi):
2025-07-01 17:49:06.487 bj = b[j]
2025-07-01 17:49:06.487 cruncher.set_seq2(bj)
2025-07-01 17:49:06.487 for i in range(alo, ahi):
2025-07-01 17:49:06.487 ai = a[i]
2025-07-01 17:49:06.487 if ai == bj:
2025-07-01 17:49:06.487 if eqi is None:
2025-07-01 17:49:06.487 eqi, eqj = i, j
2025-07-01 17:49:06.487 continue
2025-07-01 17:49:06.487 cruncher.set_seq1(ai)
2025-07-01 17:49:06.487 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.487 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.487 # compares by a factor of 3.
2025-07-01 17:49:06.487 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.487 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.487 # of the computation is cached by cruncher
2025-07-01 17:49:06.487 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.487 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.487 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.487 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.487 if best_ratio < cutoff:
2025-07-01 17:49:06.488 # no non-identical "pretty close" pair
2025-07-01 17:49:06.488 if eqi is None:
2025-07-01 17:49:06.488 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.488 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.488 return
2025-07-01 17:49:06.488 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.488 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.488 else:
2025-07-01 17:49:06.488 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.488 eqi = None
2025-07-01 17:49:06.488
2025-07-01 17:49:06.488 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.488 # identical
2025-07-01 17:49:06.488
2025-07-01 17:49:06.488 # pump out diffs from before the synch point
2025-07-01 17:49:06.488 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.488
2025-07-01 17:49:06.488 # do intraline marking on the synch pair
2025-07-01 17:49:06.488 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.488 if eqi is None:
2025-07-01 17:49:06.488 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.489 atags = btags = ""
2025-07-01 17:49:06.489 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.489 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.489 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.489 if tag == 'replace':
2025-07-01 17:49:06.489 atags += '^' * la
2025-07-01 17:49:06.489 btags += '^' * lb
2025-07-01 17:49:06.489 elif tag == 'delete':
2025-07-01 17:49:06.489 atags += '-' * la
2025-07-01 17:49:06.489 elif tag == 'insert':
2025-07-01 17:49:06.489 btags += '+' * lb
2025-07-01 17:49:06.489 elif tag == 'equal':
2025-07-01 17:49:06.489 atags += ' ' * la
2025-07-01 17:49:06.489 btags += ' ' * lb
2025-07-01 17:49:06.489 else:
2025-07-01 17:49:06.489 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.489 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.489 else:
2025-07-01 17:49:06.489 # the synch pair is identical
2025-07-01 17:49:06.489 yield ' ' + aelt
2025-07-01 17:49:06.490
2025-07-01 17:49:06.490 # pump out diffs from after the synch point
2025-07-01 17:49:06.490 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.490
2025-07-01 17:49:06.490 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.490 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.490
2025-07-01 17:49:06.490 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.490 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.490 alo = 150, ahi = 1101
2025-07-01 17:49:06.490 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.490 blo = 150, bhi = 1101
2025-07-01 17:49:06.490
2025-07-01 17:49:06.490 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.490 g = []
2025-07-01 17:49:06.490 if alo < ahi:
2025-07-01 17:49:06.490 if blo < bhi:
2025-07-01 17:49:06.490 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.490 else:
2025-07-01 17:49:06.490 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.491 elif blo < bhi:
2025-07-01 17:49:06.491 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.491
2025-07-01 17:49:06.491 > yield from g
2025-07-01 17:49:06.491
2025-07-01 17:49:06.491 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.491 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.491
2025-07-01 17:49:06.491 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.491 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.491 alo = 150, ahi = 1101
2025-07-01 17:49:06.491 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.491 blo = 150, bhi = 1101
2025-07-01 17:49:06.491
2025-07-01 17:49:06.491 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.491 r"""
2025-07-01 17:49:06.491 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.491 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.491 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.491 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.495
2025-07-01 17:49:06.495 Example:
2025-07-01 17:49:06.495
2025-07-01 17:49:06.495 >>> d = Differ()
2025-07-01 17:49:06.495 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.495 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.495 >>> print(''.join(results), end="")
2025-07-01 17:49:06.495 - abcDefghiJkl
2025-07-01 17:49:06.495 + abcdefGhijkl
2025-07-01 17:49:06.495 """
2025-07-01 17:49:06.495
2025-07-01 17:49:06.495 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.495 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.495 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.495 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.495 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.495
2025-07-01 17:49:06.496 # search for the pair that matches best without being identical
2025-07-01 17:49:06.496 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.496 # on junk -- unless we have to)
2025-07-01 17:49:06.496 for j in range(blo, bhi):
2025-07-01 17:49:06.496 bj = b[j]
2025-07-01 17:49:06.496 cruncher.set_seq2(bj)
2025-07-01 17:49:06.496 for i in range(alo, ahi):
2025-07-01 17:49:06.496 ai = a[i]
2025-07-01 17:49:06.496 if ai == bj:
2025-07-01 17:49:06.496 if eqi is None:
2025-07-01 17:49:06.496 eqi, eqj = i, j
2025-07-01 17:49:06.496 continue
2025-07-01 17:49:06.496 cruncher.set_seq1(ai)
2025-07-01 17:49:06.496 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.496 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.496 # compares by a factor of 3.
2025-07-01 17:49:06.496 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.496 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.496 # of the computation is cached by cruncher
2025-07-01 17:49:06.496 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.497 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.497 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.497 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.497 if best_ratio < cutoff:
2025-07-01 17:49:06.497 # no non-identical "pretty close" pair
2025-07-01 17:49:06.497 if eqi is None:
2025-07-01 17:49:06.497 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.497 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.497 return
2025-07-01 17:49:06.497 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.497 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.497 else:
2025-07-01 17:49:06.497 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.497 eqi = None
2025-07-01 17:49:06.497
2025-07-01 17:49:06.497 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.497 # identical
2025-07-01 17:49:06.497
2025-07-01 17:49:06.497 # pump out diffs from before the synch point
2025-07-01 17:49:06.497 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.497
2025-07-01 17:49:06.498 # do intraline marking on the synch pair
2025-07-01 17:49:06.498 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.498 if eqi is None:
2025-07-01 17:49:06.498 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.498 atags = btags = ""
2025-07-01 17:49:06.498 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.498 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.498 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.498 if tag == 'replace':
2025-07-01 17:49:06.498 atags += '^' * la
2025-07-01 17:49:06.498 btags += '^' * lb
2025-07-01 17:49:06.498 elif tag == 'delete':
2025-07-01 17:49:06.498 atags += '-' * la
2025-07-01 17:49:06.498 elif tag == 'insert':
2025-07-01 17:49:06.498 btags += '+' * lb
2025-07-01 17:49:06.498 elif tag == 'equal':
2025-07-01 17:49:06.498 atags += ' ' * la
2025-07-01 17:49:06.498 btags += ' ' * lb
2025-07-01 17:49:06.498 else:
2025-07-01 17:49:06.498 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.498 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.498 else:
2025-07-01 17:49:06.498 # the synch pair is identical
2025-07-01 17:49:06.498 yield ' ' + aelt
2025-07-01 17:49:06.498
2025-07-01 17:49:06.498 # pump out diffs from after the synch point
2025-07-01 17:49:06.498 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.498
2025-07-01 17:49:06.499 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.499 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.499
2025-07-01 17:49:06.499 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.499 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.499 alo = 151, ahi = 1101
2025-07-01 17:49:06.499 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.499 blo = 151, bhi = 1101
2025-07-01 17:49:06.499
2025-07-01 17:49:06.499 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.499 g = []
2025-07-01 17:49:06.499 if alo < ahi:
2025-07-01 17:49:06.499 if blo < bhi:
2025-07-01 17:49:06.499 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.499 else:
2025-07-01 17:49:06.499 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.499 elif blo < bhi:
2025-07-01 17:49:06.499 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.499
2025-07-01 17:49:06.499 > yield from g
2025-07-01 17:49:06.499
2025-07-01 17:49:06.500 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.500 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.500
2025-07-01 17:49:06.500 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.500 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.500 alo = 151, ahi = 1101
2025-07-01 17:49:06.500 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.500 blo = 151, bhi = 1101
2025-07-01 17:49:06.500
2025-07-01 17:49:06.500 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.500 r"""
2025-07-01 17:49:06.500 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.500 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.500 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.500 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.500
2025-07-01 17:49:06.500 Example:
2025-07-01 17:49:06.500
2025-07-01 17:49:06.500 >>> d = Differ()
2025-07-01 17:49:06.500 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.500 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.501 >>> print(''.join(results), end="")
2025-07-01 17:49:06.501 - abcDefghiJkl
2025-07-01 17:49:06.501 + abcdefGhijkl
2025-07-01 17:49:06.501 """
2025-07-01 17:49:06.501
2025-07-01 17:49:06.501 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.501 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.501 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.501 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.501 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.501
2025-07-01 17:49:06.501 # search for the pair that matches best without being identical
2025-07-01 17:49:06.501 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.501 # on junk -- unless we have to)
2025-07-01 17:49:06.501 for j in range(blo, bhi):
2025-07-01 17:49:06.501 bj = b[j]
2025-07-01 17:49:06.501 cruncher.set_seq2(bj)
2025-07-01 17:49:06.501 for i in range(alo, ahi):
2025-07-01 17:49:06.501 ai = a[i]
2025-07-01 17:49:06.501 if ai == bj:
2025-07-01 17:49:06.502 if eqi is None:
2025-07-01 17:49:06.502 eqi, eqj = i, j
2025-07-01 17:49:06.502 continue
2025-07-01 17:49:06.502 cruncher.set_seq1(ai)
2025-07-01 17:49:06.502 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.502 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.502 # compares by a factor of 3.
2025-07-01 17:49:06.502 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.502 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.502 # of the computation is cached by cruncher
2025-07-01 17:49:06.502 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.502 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.502 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.502 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.502 if best_ratio < cutoff:
2025-07-01 17:49:06.502 # no non-identical "pretty close" pair
2025-07-01 17:49:06.502 if eqi is None:
2025-07-01 17:49:06.502 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.502 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.502 return
2025-07-01 17:49:06.502 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.503 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.503 else:
2025-07-01 17:49:06.503 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.503 eqi = None
2025-07-01 17:49:06.503
2025-07-01 17:49:06.503 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.503 # identical
2025-07-01 17:49:06.503
2025-07-01 17:49:06.503 # pump out diffs from before the synch point
2025-07-01 17:49:06.503 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.503
2025-07-01 17:49:06.503 # do intraline marking on the synch pair
2025-07-01 17:49:06.503 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.503 if eqi is None:
2025-07-01 17:49:06.503 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.503 atags = btags = ""
2025-07-01 17:49:06.503 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.503 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.503 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.504 if tag == 'replace':
2025-07-01 17:49:06.504 atags += '^' * la
2025-07-01 17:49:06.504 btags += '^' * lb
2025-07-01 17:49:06.504 elif tag == 'delete':
2025-07-01 17:49:06.504 atags += '-' * la
2025-07-01 17:49:06.504 elif tag == 'insert':
2025-07-01 17:49:06.504 btags += '+' * lb
2025-07-01 17:49:06.504 elif tag == 'equal':
2025-07-01 17:49:06.504 atags += ' ' * la
2025-07-01 17:49:06.504 btags += ' ' * lb
2025-07-01 17:49:06.504 else:
2025-07-01 17:49:06.504 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.504 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.504 else:
2025-07-01 17:49:06.504 # the synch pair is identical
2025-07-01 17:49:06.504 yield ' ' + aelt
2025-07-01 17:49:06.504
2025-07-01 17:49:06.504 # pump out diffs from after the synch point
2025-07-01 17:49:06.504 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.505
2025-07-01 17:49:06.505 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.505 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.505
2025-07-01 17:49:06.505 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.505 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.505 alo = 152, ahi = 1101
2025-07-01 17:49:06.505 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.505 blo = 152, bhi = 1101
2025-07-01 17:49:06.505
2025-07-01 17:49:06.505 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.505 g = []
2025-07-01 17:49:06.505 if alo < ahi:
2025-07-01 17:49:06.505 if blo < bhi:
2025-07-01 17:49:06.505 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.505 else:
2025-07-01 17:49:06.505 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.505 elif blo < bhi:
2025-07-01 17:49:06.505 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.505
2025-07-01 17:49:06.505 > yield from g
2025-07-01 17:49:06.505
2025-07-01 17:49:06.506 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.506 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.506
2025-07-01 17:49:06.506 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.506 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.506 alo = 152, ahi = 1101
2025-07-01 17:49:06.506 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.506 blo = 152, bhi = 1101
2025-07-01 17:49:06.506
2025-07-01 17:49:06.506 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.506 r"""
2025-07-01 17:49:06.506 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.506 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.506 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.506 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.506
2025-07-01 17:49:06.506 Example:
2025-07-01 17:49:06.506
2025-07-01 17:49:06.506 >>> d = Differ()
2025-07-01 17:49:06.506 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.507 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.511 >>> print(''.join(results), end="")
2025-07-01 17:49:06.512 - abcDefghiJkl
2025-07-01 17:49:06.512 + abcdefGhijkl
2025-07-01 17:49:06.512 """
2025-07-01 17:49:06.512
2025-07-01 17:49:06.512 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.512 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.512 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.512 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.512 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.512
2025-07-01 17:49:06.512 # search for the pair that matches best without being identical
2025-07-01 17:49:06.512 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.512 # on junk -- unless we have to)
2025-07-01 17:49:06.512 for j in range(blo, bhi):
2025-07-01 17:49:06.512 bj = b[j]
2025-07-01 17:49:06.512 cruncher.set_seq2(bj)
2025-07-01 17:49:06.512 for i in range(alo, ahi):
2025-07-01 17:49:06.512 ai = a[i]
2025-07-01 17:49:06.512 if ai == bj:
2025-07-01 17:49:06.513 if eqi is None:
2025-07-01 17:49:06.513 eqi, eqj = i, j
2025-07-01 17:49:06.513 continue
2025-07-01 17:49:06.513 cruncher.set_seq1(ai)
2025-07-01 17:49:06.513 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.513 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.513 # compares by a factor of 3.
2025-07-01 17:49:06.513 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.513 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.513 # of the computation is cached by cruncher
2025-07-01 17:49:06.513 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.513 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.513 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.513 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.513 if best_ratio < cutoff:
2025-07-01 17:49:06.513 # no non-identical "pretty close" pair
2025-07-01 17:49:06.513 if eqi is None:
2025-07-01 17:49:06.513 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.513 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.513 return
2025-07-01 17:49:06.513 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.514 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.514 else:
2025-07-01 17:49:06.514 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.514 eqi = None
2025-07-01 17:49:06.514
2025-07-01 17:49:06.514 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.514 # identical
2025-07-01 17:49:06.514
2025-07-01 17:49:06.514 # pump out diffs from before the synch point
2025-07-01 17:49:06.514 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.514
2025-07-01 17:49:06.514 # do intraline marking on the synch pair
2025-07-01 17:49:06.514 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.514 if eqi is None:
2025-07-01 17:49:06.514 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.514 atags = btags = ""
2025-07-01 17:49:06.514 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.514 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.514 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.514 if tag == 'replace':
2025-07-01 17:49:06.515 atags += '^' * la
2025-07-01 17:49:06.515 btags += '^' * lb
2025-07-01 17:49:06.515 elif tag == 'delete':
2025-07-01 17:49:06.515 atags += '-' * la
2025-07-01 17:49:06.515 elif tag == 'insert':
2025-07-01 17:49:06.515 btags += '+' * lb
2025-07-01 17:49:06.515 elif tag == 'equal':
2025-07-01 17:49:06.515 atags += ' ' * la
2025-07-01 17:49:06.515 btags += ' ' * lb
2025-07-01 17:49:06.515 else:
2025-07-01 17:49:06.515 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.515 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.515 else:
2025-07-01 17:49:06.515 # the synch pair is identical
2025-07-01 17:49:06.515 yield ' ' + aelt
2025-07-01 17:49:06.515
2025-07-01 17:49:06.515 # pump out diffs from after the synch point
2025-07-01 17:49:06.515 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.515
2025-07-01 17:49:06.515 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.515 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.515
2025-07-01 17:49:06.516 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.516 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.516 alo = 153, ahi = 1101
2025-07-01 17:49:06.516 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.516 blo = 153, bhi = 1101
2025-07-01 17:49:06.516
2025-07-01 17:49:06.516 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.516 g = []
2025-07-01 17:49:06.516 if alo < ahi:
2025-07-01 17:49:06.516 if blo < bhi:
2025-07-01 17:49:06.516 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.516 else:
2025-07-01 17:49:06.516 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.516 elif blo < bhi:
2025-07-01 17:49:06.516 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.516
2025-07-01 17:49:06.516 > yield from g
2025-07-01 17:49:06.516
2025-07-01 17:49:06.516 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.517 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.517
2025-07-01 17:49:06.517 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.517 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.517 alo = 153, ahi = 1101
2025-07-01 17:49:06.517 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.517 blo = 153, bhi = 1101
2025-07-01 17:49:06.517
2025-07-01 17:49:06.517 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.517 r"""
2025-07-01 17:49:06.517 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.517 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.517 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.517 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.517
2025-07-01 17:49:06.517 Example:
2025-07-01 17:49:06.517
2025-07-01 17:49:06.517 >>> d = Differ()
2025-07-01 17:49:06.517 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.518 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.518 >>> print(''.join(results), end="")
2025-07-01 17:49:06.518 - abcDefghiJkl
2025-07-01 17:49:06.518 + abcdefGhijkl
2025-07-01 17:49:06.518 """
2025-07-01 17:49:06.518
2025-07-01 17:49:06.518 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.518 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.518 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.518 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.518 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.518
2025-07-01 17:49:06.518 # search for the pair that matches best without being identical
2025-07-01 17:49:06.518 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.518 # on junk -- unless we have to)
2025-07-01 17:49:06.518 for j in range(blo, bhi):
2025-07-01 17:49:06.518 bj = b[j]
2025-07-01 17:49:06.518 cruncher.set_seq2(bj)
2025-07-01 17:49:06.518 for i in range(alo, ahi):
2025-07-01 17:49:06.519 ai = a[i]
2025-07-01 17:49:06.519 if ai == bj:
2025-07-01 17:49:06.519 if eqi is None:
2025-07-01 17:49:06.519 eqi, eqj = i, j
2025-07-01 17:49:06.519 continue
2025-07-01 17:49:06.519 cruncher.set_seq1(ai)
2025-07-01 17:49:06.519 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.519 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.519 # compares by a factor of 3.
2025-07-01 17:49:06.519 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.519 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.519 # of the computation is cached by cruncher
2025-07-01 17:49:06.519 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.519 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.519 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.519 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.519 if best_ratio < cutoff:
2025-07-01 17:49:06.519 # no non-identical "pretty close" pair
2025-07-01 17:49:06.519 if eqi is None:
2025-07-01 17:49:06.519 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.520 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.520 return
2025-07-01 17:49:06.520 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.520 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.520 else:
2025-07-01 17:49:06.520 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.520 eqi = None
2025-07-01 17:49:06.520
2025-07-01 17:49:06.520 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.520 # identical
2025-07-01 17:49:06.520
2025-07-01 17:49:06.520 # pump out diffs from before the synch point
2025-07-01 17:49:06.520 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.520
2025-07-01 17:49:06.520 # do intraline marking on the synch pair
2025-07-01 17:49:06.520 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.520 if eqi is None:
2025-07-01 17:49:06.520 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.520 atags = btags = ""
2025-07-01 17:49:06.520 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.521 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.521 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.521 if tag == 'replace':
2025-07-01 17:49:06.521 atags += '^' * la
2025-07-01 17:49:06.521 btags += '^' * lb
2025-07-01 17:49:06.521 elif tag == 'delete':
2025-07-01 17:49:06.521 atags += '-' * la
2025-07-01 17:49:06.521 elif tag == 'insert':
2025-07-01 17:49:06.521 btags += '+' * lb
2025-07-01 17:49:06.521 elif tag == 'equal':
2025-07-01 17:49:06.521 atags += ' ' * la
2025-07-01 17:49:06.521 btags += ' ' * lb
2025-07-01 17:49:06.521 else:
2025-07-01 17:49:06.521 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.521 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.521 else:
2025-07-01 17:49:06.521 # the synch pair is identical
2025-07-01 17:49:06.521 yield ' ' + aelt
2025-07-01 17:49:06.521
2025-07-01 17:49:06.521 # pump out diffs from after the synch point
2025-07-01 17:49:06.521 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.522
2025-07-01 17:49:06.522 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.522 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.522
2025-07-01 17:49:06.522 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.522 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.522 alo = 154, ahi = 1101
2025-07-01 17:49:06.522 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.522 blo = 154, bhi = 1101
2025-07-01 17:49:06.522
2025-07-01 17:49:06.522 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.522 g = []
2025-07-01 17:49:06.522 if alo < ahi:
2025-07-01 17:49:06.522 if blo < bhi:
2025-07-01 17:49:06.522 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.522 else:
2025-07-01 17:49:06.522 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.522 elif blo < bhi:
2025-07-01 17:49:06.522 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.522
2025-07-01 17:49:06.523 > yield from g
2025-07-01 17:49:06.526
2025-07-01 17:49:06.526 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.526 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.526
2025-07-01 17:49:06.526 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.526 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.526 alo = 154, ahi = 1101
2025-07-01 17:49:06.526 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.526 blo = 154, bhi = 1101
2025-07-01 17:49:06.526
2025-07-01 17:49:06.526 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.526 r"""
2025-07-01 17:49:06.526 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.526 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.526 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.526 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.526
2025-07-01 17:49:06.526 Example:
2025-07-01 17:49:06.526
2025-07-01 17:49:06.527 >>> d = Differ()
2025-07-01 17:49:06.527 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.527 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.527 >>> print(''.join(results), end="")
2025-07-01 17:49:06.527 - abcDefghiJkl
2025-07-01 17:49:06.527 + abcdefGhijkl
2025-07-01 17:49:06.527 """
2025-07-01 17:49:06.527
2025-07-01 17:49:06.527 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.527 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.527 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.527 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.527 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.527
2025-07-01 17:49:06.527 # search for the pair that matches best without being identical
2025-07-01 17:49:06.527 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.527 # on junk -- unless we have to)
2025-07-01 17:49:06.527 for j in range(blo, bhi):
2025-07-01 17:49:06.527 bj = b[j]
2025-07-01 17:49:06.527 cruncher.set_seq2(bj)
2025-07-01 17:49:06.528 for i in range(alo, ahi):
2025-07-01 17:49:06.528 ai = a[i]
2025-07-01 17:49:06.528 if ai == bj:
2025-07-01 17:49:06.528 if eqi is None:
2025-07-01 17:49:06.528 eqi, eqj = i, j
2025-07-01 17:49:06.528 continue
2025-07-01 17:49:06.528 cruncher.set_seq1(ai)
2025-07-01 17:49:06.528 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.528 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.528 # compares by a factor of 3.
2025-07-01 17:49:06.528 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.528 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.528 # of the computation is cached by cruncher
2025-07-01 17:49:06.528 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.528 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.528 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.529 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.529 if best_ratio < cutoff:
2025-07-01 17:49:06.529 # no non-identical "pretty close" pair
2025-07-01 17:49:06.529 if eqi is None:
2025-07-01 17:49:06.529 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.529 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.529 return
2025-07-01 17:49:06.529 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.529 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.529 else:
2025-07-01 17:49:06.529 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.529 eqi = None
2025-07-01 17:49:06.529
2025-07-01 17:49:06.529 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.529 # identical
2025-07-01 17:49:06.529
2025-07-01 17:49:06.529 # pump out diffs from before the synch point
2025-07-01 17:49:06.529 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.529
2025-07-01 17:49:06.529 # do intraline marking on the synch pair
2025-07-01 17:49:06.530 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.530 if eqi is None:
2025-07-01 17:49:06.530 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.530 atags = btags = ""
2025-07-01 17:49:06.530 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.530 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.530 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.530 if tag == 'replace':
2025-07-01 17:49:06.530 atags += '^' * la
2025-07-01 17:49:06.530 btags += '^' * lb
2025-07-01 17:49:06.530 elif tag == 'delete':
2025-07-01 17:49:06.530 atags += '-' * la
2025-07-01 17:49:06.530 elif tag == 'insert':
2025-07-01 17:49:06.530 btags += '+' * lb
2025-07-01 17:49:06.530 elif tag == 'equal':
2025-07-01 17:49:06.530 atags += ' ' * la
2025-07-01 17:49:06.530 btags += ' ' * lb
2025-07-01 17:49:06.530 else:
2025-07-01 17:49:06.530 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.530 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.531 else:
2025-07-01 17:49:06.531 # the synch pair is identical
2025-07-01 17:49:06.531 yield ' ' + aelt
2025-07-01 17:49:06.531
2025-07-01 17:49:06.531 # pump out diffs from after the synch point
2025-07-01 17:49:06.531 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.531
2025-07-01 17:49:06.531 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.531 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.531
2025-07-01 17:49:06.531 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.531 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.531 alo = 155, ahi = 1101
2025-07-01 17:49:06.531 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.531 blo = 155, bhi = 1101
2025-07-01 17:49:06.531
2025-07-01 17:49:06.531 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.531 g = []
2025-07-01 17:49:06.531 if alo < ahi:
2025-07-01 17:49:06.531 if blo < bhi:
2025-07-01 17:49:06.531 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.531 else:
2025-07-01 17:49:06.532 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.532 elif blo < bhi:
2025-07-01 17:49:06.532 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.532
2025-07-01 17:49:06.532 > yield from g
2025-07-01 17:49:06.532
2025-07-01 17:49:06.532 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.532 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.532
2025-07-01 17:49:06.532 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.532 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.532 alo = 155, ahi = 1101
2025-07-01 17:49:06.532 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.532 blo = 155, bhi = 1101
2025-07-01 17:49:06.532
2025-07-01 17:49:06.532 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.532 r"""
2025-07-01 17:49:06.532 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.532 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.532 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.532 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.533
2025-07-01 17:49:06.533 Example:
2025-07-01 17:49:06.533
2025-07-01 17:49:06.533 >>> d = Differ()
2025-07-01 17:49:06.533 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.533 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.533 >>> print(''.join(results), end="")
2025-07-01 17:49:06.533 - abcDefghiJkl
2025-07-01 17:49:06.533 + abcdefGhijkl
2025-07-01 17:49:06.533 """
2025-07-01 17:49:06.533
2025-07-01 17:49:06.533 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.533 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.533 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.533 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.533 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.533
2025-07-01 17:49:06.533 # search for the pair that matches best without being identical
2025-07-01 17:49:06.533 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.533 # on junk -- unless we have to)
2025-07-01 17:49:06.534 for j in range(blo, bhi):
2025-07-01 17:49:06.534 bj = b[j]
2025-07-01 17:49:06.534 cruncher.set_seq2(bj)
2025-07-01 17:49:06.534 for i in range(alo, ahi):
2025-07-01 17:49:06.534 ai = a[i]
2025-07-01 17:49:06.534 if ai == bj:
2025-07-01 17:49:06.534 if eqi is None:
2025-07-01 17:49:06.534 eqi, eqj = i, j
2025-07-01 17:49:06.534 continue
2025-07-01 17:49:06.534 cruncher.set_seq1(ai)
2025-07-01 17:49:06.534 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.534 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.534 # compares by a factor of 3.
2025-07-01 17:49:06.534 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.534 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.534 # of the computation is cached by cruncher
2025-07-01 17:49:06.534 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.534 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.534 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.534 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.534 if best_ratio < cutoff:
2025-07-01 17:49:06.535 # no non-identical "pretty close" pair
2025-07-01 17:49:06.535 if eqi is None:
2025-07-01 17:49:06.535 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.535 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.535 return
2025-07-01 17:49:06.535 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.535 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.535 else:
2025-07-01 17:49:06.535 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.535 eqi = None
2025-07-01 17:49:06.535
2025-07-01 17:49:06.535 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.535 # identical
2025-07-01 17:49:06.535
2025-07-01 17:49:06.535 # pump out diffs from before the synch point
2025-07-01 17:49:06.535 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.535
2025-07-01 17:49:06.535 # do intraline marking on the synch pair
2025-07-01 17:49:06.535 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.535 if eqi is None:
2025-07-01 17:49:06.535 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.535 atags = btags = ""
2025-07-01 17:49:06.536 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.536 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.536 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.536 if tag == 'replace':
2025-07-01 17:49:06.536 atags += '^' * la
2025-07-01 17:49:06.536 btags += '^' * lb
2025-07-01 17:49:06.536 elif tag == 'delete':
2025-07-01 17:49:06.536 atags += '-' * la
2025-07-01 17:49:06.536 elif tag == 'insert':
2025-07-01 17:49:06.536 btags += '+' * lb
2025-07-01 17:49:06.536 elif tag == 'equal':
2025-07-01 17:49:06.536 atags += ' ' * la
2025-07-01 17:49:06.536 btags += ' ' * lb
2025-07-01 17:49:06.536 else:
2025-07-01 17:49:06.536 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.536 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.536 else:
2025-07-01 17:49:06.536 # the synch pair is identical
2025-07-01 17:49:06.536 yield ' ' + aelt
2025-07-01 17:49:06.536
2025-07-01 17:49:06.537 # pump out diffs from after the synch point
2025-07-01 17:49:06.537 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.537
2025-07-01 17:49:06.537 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.537 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.537
2025-07-01 17:49:06.537 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.537 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.537 alo = 158, ahi = 1101
2025-07-01 17:49:06.537 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.537 blo = 158, bhi = 1101
2025-07-01 17:49:06.537
2025-07-01 17:49:06.537 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.537 g = []
2025-07-01 17:49:06.537 if alo < ahi:
2025-07-01 17:49:06.537 if blo < bhi:
2025-07-01 17:49:06.537 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.537 else:
2025-07-01 17:49:06.537 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.537 elif blo < bhi:
2025-07-01 17:49:06.538 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.538
2025-07-01 17:49:06.538 > yield from g
2025-07-01 17:49:06.538
2025-07-01 17:49:06.538 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.538 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.538
2025-07-01 17:49:06.538 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.538 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.538 alo = 158, ahi = 1101
2025-07-01 17:49:06.538 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.538 blo = 158, bhi = 1101
2025-07-01 17:49:06.538
2025-07-01 17:49:06.538 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.538 r"""
2025-07-01 17:49:06.538 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.538 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.538 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.538 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.538
2025-07-01 17:49:06.538 Example:
2025-07-01 17:49:06.539
2025-07-01 17:49:06.544 >>> d = Differ()
2025-07-01 17:49:06.544 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.544 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.544 >>> print(''.join(results), end="")
2025-07-01 17:49:06.544 - abcDefghiJkl
2025-07-01 17:49:06.544 + abcdefGhijkl
2025-07-01 17:49:06.544 """
2025-07-01 17:49:06.544
2025-07-01 17:49:06.544 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.544 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.544 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.544 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.544 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.544
2025-07-01 17:49:06.544 # search for the pair that matches best without being identical
2025-07-01 17:49:06.544 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.544 # on junk -- unless we have to)
2025-07-01 17:49:06.545 for j in range(blo, bhi):
2025-07-01 17:49:06.545 bj = b[j]
2025-07-01 17:49:06.545 cruncher.set_seq2(bj)
2025-07-01 17:49:06.545 for i in range(alo, ahi):
2025-07-01 17:49:06.545 ai = a[i]
2025-07-01 17:49:06.545 if ai == bj:
2025-07-01 17:49:06.545 if eqi is None:
2025-07-01 17:49:06.545 eqi, eqj = i, j
2025-07-01 17:49:06.545 continue
2025-07-01 17:49:06.545 cruncher.set_seq1(ai)
2025-07-01 17:49:06.545 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.545 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.545 # compares by a factor of 3.
2025-07-01 17:49:06.545 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.545 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.545 # of the computation is cached by cruncher
2025-07-01 17:49:06.545 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.545 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.545 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.545 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.545 if best_ratio < cutoff:
2025-07-01 17:49:06.545 # no non-identical "pretty close" pair
2025-07-01 17:49:06.546 if eqi is None:
2025-07-01 17:49:06.546 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.546 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.546 return
2025-07-01 17:49:06.546 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.546 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.546 else:
2025-07-01 17:49:06.546 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.546 eqi = None
2025-07-01 17:49:06.546
2025-07-01 17:49:06.546 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.546 # identical
2025-07-01 17:49:06.546
2025-07-01 17:49:06.546 # pump out diffs from before the synch point
2025-07-01 17:49:06.546 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.546
2025-07-01 17:49:06.546 # do intraline marking on the synch pair
2025-07-01 17:49:06.546 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.546 if eqi is None:
2025-07-01 17:49:06.546 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.546 atags = btags = ""
2025-07-01 17:49:06.547 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.547 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.547 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.547 if tag == 'replace':
2025-07-01 17:49:06.547 atags += '^' * la
2025-07-01 17:49:06.547 btags += '^' * lb
2025-07-01 17:49:06.547 elif tag == 'delete':
2025-07-01 17:49:06.547 atags += '-' * la
2025-07-01 17:49:06.547 elif tag == 'insert':
2025-07-01 17:49:06.547 btags += '+' * lb
2025-07-01 17:49:06.547 elif tag == 'equal':
2025-07-01 17:49:06.547 atags += ' ' * la
2025-07-01 17:49:06.547 btags += ' ' * lb
2025-07-01 17:49:06.547 else:
2025-07-01 17:49:06.547 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.547 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.547 else:
2025-07-01 17:49:06.547 # the synch pair is identical
2025-07-01 17:49:06.547 yield ' ' + aelt
2025-07-01 17:49:06.547
2025-07-01 17:49:06.547 # pump out diffs from after the synch point
2025-07-01 17:49:06.548 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.548
2025-07-01 17:49:06.548 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.548 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.548
2025-07-01 17:49:06.548 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.548 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.548 alo = 159, ahi = 1101
2025-07-01 17:49:06.548 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.548 blo = 159, bhi = 1101
2025-07-01 17:49:06.548
2025-07-01 17:49:06.548 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.548 g = []
2025-07-01 17:49:06.548 if alo < ahi:
2025-07-01 17:49:06.548 if blo < bhi:
2025-07-01 17:49:06.548 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.548 else:
2025-07-01 17:49:06.548 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.548 elif blo < bhi:
2025-07-01 17:49:06.548 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.548
2025-07-01 17:49:06.548 > yield from g
2025-07-01 17:49:06.549
2025-07-01 17:49:06.549 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.549 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.549
2025-07-01 17:49:06.549 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.549 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.549 alo = 159, ahi = 1101
2025-07-01 17:49:06.549 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.549 blo = 159, bhi = 1101
2025-07-01 17:49:06.549
2025-07-01 17:49:06.549 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.549 r"""
2025-07-01 17:49:06.549 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.549 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.549 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.549 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.549
2025-07-01 17:49:06.549 Example:
2025-07-01 17:49:06.549
2025-07-01 17:49:06.550 >>> d = Differ()
2025-07-01 17:49:06.550 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.550 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.550 >>> print(''.join(results), end="")
2025-07-01 17:49:06.550 - abcDefghiJkl
2025-07-01 17:49:06.550 + abcdefGhijkl
2025-07-01 17:49:06.550 """
2025-07-01 17:49:06.550
2025-07-01 17:49:06.550 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.550 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.550 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.550 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.550 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.550
2025-07-01 17:49:06.550 # search for the pair that matches best without being identical
2025-07-01 17:49:06.550 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.550 # on junk -- unless we have to)
2025-07-01 17:49:06.550 for j in range(blo, bhi):
2025-07-01 17:49:06.551 bj = b[j]
2025-07-01 17:49:06.551 cruncher.set_seq2(bj)
2025-07-01 17:49:06.551 for i in range(alo, ahi):
2025-07-01 17:49:06.551 ai = a[i]
2025-07-01 17:49:06.551 if ai == bj:
2025-07-01 17:49:06.551 if eqi is None:
2025-07-01 17:49:06.551 eqi, eqj = i, j
2025-07-01 17:49:06.551 continue
2025-07-01 17:49:06.551 cruncher.set_seq1(ai)
2025-07-01 17:49:06.551 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.551 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.551 # compares by a factor of 3.
2025-07-01 17:49:06.551 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.551 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.551 # of the computation is cached by cruncher
2025-07-01 17:49:06.551 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.551 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.551 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.551 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.551 if best_ratio < cutoff:
2025-07-01 17:49:06.552 # no non-identical "pretty close" pair
2025-07-01 17:49:06.552 if eqi is None:
2025-07-01 17:49:06.552 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.552 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.552 return
2025-07-01 17:49:06.552 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.552 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.552 else:
2025-07-01 17:49:06.552 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.552 eqi = None
2025-07-01 17:49:06.552
2025-07-01 17:49:06.552 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.552 # identical
2025-07-01 17:49:06.552
2025-07-01 17:49:06.552 # pump out diffs from before the synch point
2025-07-01 17:49:06.552 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.552
2025-07-01 17:49:06.552 # do intraline marking on the synch pair
2025-07-01 17:49:06.552 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.552 if eqi is None:
2025-07-01 17:49:06.552 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.552 atags = btags = ""
2025-07-01 17:49:06.553 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.553 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.553 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.553 if tag == 'replace':
2025-07-01 17:49:06.553 atags += '^' * la
2025-07-01 17:49:06.553 btags += '^' * lb
2025-07-01 17:49:06.553 elif tag == 'delete':
2025-07-01 17:49:06.553 atags += '-' * la
2025-07-01 17:49:06.553 elif tag == 'insert':
2025-07-01 17:49:06.553 btags += '+' * lb
2025-07-01 17:49:06.553 elif tag == 'equal':
2025-07-01 17:49:06.553 atags += ' ' * la
2025-07-01 17:49:06.553 btags += ' ' * lb
2025-07-01 17:49:06.553 else:
2025-07-01 17:49:06.553 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.553 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.553 else:
2025-07-01 17:49:06.553 # the synch pair is identical
2025-07-01 17:49:06.553 yield ' ' + aelt
2025-07-01 17:49:06.553
2025-07-01 17:49:06.553 # pump out diffs from after the synch point
2025-07-01 17:49:06.554 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.556
2025-07-01 17:49:06.557 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.557 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.557
2025-07-01 17:49:06.557 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.557 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.557 alo = 160, ahi = 1101
2025-07-01 17:49:06.557 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.557 blo = 160, bhi = 1101
2025-07-01 17:49:06.557
2025-07-01 17:49:06.557 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.557 g = []
2025-07-01 17:49:06.557 if alo < ahi:
2025-07-01 17:49:06.557 if blo < bhi:
2025-07-01 17:49:06.557 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.557 else:
2025-07-01 17:49:06.557 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.557 elif blo < bhi:
2025-07-01 17:49:06.557 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.557
2025-07-01 17:49:06.557 > yield from g
2025-07-01 17:49:06.558
2025-07-01 17:49:06.558 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.558 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.558
2025-07-01 17:49:06.558 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.558 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.558 alo = 160, ahi = 1101
2025-07-01 17:49:06.558 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.558 blo = 160, bhi = 1101
2025-07-01 17:49:06.558
2025-07-01 17:49:06.558 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.558 r"""
2025-07-01 17:49:06.558 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.558 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.558 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.558 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.558
2025-07-01 17:49:06.558 Example:
2025-07-01 17:49:06.558
2025-07-01 17:49:06.558 >>> d = Differ()
2025-07-01 17:49:06.558 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.559 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.559 >>> print(''.join(results), end="")
2025-07-01 17:49:06.559 - abcDefghiJkl
2025-07-01 17:49:06.559 + abcdefGhijkl
2025-07-01 17:49:06.559 """
2025-07-01 17:49:06.559
2025-07-01 17:49:06.559 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.559 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.559 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.559 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.559 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.559
2025-07-01 17:49:06.559 # search for the pair that matches best without being identical
2025-07-01 17:49:06.559 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.559 # on junk -- unless we have to)
2025-07-01 17:49:06.559 for j in range(blo, bhi):
2025-07-01 17:49:06.559 bj = b[j]
2025-07-01 17:49:06.559 cruncher.set_seq2(bj)
2025-07-01 17:49:06.559 for i in range(alo, ahi):
2025-07-01 17:49:06.559 ai = a[i]
2025-07-01 17:49:06.560 if ai == bj:
2025-07-01 17:49:06.560 if eqi is None:
2025-07-01 17:49:06.560 eqi, eqj = i, j
2025-07-01 17:49:06.560 continue
2025-07-01 17:49:06.560 cruncher.set_seq1(ai)
2025-07-01 17:49:06.560 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.561 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.561 # compares by a factor of 3.
2025-07-01 17:49:06.561 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.561 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.561 # of the computation is cached by cruncher
2025-07-01 17:49:06.561 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.561 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.561 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.561 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.561 if best_ratio < cutoff:
2025-07-01 17:49:06.561 # no non-identical "pretty close" pair
2025-07-01 17:49:06.561 if eqi is None:
2025-07-01 17:49:06.561 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.561 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.561 return
2025-07-01 17:49:06.561 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.561 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.561 else:
2025-07-01 17:49:06.561 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.562 eqi = None
2025-07-01 17:49:06.562
2025-07-01 17:49:06.562 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.562 # identical
2025-07-01 17:49:06.562
2025-07-01 17:49:06.562 # pump out diffs from before the synch point
2025-07-01 17:49:06.562 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.562
2025-07-01 17:49:06.562 # do intraline marking on the synch pair
2025-07-01 17:49:06.562 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.562 if eqi is None:
2025-07-01 17:49:06.562 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.562 atags = btags = ""
2025-07-01 17:49:06.562 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.562 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.562 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.562 if tag == 'replace':
2025-07-01 17:49:06.562 atags += '^' * la
2025-07-01 17:49:06.562 btags += '^' * lb
2025-07-01 17:49:06.562 elif tag == 'delete':
2025-07-01 17:49:06.563 atags += '-' * la
2025-07-01 17:49:06.563 elif tag == 'insert':
2025-07-01 17:49:06.563 btags += '+' * lb
2025-07-01 17:49:06.563 elif tag == 'equal':
2025-07-01 17:49:06.563 atags += ' ' * la
2025-07-01 17:49:06.563 btags += ' ' * lb
2025-07-01 17:49:06.563 else:
2025-07-01 17:49:06.563 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.563 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.563 else:
2025-07-01 17:49:06.563 # the synch pair is identical
2025-07-01 17:49:06.563 yield ' ' + aelt
2025-07-01 17:49:06.563
2025-07-01 17:49:06.563 # pump out diffs from after the synch point
2025-07-01 17:49:06.563 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.563
2025-07-01 17:49:06.563 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.563 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.563
2025-07-01 17:49:06.563 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.563 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.564 alo = 161, ahi = 1101
2025-07-01 17:49:06.564 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.564 blo = 161, bhi = 1101
2025-07-01 17:49:06.564
2025-07-01 17:49:06.564 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.564 g = []
2025-07-01 17:49:06.564 if alo < ahi:
2025-07-01 17:49:06.564 if blo < bhi:
2025-07-01 17:49:06.564 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.564 else:
2025-07-01 17:49:06.564 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.564 elif blo < bhi:
2025-07-01 17:49:06.564 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.564
2025-07-01 17:49:06.564 > yield from g
2025-07-01 17:49:06.564
2025-07-01 17:49:06.564 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.564 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.564
2025-07-01 17:49:06.564 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.564 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.565 alo = 161, ahi = 1101
2025-07-01 17:49:06.565 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.565 blo = 161, bhi = 1101
2025-07-01 17:49:06.565
2025-07-01 17:49:06.565 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.565 r"""
2025-07-01 17:49:06.565 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.565 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.565 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.565 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.565
2025-07-01 17:49:06.565 Example:
2025-07-01 17:49:06.565
2025-07-01 17:49:06.565 >>> d = Differ()
2025-07-01 17:49:06.565 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.565 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.565 >>> print(''.join(results), end="")
2025-07-01 17:49:06.565 - abcDefghiJkl
2025-07-01 17:49:06.565 + abcdefGhijkl
2025-07-01 17:49:06.566 """
2025-07-01 17:49:06.566
2025-07-01 17:49:06.566 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.566 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.566 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.566 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.566 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.566
2025-07-01 17:49:06.566 # search for the pair that matches best without being identical
2025-07-01 17:49:06.566 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.566 # on junk -- unless we have to)
2025-07-01 17:49:06.566 for j in range(blo, bhi):
2025-07-01 17:49:06.566 bj = b[j]
2025-07-01 17:49:06.566 cruncher.set_seq2(bj)
2025-07-01 17:49:06.566 for i in range(alo, ahi):
2025-07-01 17:49:06.566 ai = a[i]
2025-07-01 17:49:06.566 if ai == bj:
2025-07-01 17:49:06.566 if eqi is None:
2025-07-01 17:49:06.566 eqi, eqj = i, j
2025-07-01 17:49:06.566 continue
2025-07-01 17:49:06.567 cruncher.set_seq1(ai)
2025-07-01 17:49:06.567 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.567 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.567 # compares by a factor of 3.
2025-07-01 17:49:06.567 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.567 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.567 # of the computation is cached by cruncher
2025-07-01 17:49:06.567 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.567 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.567 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.567 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.567 if best_ratio < cutoff:
2025-07-01 17:49:06.567 # no non-identical "pretty close" pair
2025-07-01 17:49:06.567 if eqi is None:
2025-07-01 17:49:06.567 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.567 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.567 return
2025-07-01 17:49:06.567 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.567 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.567 else:
2025-07-01 17:49:06.567 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.568 eqi = None
2025-07-01 17:49:06.568
2025-07-01 17:49:06.568 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.568 # identical
2025-07-01 17:49:06.568
2025-07-01 17:49:06.568 # pump out diffs from before the synch point
2025-07-01 17:49:06.568 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.568
2025-07-01 17:49:06.568 # do intraline marking on the synch pair
2025-07-01 17:49:06.568 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.568 if eqi is None:
2025-07-01 17:49:06.568 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.568 atags = btags = ""
2025-07-01 17:49:06.568 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.568 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.568 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.568 if tag == 'replace':
2025-07-01 17:49:06.568 atags += '^' * la
2025-07-01 17:49:06.568 btags += '^' * lb
2025-07-01 17:49:06.568 elif tag == 'delete':
2025-07-01 17:49:06.568 atags += '-' * la
2025-07-01 17:49:06.569 elif tag == 'insert':
2025-07-01 17:49:06.569 btags += '+' * lb
2025-07-01 17:49:06.569 elif tag == 'equal':
2025-07-01 17:49:06.569 atags += ' ' * la
2025-07-01 17:49:06.569 btags += ' ' * lb
2025-07-01 17:49:06.569 else:
2025-07-01 17:49:06.569 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.569 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.569 else:
2025-07-01 17:49:06.569 # the synch pair is identical
2025-07-01 17:49:06.569 yield ' ' + aelt
2025-07-01 17:49:06.569
2025-07-01 17:49:06.569 # pump out diffs from after the synch point
2025-07-01 17:49:06.569 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.569
2025-07-01 17:49:06.569 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.569 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.569
2025-07-01 17:49:06.569 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.569 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.570 alo = 162, ahi = 1101
2025-07-01 17:49:06.575 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.575 blo = 162, bhi = 1101
2025-07-01 17:49:06.575
2025-07-01 17:49:06.575 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.575 g = []
2025-07-01 17:49:06.575 if alo < ahi:
2025-07-01 17:49:06.575 if blo < bhi:
2025-07-01 17:49:06.575 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.575 else:
2025-07-01 17:49:06.575 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.575 elif blo < bhi:
2025-07-01 17:49:06.575 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.576
2025-07-01 17:49:06.576 > yield from g
2025-07-01 17:49:06.576
2025-07-01 17:49:06.576 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.576 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.576
2025-07-01 17:49:06.576 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.576 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.576 alo = 162, ahi = 1101
2025-07-01 17:49:06.576 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.576 blo = 162, bhi = 1101
2025-07-01 17:49:06.576
2025-07-01 17:49:06.576 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.576 r"""
2025-07-01 17:49:06.576 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.576 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.576 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.576 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.576
2025-07-01 17:49:06.577 Example:
2025-07-01 17:49:06.577
2025-07-01 17:49:06.577 >>> d = Differ()
2025-07-01 17:49:06.577 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.577 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.577 >>> print(''.join(results), end="")
2025-07-01 17:49:06.577 - abcDefghiJkl
2025-07-01 17:49:06.577 + abcdefGhijkl
2025-07-01 17:49:06.577 """
2025-07-01 17:49:06.577
2025-07-01 17:49:06.577 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.577 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.577 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.577 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.577 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.577
2025-07-01 17:49:06.577 # search for the pair that matches best without being identical
2025-07-01 17:49:06.577 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.577 # on junk -- unless we have to)
2025-07-01 17:49:06.578 for j in range(blo, bhi):
2025-07-01 17:49:06.578 bj = b[j]
2025-07-01 17:49:06.578 cruncher.set_seq2(bj)
2025-07-01 17:49:06.578 for i in range(alo, ahi):
2025-07-01 17:49:06.578 ai = a[i]
2025-07-01 17:49:06.578 if ai == bj:
2025-07-01 17:49:06.578 if eqi is None:
2025-07-01 17:49:06.578 eqi, eqj = i, j
2025-07-01 17:49:06.578 continue
2025-07-01 17:49:06.578 cruncher.set_seq1(ai)
2025-07-01 17:49:06.578 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.578 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.578 # compares by a factor of 3.
2025-07-01 17:49:06.578 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.578 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.578 # of the computation is cached by cruncher
2025-07-01 17:49:06.578 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.578 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.578 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.578 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.578 if best_ratio < cutoff:
2025-07-01 17:49:06.578 # no non-identical "pretty close" pair
2025-07-01 17:49:06.579 if eqi is None:
2025-07-01 17:49:06.579 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.579 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.579 return
2025-07-01 17:49:06.579 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.579 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.579 else:
2025-07-01 17:49:06.579 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.579 eqi = None
2025-07-01 17:49:06.579
2025-07-01 17:49:06.579 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.579 # identical
2025-07-01 17:49:06.579
2025-07-01 17:49:06.579 # pump out diffs from before the synch point
2025-07-01 17:49:06.579 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.579
2025-07-01 17:49:06.579 # do intraline marking on the synch pair
2025-07-01 17:49:06.579 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.579 if eqi is None:
2025-07-01 17:49:06.579 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.579 atags = btags = ""
2025-07-01 17:49:06.580 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.580 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.580 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.580 if tag == 'replace':
2025-07-01 17:49:06.580 atags += '^' * la
2025-07-01 17:49:06.580 btags += '^' * lb
2025-07-01 17:49:06.580 elif tag == 'delete':
2025-07-01 17:49:06.580 atags += '-' * la
2025-07-01 17:49:06.580 elif tag == 'insert':
2025-07-01 17:49:06.580 btags += '+' * lb
2025-07-01 17:49:06.580 elif tag == 'equal':
2025-07-01 17:49:06.580 atags += ' ' * la
2025-07-01 17:49:06.580 btags += ' ' * lb
2025-07-01 17:49:06.580 else:
2025-07-01 17:49:06.580 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.580 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.580 else:
2025-07-01 17:49:06.580 # the synch pair is identical
2025-07-01 17:49:06.580 yield ' ' + aelt
2025-07-01 17:49:06.580
2025-07-01 17:49:06.580 # pump out diffs from after the synch point
2025-07-01 17:49:06.580 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.581
2025-07-01 17:49:06.581 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.581 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.581
2025-07-01 17:49:06.581 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.581 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.581 alo = 163, ahi = 1101
2025-07-01 17:49:06.581 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.581 blo = 163, bhi = 1101
2025-07-01 17:49:06.581
2025-07-01 17:49:06.581 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.581 g = []
2025-07-01 17:49:06.581 if alo < ahi:
2025-07-01 17:49:06.581 if blo < bhi:
2025-07-01 17:49:06.581 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.581 else:
2025-07-01 17:49:06.581 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.581 elif blo < bhi:
2025-07-01 17:49:06.581 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.581
2025-07-01 17:49:06.582 > yield from g
2025-07-01 17:49:06.582
2025-07-01 17:49:06.582 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.582 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.582
2025-07-01 17:49:06.582 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.582 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.582 alo = 163, ahi = 1101
2025-07-01 17:49:06.582 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.582 blo = 163, bhi = 1101
2025-07-01 17:49:06.582
2025-07-01 17:49:06.582 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.582 r"""
2025-07-01 17:49:06.582 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.582 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.582 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.582 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.582
2025-07-01 17:49:06.582 Example:
2025-07-01 17:49:06.582
2025-07-01 17:49:06.582 >>> d = Differ()
2025-07-01 17:49:06.583 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.583 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.583 >>> print(''.join(results), end="")
2025-07-01 17:49:06.583 - abcDefghiJkl
2025-07-01 17:49:06.583 + abcdefGhijkl
2025-07-01 17:49:06.583 """
2025-07-01 17:49:06.583
2025-07-01 17:49:06.583 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.583 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.583 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.583 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.583 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.583
2025-07-01 17:49:06.583 # search for the pair that matches best without being identical
2025-07-01 17:49:06.583 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.583 # on junk -- unless we have to)
2025-07-01 17:49:06.583 for j in range(blo, bhi):
2025-07-01 17:49:06.584 bj = b[j]
2025-07-01 17:49:06.584 cruncher.set_seq2(bj)
2025-07-01 17:49:06.584 for i in range(alo, ahi):
2025-07-01 17:49:06.584 ai = a[i]
2025-07-01 17:49:06.584 if ai == bj:
2025-07-01 17:49:06.584 if eqi is None:
2025-07-01 17:49:06.584 eqi, eqj = i, j
2025-07-01 17:49:06.584 continue
2025-07-01 17:49:06.584 cruncher.set_seq1(ai)
2025-07-01 17:49:06.584 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.584 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.584 # compares by a factor of 3.
2025-07-01 17:49:06.584 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.584 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.584 # of the computation is cached by cruncher
2025-07-01 17:49:06.584 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.584 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.584 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.584 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.584 if best_ratio < cutoff:
2025-07-01 17:49:06.585 # no non-identical "pretty close" pair
2025-07-01 17:49:06.585 if eqi is None:
2025-07-01 17:49:06.585 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.585 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.585 return
2025-07-01 17:49:06.585 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.585 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.585 else:
2025-07-01 17:49:06.585 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.585 eqi = None
2025-07-01 17:49:06.585
2025-07-01 17:49:06.585 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.585 # identical
2025-07-01 17:49:06.585
2025-07-01 17:49:06.585 # pump out diffs from before the synch point
2025-07-01 17:49:06.585 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.585
2025-07-01 17:49:06.585 # do intraline marking on the synch pair
2025-07-01 17:49:06.585 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.585 if eqi is None:
2025-07-01 17:49:06.586 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.586 atags = btags = ""
2025-07-01 17:49:06.588 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.589 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.589 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.589 if tag == 'replace':
2025-07-01 17:49:06.589 atags += '^' * la
2025-07-01 17:49:06.589 btags += '^' * lb
2025-07-01 17:49:06.589 elif tag == 'delete':
2025-07-01 17:49:06.589 atags += '-' * la
2025-07-01 17:49:06.589 elif tag == 'insert':
2025-07-01 17:49:06.589 btags += '+' * lb
2025-07-01 17:49:06.589 elif tag == 'equal':
2025-07-01 17:49:06.589 atags += ' ' * la
2025-07-01 17:49:06.589 btags += ' ' * lb
2025-07-01 17:49:06.589 else:
2025-07-01 17:49:06.589 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.589 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.589 else:
2025-07-01 17:49:06.589 # the synch pair is identical
2025-07-01 17:49:06.589 yield ' ' + aelt
2025-07-01 17:49:06.589
2025-07-01 17:49:06.589 # pump out diffs from after the synch point
2025-07-01 17:49:06.589 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.590
2025-07-01 17:49:06.590 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.590 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.590
2025-07-01 17:49:06.590 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.590 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.590 alo = 164, ahi = 1101
2025-07-01 17:49:06.590 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.590 blo = 164, bhi = 1101
2025-07-01 17:49:06.590
2025-07-01 17:49:06.590 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.590 g = []
2025-07-01 17:49:06.590 if alo < ahi:
2025-07-01 17:49:06.590 if blo < bhi:
2025-07-01 17:49:06.590 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.590 else:
2025-07-01 17:49:06.590 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.590 elif blo < bhi:
2025-07-01 17:49:06.590 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.590
2025-07-01 17:49:06.590 > yield from g
2025-07-01 17:49:06.591
2025-07-01 17:49:06.591 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.591 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.591
2025-07-01 17:49:06.591 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.591 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.591 alo = 164, ahi = 1101
2025-07-01 17:49:06.591 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.591 blo = 164, bhi = 1101
2025-07-01 17:49:06.591
2025-07-01 17:49:06.591 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.591 r"""
2025-07-01 17:49:06.591 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.591 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.591 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.591 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.591
2025-07-01 17:49:06.591 Example:
2025-07-01 17:49:06.591
2025-07-01 17:49:06.591 >>> d = Differ()
2025-07-01 17:49:06.591 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.592 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.592 >>> print(''.join(results), end="")
2025-07-01 17:49:06.592 - abcDefghiJkl
2025-07-01 17:49:06.592 + abcdefGhijkl
2025-07-01 17:49:06.592 """
2025-07-01 17:49:06.592
2025-07-01 17:49:06.592 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.592 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.592 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.592 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.592 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.592
2025-07-01 17:49:06.592 # search for the pair that matches best without being identical
2025-07-01 17:49:06.592 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.592 # on junk -- unless we have to)
2025-07-01 17:49:06.592 for j in range(blo, bhi):
2025-07-01 17:49:06.592 bj = b[j]
2025-07-01 17:49:06.592 cruncher.set_seq2(bj)
2025-07-01 17:49:06.592 for i in range(alo, ahi):
2025-07-01 17:49:06.593 ai = a[i]
2025-07-01 17:49:06.593 if ai == bj:
2025-07-01 17:49:06.593 if eqi is None:
2025-07-01 17:49:06.593 eqi, eqj = i, j
2025-07-01 17:49:06.593 continue
2025-07-01 17:49:06.593 cruncher.set_seq1(ai)
2025-07-01 17:49:06.593 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.593 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.593 # compares by a factor of 3.
2025-07-01 17:49:06.593 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.593 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.593 # of the computation is cached by cruncher
2025-07-01 17:49:06.593 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.593 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.593 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.593 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.593 if best_ratio < cutoff:
2025-07-01 17:49:06.593 # no non-identical "pretty close" pair
2025-07-01 17:49:06.593 if eqi is None:
2025-07-01 17:49:06.593 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.594 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.594 return
2025-07-01 17:49:06.594 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.594 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.594 else:
2025-07-01 17:49:06.594 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.594 eqi = None
2025-07-01 17:49:06.594
2025-07-01 17:49:06.594 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.594 # identical
2025-07-01 17:49:06.594
2025-07-01 17:49:06.594 # pump out diffs from before the synch point
2025-07-01 17:49:06.594 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.594
2025-07-01 17:49:06.594 # do intraline marking on the synch pair
2025-07-01 17:49:06.594 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.594 if eqi is None:
2025-07-01 17:49:06.594 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.594 atags = btags = ""
2025-07-01 17:49:06.594 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.594 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.595 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.595 if tag == 'replace':
2025-07-01 17:49:06.595 atags += '^' * la
2025-07-01 17:49:06.595 btags += '^' * lb
2025-07-01 17:49:06.595 elif tag == 'delete':
2025-07-01 17:49:06.595 atags += '-' * la
2025-07-01 17:49:06.595 elif tag == 'insert':
2025-07-01 17:49:06.595 btags += '+' * lb
2025-07-01 17:49:06.595 elif tag == 'equal':
2025-07-01 17:49:06.595 atags += ' ' * la
2025-07-01 17:49:06.595 btags += ' ' * lb
2025-07-01 17:49:06.595 else:
2025-07-01 17:49:06.595 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.595 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.595 else:
2025-07-01 17:49:06.595 # the synch pair is identical
2025-07-01 17:49:06.595 yield ' ' + aelt
2025-07-01 17:49:06.595
2025-07-01 17:49:06.595 # pump out diffs from after the synch point
2025-07-01 17:49:06.596 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.596
2025-07-01 17:49:06.596 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.596 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.596
2025-07-01 17:49:06.596 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.596 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.596 alo = 165, ahi = 1101
2025-07-01 17:49:06.596 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.596 blo = 165, bhi = 1101
2025-07-01 17:49:06.596
2025-07-01 17:49:06.596 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.596 g = []
2025-07-01 17:49:06.596 if alo < ahi:
2025-07-01 17:49:06.596 if blo < bhi:
2025-07-01 17:49:06.596 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.596 else:
2025-07-01 17:49:06.596 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.596 elif blo < bhi:
2025-07-01 17:49:06.596 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.596
2025-07-01 17:49:06.597 > yield from g
2025-07-01 17:49:06.597
2025-07-01 17:49:06.597 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.597 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.597
2025-07-01 17:49:06.597 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.597 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.597 alo = 165, ahi = 1101
2025-07-01 17:49:06.597 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.597 blo = 165, bhi = 1101
2025-07-01 17:49:06.597
2025-07-01 17:49:06.597 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.597 r"""
2025-07-01 17:49:06.597 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.597 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.597 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.597 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.597
2025-07-01 17:49:06.597 Example:
2025-07-01 17:49:06.597
2025-07-01 17:49:06.598 >>> d = Differ()
2025-07-01 17:49:06.598 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.598 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.598 >>> print(''.join(results), end="")
2025-07-01 17:49:06.598 - abcDefghiJkl
2025-07-01 17:49:06.598 + abcdefGhijkl
2025-07-01 17:49:06.598 """
2025-07-01 17:49:06.598
2025-07-01 17:49:06.598 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.598 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.598 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.598 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.598 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.598
2025-07-01 17:49:06.598 # search for the pair that matches best without being identical
2025-07-01 17:49:06.598 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.598 # on junk -- unless we have to)
2025-07-01 17:49:06.598 for j in range(blo, bhi):
2025-07-01 17:49:06.598 bj = b[j]
2025-07-01 17:49:06.599 cruncher.set_seq2(bj)
2025-07-01 17:49:06.599 for i in range(alo, ahi):
2025-07-01 17:49:06.599 ai = a[i]
2025-07-01 17:49:06.599 if ai == bj:
2025-07-01 17:49:06.599 if eqi is None:
2025-07-01 17:49:06.599 eqi, eqj = i, j
2025-07-01 17:49:06.599 continue
2025-07-01 17:49:06.599 cruncher.set_seq1(ai)
2025-07-01 17:49:06.599 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.599 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.599 # compares by a factor of 3.
2025-07-01 17:49:06.599 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.599 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.599 # of the computation is cached by cruncher
2025-07-01 17:49:06.599 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.599 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.599 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.599 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.599 if best_ratio < cutoff:
2025-07-01 17:49:06.599 # no non-identical "pretty close" pair
2025-07-01 17:49:06.599 if eqi is None:
2025-07-01 17:49:06.600 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.600 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.600 return
2025-07-01 17:49:06.600 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.600 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.600 else:
2025-07-01 17:49:06.600 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.600 eqi = None
2025-07-01 17:49:06.600
2025-07-01 17:49:06.600 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.600 # identical
2025-07-01 17:49:06.600
2025-07-01 17:49:06.600 # pump out diffs from before the synch point
2025-07-01 17:49:06.600 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.600
2025-07-01 17:49:06.601 # do intraline marking on the synch pair
2025-07-01 17:49:06.601 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.601 if eqi is None:
2025-07-01 17:49:06.601 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.601 atags = btags = ""
2025-07-01 17:49:06.601 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.601 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.601 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.601 if tag == 'replace':
2025-07-01 17:49:06.601 atags += '^' * la
2025-07-01 17:49:06.601 btags += '^' * lb
2025-07-01 17:49:06.601 elif tag == 'delete':
2025-07-01 17:49:06.601 atags += '-' * la
2025-07-01 17:49:06.601 elif tag == 'insert':
2025-07-01 17:49:06.601 btags += '+' * lb
2025-07-01 17:49:06.601 elif tag == 'equal':
2025-07-01 17:49:06.601 atags += ' ' * la
2025-07-01 17:49:06.606 btags += ' ' * lb
2025-07-01 17:49:06.606 else:
2025-07-01 17:49:06.607 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.607 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.607 else:
2025-07-01 17:49:06.607 # the synch pair is identical
2025-07-01 17:49:06.607 yield ' ' + aelt
2025-07-01 17:49:06.607
2025-07-01 17:49:06.607 # pump out diffs from after the synch point
2025-07-01 17:49:06.607 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.607
2025-07-01 17:49:06.607 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.607 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.607
2025-07-01 17:49:06.607 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.607 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.607 alo = 166, ahi = 1101
2025-07-01 17:49:06.607 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.608 blo = 166, bhi = 1101
2025-07-01 17:49:06.608
2025-07-01 17:49:06.608 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.608 g = []
2025-07-01 17:49:06.608 if alo < ahi:
2025-07-01 17:49:06.608 if blo < bhi:
2025-07-01 17:49:06.608 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.608 else:
2025-07-01 17:49:06.608 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.608 elif blo < bhi:
2025-07-01 17:49:06.608 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.608
2025-07-01 17:49:06.608 > yield from g
2025-07-01 17:49:06.608
2025-07-01 17:49:06.608 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.608 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.608
2025-07-01 17:49:06.609 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.609 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.609 alo = 166, ahi = 1101
2025-07-01 17:49:06.609 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.609 blo = 166, bhi = 1101
2025-07-01 17:49:06.609
2025-07-01 17:49:06.609 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.609 r"""
2025-07-01 17:49:06.609 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.609 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.609 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.609 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.609
2025-07-01 17:49:06.609 Example:
2025-07-01 17:49:06.609
2025-07-01 17:49:06.610 >>> d = Differ()
2025-07-01 17:49:06.610 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.610 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.610 >>> print(''.join(results), end="")
2025-07-01 17:49:06.610 - abcDefghiJkl
2025-07-01 17:49:06.610 + abcdefGhijkl
2025-07-01 17:49:06.610 """
2025-07-01 17:49:06.610
2025-07-01 17:49:06.610 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.610 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.610 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.610 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.610 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.610
2025-07-01 17:49:06.610 # search for the pair that matches best without being identical
2025-07-01 17:49:06.611 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.611 # on junk -- unless we have to)
2025-07-01 17:49:06.611 for j in range(blo, bhi):
2025-07-01 17:49:06.611 bj = b[j]
2025-07-01 17:49:06.611 cruncher.set_seq2(bj)
2025-07-01 17:49:06.611 for i in range(alo, ahi):
2025-07-01 17:49:06.611 ai = a[i]
2025-07-01 17:49:06.611 if ai == bj:
2025-07-01 17:49:06.611 if eqi is None:
2025-07-01 17:49:06.611 eqi, eqj = i, j
2025-07-01 17:49:06.611 continue
2025-07-01 17:49:06.611 cruncher.set_seq1(ai)
2025-07-01 17:49:06.611 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.611 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.611 # compares by a factor of 3.
2025-07-01 17:49:06.611 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.612 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.612 # of the computation is cached by cruncher
2025-07-01 17:49:06.612 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.612 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.612 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.612 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.612 if best_ratio < cutoff:
2025-07-01 17:49:06.612 # no non-identical "pretty close" pair
2025-07-01 17:49:06.612 if eqi is None:
2025-07-01 17:49:06.612 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.612 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.612 return
2025-07-01 17:49:06.612 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.612 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.612 else:
2025-07-01 17:49:06.612 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.613 eqi = None
2025-07-01 17:49:06.613
2025-07-01 17:49:06.613 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.613 # identical
2025-07-01 17:49:06.613
2025-07-01 17:49:06.613 # pump out diffs from before the synch point
2025-07-01 17:49:06.613 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.613
2025-07-01 17:49:06.613 # do intraline marking on the synch pair
2025-07-01 17:49:06.613 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.613 if eqi is None:
2025-07-01 17:49:06.613 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.613 atags = btags = ""
2025-07-01 17:49:06.613 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.613 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.613 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.613 if tag == 'replace':
2025-07-01 17:49:06.614 atags += '^' * la
2025-07-01 17:49:06.614 btags += '^' * lb
2025-07-01 17:49:06.614 elif tag == 'delete':
2025-07-01 17:49:06.614 atags += '-' * la
2025-07-01 17:49:06.614 elif tag == 'insert':
2025-07-01 17:49:06.614 btags += '+' * lb
2025-07-01 17:49:06.614 elif tag == 'equal':
2025-07-01 17:49:06.614 atags += ' ' * la
2025-07-01 17:49:06.614 btags += ' ' * lb
2025-07-01 17:49:06.614 else:
2025-07-01 17:49:06.614 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.614 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.614 else:
2025-07-01 17:49:06.614 # the synch pair is identical
2025-07-01 17:49:06.614 yield ' ' + aelt
2025-07-01 17:49:06.614
2025-07-01 17:49:06.615 # pump out diffs from after the synch point
2025-07-01 17:49:06.615 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.615
2025-07-01 17:49:06.615 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.615 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.615
2025-07-01 17:49:06.615 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.615 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.615 alo = 167, ahi = 1101
2025-07-01 17:49:06.615 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.615 blo = 167, bhi = 1101
2025-07-01 17:49:06.615
2025-07-01 17:49:06.615 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.615 g = []
2025-07-01 17:49:06.615 if alo < ahi:
2025-07-01 17:49:06.615 if blo < bhi:
2025-07-01 17:49:06.616 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.616 else:
2025-07-01 17:49:06.616 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.616 elif blo < bhi:
2025-07-01 17:49:06.616 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.616
2025-07-01 17:49:06.616 > yield from g
2025-07-01 17:49:06.616
2025-07-01 17:49:06.616 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.616 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.616
2025-07-01 17:49:06.616 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.616 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.616 alo = 167, ahi = 1101
2025-07-01 17:49:06.616 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.616 blo = 167, bhi = 1101
2025-07-01 17:49:06.617
2025-07-01 17:49:06.617 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.617 r"""
2025-07-01 17:49:06.617 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.617 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.617 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.617 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.617
2025-07-01 17:49:06.617 Example:
2025-07-01 17:49:06.617
2025-07-01 17:49:06.617 >>> d = Differ()
2025-07-01 17:49:06.617 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.617 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.617 >>> print(''.join(results), end="")
2025-07-01 17:49:06.617 - abcDefghiJkl
2025-07-01 17:49:06.621 + abcdefGhijkl
2025-07-01 17:49:06.621 """
2025-07-01 17:49:06.622
2025-07-01 17:49:06.622 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.622 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.622 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.622 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.622 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.622
2025-07-01 17:49:06.622 # search for the pair that matches best without being identical
2025-07-01 17:49:06.622 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.622 # on junk -- unless we have to)
2025-07-01 17:49:06.622 for j in range(blo, bhi):
2025-07-01 17:49:06.622 bj = b[j]
2025-07-01 17:49:06.622 cruncher.set_seq2(bj)
2025-07-01 17:49:06.622 for i in range(alo, ahi):
2025-07-01 17:49:06.622 ai = a[i]
2025-07-01 17:49:06.622 if ai == bj:
2025-07-01 17:49:06.623 if eqi is None:
2025-07-01 17:49:06.623 eqi, eqj = i, j
2025-07-01 17:49:06.623 continue
2025-07-01 17:49:06.623 cruncher.set_seq1(ai)
2025-07-01 17:49:06.623 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.623 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.623 # compares by a factor of 3.
2025-07-01 17:49:06.623 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.623 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.623 # of the computation is cached by cruncher
2025-07-01 17:49:06.623 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.623 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.623 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.623 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.623 if best_ratio < cutoff:
2025-07-01 17:49:06.624 # no non-identical "pretty close" pair
2025-07-01 17:49:06.624 if eqi is None:
2025-07-01 17:49:06.624 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.624 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.624 return
2025-07-01 17:49:06.624 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.624 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.624 else:
2025-07-01 17:49:06.624 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.624 eqi = None
2025-07-01 17:49:06.624
2025-07-01 17:49:06.624 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.624 # identical
2025-07-01 17:49:06.624
2025-07-01 17:49:06.624 # pump out diffs from before the synch point
2025-07-01 17:49:06.625 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.625
2025-07-01 17:49:06.625 # do intraline marking on the synch pair
2025-07-01 17:49:06.625 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.625 if eqi is None:
2025-07-01 17:49:06.625 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.625 atags = btags = ""
2025-07-01 17:49:06.625 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.625 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.625 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.625 if tag == 'replace':
2025-07-01 17:49:06.625 atags += '^' * la
2025-07-01 17:49:06.625 btags += '^' * lb
2025-07-01 17:49:06.625 elif tag == 'delete':
2025-07-01 17:49:06.625 atags += '-' * la
2025-07-01 17:49:06.625 elif tag == 'insert':
2025-07-01 17:49:06.626 btags += '+' * lb
2025-07-01 17:49:06.626 elif tag == 'equal':
2025-07-01 17:49:06.626 atags += ' ' * la
2025-07-01 17:49:06.626 btags += ' ' * lb
2025-07-01 17:49:06.626 else:
2025-07-01 17:49:06.626 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.626 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.626 else:
2025-07-01 17:49:06.626 # the synch pair is identical
2025-07-01 17:49:06.626 yield ' ' + aelt
2025-07-01 17:49:06.626
2025-07-01 17:49:06.626 # pump out diffs from after the synch point
2025-07-01 17:49:06.626 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.626
2025-07-01 17:49:06.626 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.626 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.626
2025-07-01 17:49:06.627 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.627 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.627 alo = 168, ahi = 1101
2025-07-01 17:49:06.627 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.627 blo = 168, bhi = 1101
2025-07-01 17:49:06.627
2025-07-01 17:49:06.627 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.627 g = []
2025-07-01 17:49:06.627 if alo < ahi:
2025-07-01 17:49:06.627 if blo < bhi:
2025-07-01 17:49:06.627 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.627 else:
2025-07-01 17:49:06.627 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.627 elif blo < bhi:
2025-07-01 17:49:06.627 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.627
2025-07-01 17:49:06.628 > yield from g
2025-07-01 17:49:06.628
2025-07-01 17:49:06.628 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.628 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.628
2025-07-01 17:49:06.628 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.628 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.628 alo = 168, ahi = 1101
2025-07-01 17:49:06.628 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.628 blo = 168, bhi = 1101
2025-07-01 17:49:06.628
2025-07-01 17:49:06.628 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.628 r"""
2025-07-01 17:49:06.628 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.628 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.628 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.629 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.629
2025-07-01 17:49:06.629 Example:
2025-07-01 17:49:06.629
2025-07-01 17:49:06.629 >>> d = Differ()
2025-07-01 17:49:06.629 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.629 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.629 >>> print(''.join(results), end="")
2025-07-01 17:49:06.629 - abcDefghiJkl
2025-07-01 17:49:06.629 + abcdefGhijkl
2025-07-01 17:49:06.629 """
2025-07-01 17:49:06.629
2025-07-01 17:49:06.629 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.629 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.629 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.630 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.630 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.630
2025-07-01 17:49:06.630 # search for the pair that matches best without being identical
2025-07-01 17:49:06.630 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.630 # on junk -- unless we have to)
2025-07-01 17:49:06.630 for j in range(blo, bhi):
2025-07-01 17:49:06.630 bj = b[j]
2025-07-01 17:49:06.630 cruncher.set_seq2(bj)
2025-07-01 17:49:06.630 for i in range(alo, ahi):
2025-07-01 17:49:06.630 ai = a[i]
2025-07-01 17:49:06.630 if ai == bj:
2025-07-01 17:49:06.630 if eqi is None:
2025-07-01 17:49:06.630 eqi, eqj = i, j
2025-07-01 17:49:06.630 continue
2025-07-01 17:49:06.630 cruncher.set_seq1(ai)
2025-07-01 17:49:06.631 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.631 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.631 # compares by a factor of 3.
2025-07-01 17:49:06.631 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.631 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.631 # of the computation is cached by cruncher
2025-07-01 17:49:06.631 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.631 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.631 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.631 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.631 if best_ratio < cutoff:
2025-07-01 17:49:06.631 # no non-identical "pretty close" pair
2025-07-01 17:49:06.631 if eqi is None:
2025-07-01 17:49:06.631 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.631 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.632 return
2025-07-01 17:49:06.632 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.632 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.632 else:
2025-07-01 17:49:06.632 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.632 eqi = None
2025-07-01 17:49:06.632
2025-07-01 17:49:06.632 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.632 # identical
2025-07-01 17:49:06.632
2025-07-01 17:49:06.632 # pump out diffs from before the synch point
2025-07-01 17:49:06.632 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.632
2025-07-01 17:49:06.632 # do intraline marking on the synch pair
2025-07-01 17:49:06.632 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.632 if eqi is None:
2025-07-01 17:49:06.632 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.633 atags = btags = ""
2025-07-01 17:49:06.633 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.633 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.633 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.633 if tag == 'replace':
2025-07-01 17:49:06.633 atags += '^' * la
2025-07-01 17:49:06.633 btags += '^' * lb
2025-07-01 17:49:06.633 elif tag == 'delete':
2025-07-01 17:49:06.633 atags += '-' * la
2025-07-01 17:49:06.633 elif tag == 'insert':
2025-07-01 17:49:06.633 btags += '+' * lb
2025-07-01 17:49:06.633 elif tag == 'equal':
2025-07-01 17:49:06.633 atags += ' ' * la
2025-07-01 17:49:06.633 btags += ' ' * lb
2025-07-01 17:49:06.633 else:
2025-07-01 17:49:06.633 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.634 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.638 else:
2025-07-01 17:49:06.639 # the synch pair is identical
2025-07-01 17:49:06.639 yield ' ' + aelt
2025-07-01 17:49:06.639
2025-07-01 17:49:06.639 # pump out diffs from after the synch point
2025-07-01 17:49:06.639 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.639
2025-07-01 17:49:06.639 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.639 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.639
2025-07-01 17:49:06.639 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.639 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.639 alo = 169, ahi = 1101
2025-07-01 17:49:06.639 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.639 blo = 169, bhi = 1101
2025-07-01 17:49:06.639
2025-07-01 17:49:06.639 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.640 g = []
2025-07-01 17:49:06.640 if alo < ahi:
2025-07-01 17:49:06.640 if blo < bhi:
2025-07-01 17:49:06.640 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.640 else:
2025-07-01 17:49:06.640 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.640 elif blo < bhi:
2025-07-01 17:49:06.640 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.640
2025-07-01 17:49:06.640 > yield from g
2025-07-01 17:49:06.640
2025-07-01 17:49:06.640 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.640 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.640
2025-07-01 17:49:06.640 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.640 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.641 alo = 169, ahi = 1101
2025-07-01 17:49:06.641 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.641 blo = 169, bhi = 1101
2025-07-01 17:49:06.641
2025-07-01 17:49:06.641 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.641 r"""
2025-07-01 17:49:06.641 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.641 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.641 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.641 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.641
2025-07-01 17:49:06.641 Example:
2025-07-01 17:49:06.641
2025-07-01 17:49:06.641 >>> d = Differ()
2025-07-01 17:49:06.641 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.641 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.641 >>> print(''.join(results), end="")
2025-07-01 17:49:06.642 - abcDefghiJkl
2025-07-01 17:49:06.642 + abcdefGhijkl
2025-07-01 17:49:06.642 """
2025-07-01 17:49:06.642
2025-07-01 17:49:06.642 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.642 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.642 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.642 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.642 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.642
2025-07-01 17:49:06.642 # search for the pair that matches best without being identical
2025-07-01 17:49:06.642 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.642 # on junk -- unless we have to)
2025-07-01 17:49:06.642 for j in range(blo, bhi):
2025-07-01 17:49:06.643 bj = b[j]
2025-07-01 17:49:06.643 cruncher.set_seq2(bj)
2025-07-01 17:49:06.643 for i in range(alo, ahi):
2025-07-01 17:49:06.643 ai = a[i]
2025-07-01 17:49:06.643 if ai == bj:
2025-07-01 17:49:06.643 if eqi is None:
2025-07-01 17:49:06.643 eqi, eqj = i, j
2025-07-01 17:49:06.643 continue
2025-07-01 17:49:06.643 cruncher.set_seq1(ai)
2025-07-01 17:49:06.643 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.643 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.643 # compares by a factor of 3.
2025-07-01 17:49:06.643 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.643 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.643 # of the computation is cached by cruncher
2025-07-01 17:49:06.643 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.644 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.644 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.644 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.644 if best_ratio < cutoff:
2025-07-01 17:49:06.644 # no non-identical "pretty close" pair
2025-07-01 17:49:06.644 if eqi is None:
2025-07-01 17:49:06.644 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.644 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.644 return
2025-07-01 17:49:06.644 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.644 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.644 else:
2025-07-01 17:49:06.644 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.644 eqi = None
2025-07-01 17:49:06.644
2025-07-01 17:49:06.644 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.644 # identical
2025-07-01 17:49:06.645
2025-07-01 17:49:06.645 # pump out diffs from before the synch point
2025-07-01 17:49:06.645 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.645
2025-07-01 17:49:06.645 # do intraline marking on the synch pair
2025-07-01 17:49:06.645 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.645 if eqi is None:
2025-07-01 17:49:06.645 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.645 atags = btags = ""
2025-07-01 17:49:06.645 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.645 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.645 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.645 if tag == 'replace':
2025-07-01 17:49:06.645 atags += '^' * la
2025-07-01 17:49:06.645 btags += '^' * lb
2025-07-01 17:49:06.645 elif tag == 'delete':
2025-07-01 17:49:06.645 atags += '-' * la
2025-07-01 17:49:06.646 elif tag == 'insert':
2025-07-01 17:49:06.646 btags += '+' * lb
2025-07-01 17:49:06.646 elif tag == 'equal':
2025-07-01 17:49:06.646 atags += ' ' * la
2025-07-01 17:49:06.646 btags += ' ' * lb
2025-07-01 17:49:06.646 else:
2025-07-01 17:49:06.646 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.646 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.646 else:
2025-07-01 17:49:06.646 # the synch pair is identical
2025-07-01 17:49:06.646 yield ' ' + aelt
2025-07-01 17:49:06.646
2025-07-01 17:49:06.646 # pump out diffs from after the synch point
2025-07-01 17:49:06.646 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.646
2025-07-01 17:49:06.646 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.647 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.647
2025-07-01 17:49:06.647 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.647 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.647 alo = 170, ahi = 1101
2025-07-01 17:49:06.647 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.647 blo = 170, bhi = 1101
2025-07-01 17:49:06.647
2025-07-01 17:49:06.647 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.647 g = []
2025-07-01 17:49:06.647 if alo < ahi:
2025-07-01 17:49:06.647 if blo < bhi:
2025-07-01 17:49:06.647 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.647 else:
2025-07-01 17:49:06.647 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.647 elif blo < bhi:
2025-07-01 17:49:06.648 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.648
2025-07-01 17:49:06.648 > yield from g
2025-07-01 17:49:06.648
2025-07-01 17:49:06.648 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.648 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.648
2025-07-01 17:49:06.648 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.648 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.648 alo = 170, ahi = 1101
2025-07-01 17:49:06.648 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.648 blo = 170, bhi = 1101
2025-07-01 17:49:06.648
2025-07-01 17:49:06.648 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.648 r"""
2025-07-01 17:49:06.649 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.649 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.649 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.649 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.649
2025-07-01 17:49:06.649 Example:
2025-07-01 17:49:06.649
2025-07-01 17:49:06.649 >>> d = Differ()
2025-07-01 17:49:06.649 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.649 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.649 >>> print(''.join(results), end="")
2025-07-01 17:49:06.649 - abcDefghiJkl
2025-07-01 17:49:06.649 + abcdefGhijkl
2025-07-01 17:49:06.649 """
2025-07-01 17:49:06.650
2025-07-01 17:49:06.653 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.653 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.653 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.653 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.653 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.653
2025-07-01 17:49:06.653 # search for the pair that matches best without being identical
2025-07-01 17:49:06.653 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.653 # on junk -- unless we have to)
2025-07-01 17:49:06.653 for j in range(blo, bhi):
2025-07-01 17:49:06.654 bj = b[j]
2025-07-01 17:49:06.654 cruncher.set_seq2(bj)
2025-07-01 17:49:06.654 for i in range(alo, ahi):
2025-07-01 17:49:06.654 ai = a[i]
2025-07-01 17:49:06.654 if ai == bj:
2025-07-01 17:49:06.654 if eqi is None:
2025-07-01 17:49:06.654 eqi, eqj = i, j
2025-07-01 17:49:06.654 continue
2025-07-01 17:49:06.654 cruncher.set_seq1(ai)
2025-07-01 17:49:06.654 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.654 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.654 # compares by a factor of 3.
2025-07-01 17:49:06.654 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.654 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.654 # of the computation is cached by cruncher
2025-07-01 17:49:06.654 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.655 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.655 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.655 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.655 if best_ratio < cutoff:
2025-07-01 17:49:06.655 # no non-identical "pretty close" pair
2025-07-01 17:49:06.655 if eqi is None:
2025-07-01 17:49:06.655 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.655 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.655 return
2025-07-01 17:49:06.655 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.655 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.655 else:
2025-07-01 17:49:06.655 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.655 eqi = None
2025-07-01 17:49:06.655
2025-07-01 17:49:06.655 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.655 # identical
2025-07-01 17:49:06.656
2025-07-01 17:49:06.656 # pump out diffs from before the synch point
2025-07-01 17:49:06.656 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.656
2025-07-01 17:49:06.656 # do intraline marking on the synch pair
2025-07-01 17:49:06.656 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.656 if eqi is None:
2025-07-01 17:49:06.656 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.656 atags = btags = ""
2025-07-01 17:49:06.656 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.656 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.656 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.656 if tag == 'replace':
2025-07-01 17:49:06.656 atags += '^' * la
2025-07-01 17:49:06.656 btags += '^' * lb
2025-07-01 17:49:06.656 elif tag == 'delete':
2025-07-01 17:49:06.657 atags += '-' * la
2025-07-01 17:49:06.657 elif tag == 'insert':
2025-07-01 17:49:06.657 btags += '+' * lb
2025-07-01 17:49:06.657 elif tag == 'equal':
2025-07-01 17:49:06.657 atags += ' ' * la
2025-07-01 17:49:06.657 btags += ' ' * lb
2025-07-01 17:49:06.657 else:
2025-07-01 17:49:06.657 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.657 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.657 else:
2025-07-01 17:49:06.657 # the synch pair is identical
2025-07-01 17:49:06.657 yield ' ' + aelt
2025-07-01 17:49:06.657
2025-07-01 17:49:06.657 # pump out diffs from after the synch point
2025-07-01 17:49:06.657 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.658
2025-07-01 17:49:06.658 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.658 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.658
2025-07-01 17:49:06.658 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.658 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.658 alo = 171, ahi = 1101
2025-07-01 17:49:06.658 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.658 blo = 171, bhi = 1101
2025-07-01 17:49:06.658
2025-07-01 17:49:06.658 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.658 g = []
2025-07-01 17:49:06.658 if alo < ahi:
2025-07-01 17:49:06.658 if blo < bhi:
2025-07-01 17:49:06.658 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.658 else:
2025-07-01 17:49:06.659 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.659 elif blo < bhi:
2025-07-01 17:49:06.659 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.659
2025-07-01 17:49:06.659 > yield from g
2025-07-01 17:49:06.659
2025-07-01 17:49:06.659 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.659 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.659
2025-07-01 17:49:06.659 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.659 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.659 alo = 171, ahi = 1101
2025-07-01 17:49:06.659 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.659 blo = 171, bhi = 1101
2025-07-01 17:49:06.659
2025-07-01 17:49:06.659 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.659 r"""
2025-07-01 17:49:06.660 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.660 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.660 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.660 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.660
2025-07-01 17:49:06.660 Example:
2025-07-01 17:49:06.660
2025-07-01 17:49:06.660 >>> d = Differ()
2025-07-01 17:49:06.660 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.660 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.660 >>> print(''.join(results), end="")
2025-07-01 17:49:06.660 - abcDefghiJkl
2025-07-01 17:49:06.660 + abcdefGhijkl
2025-07-01 17:49:06.660 """
2025-07-01 17:49:06.660
2025-07-01 17:49:06.661 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.661 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.661 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.661 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.661 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.661
2025-07-01 17:49:06.661 # search for the pair that matches best without being identical
2025-07-01 17:49:06.661 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.661 # on junk -- unless we have to)
2025-07-01 17:49:06.661 for j in range(blo, bhi):
2025-07-01 17:49:06.661 bj = b[j]
2025-07-01 17:49:06.661 cruncher.set_seq2(bj)
2025-07-01 17:49:06.661 for i in range(alo, ahi):
2025-07-01 17:49:06.661 ai = a[i]
2025-07-01 17:49:06.661 if ai == bj:
2025-07-01 17:49:06.661 if eqi is None:
2025-07-01 17:49:06.662 eqi, eqj = i, j
2025-07-01 17:49:06.662 continue
2025-07-01 17:49:06.662 cruncher.set_seq1(ai)
2025-07-01 17:49:06.662 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.662 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.662 # compares by a factor of 3.
2025-07-01 17:49:06.662 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.662 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.662 # of the computation is cached by cruncher
2025-07-01 17:49:06.662 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.662 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.662 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.662 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.662 if best_ratio < cutoff:
2025-07-01 17:49:06.662 # no non-identical "pretty close" pair
2025-07-01 17:49:06.662 if eqi is None:
2025-07-01 17:49:06.662 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.663 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.663 return
2025-07-01 17:49:06.663 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.663 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.663 else:
2025-07-01 17:49:06.663 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.663 eqi = None
2025-07-01 17:49:06.663
2025-07-01 17:49:06.663 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.663 # identical
2025-07-01 17:49:06.663
2025-07-01 17:49:06.663 # pump out diffs from before the synch point
2025-07-01 17:49:06.663 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.663
2025-07-01 17:49:06.663 # do intraline marking on the synch pair
2025-07-01 17:49:06.663 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.664 if eqi is None:
2025-07-01 17:49:06.664 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.664 atags = btags = ""
2025-07-01 17:49:06.664 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.664 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.664 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.664 if tag == 'replace':
2025-07-01 17:49:06.664 atags += '^' * la
2025-07-01 17:49:06.664 btags += '^' * lb
2025-07-01 17:49:06.664 elif tag == 'delete':
2025-07-01 17:49:06.664 atags += '-' * la
2025-07-01 17:49:06.664 elif tag == 'insert':
2025-07-01 17:49:06.664 btags += '+' * lb
2025-07-01 17:49:06.664 elif tag == 'equal':
2025-07-01 17:49:06.664 atags += ' ' * la
2025-07-01 17:49:06.664 btags += ' ' * lb
2025-07-01 17:49:06.665 else:
2025-07-01 17:49:06.665 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.665 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.665 else:
2025-07-01 17:49:06.665 # the synch pair is identical
2025-07-01 17:49:06.665 yield ' ' + aelt
2025-07-01 17:49:06.665
2025-07-01 17:49:06.665 # pump out diffs from after the synch point
2025-07-01 17:49:06.665 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.665
2025-07-01 17:49:06.665 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.665 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.665
2025-07-01 17:49:06.665 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.665 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.665 alo = 172, ahi = 1101
2025-07-01 17:49:06.670 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.670 blo = 172, bhi = 1101
2025-07-01 17:49:06.671
2025-07-01 17:49:06.671 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.671 g = []
2025-07-01 17:49:06.671 if alo < ahi:
2025-07-01 17:49:06.671 if blo < bhi:
2025-07-01 17:49:06.671 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.671 else:
2025-07-01 17:49:06.671 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.671 elif blo < bhi:
2025-07-01 17:49:06.671 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.671
2025-07-01 17:49:06.671 > yield from g
2025-07-01 17:49:06.671
2025-07-01 17:49:06.671 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.671 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.671
2025-07-01 17:49:06.672 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.672 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.672 alo = 172, ahi = 1101
2025-07-01 17:49:06.672 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.672 blo = 172, bhi = 1101
2025-07-01 17:49:06.672
2025-07-01 17:49:06.672 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.672 r"""
2025-07-01 17:49:06.672 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.672 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.672 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.672 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.672
2025-07-01 17:49:06.672 Example:
2025-07-01 17:49:06.672
2025-07-01 17:49:06.672 >>> d = Differ()
2025-07-01 17:49:06.673 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.673 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.673 >>> print(''.join(results), end="")
2025-07-01 17:49:06.673 - abcDefghiJkl
2025-07-01 17:49:06.673 + abcdefGhijkl
2025-07-01 17:49:06.673 """
2025-07-01 17:49:06.673
2025-07-01 17:49:06.673 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.673 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.673 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.673 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.673 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.673
2025-07-01 17:49:06.673 # search for the pair that matches best without being identical
2025-07-01 17:49:06.673 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.674 # on junk -- unless we have to)
2025-07-01 17:49:06.674 for j in range(blo, bhi):
2025-07-01 17:49:06.674 bj = b[j]
2025-07-01 17:49:06.674 cruncher.set_seq2(bj)
2025-07-01 17:49:06.674 for i in range(alo, ahi):
2025-07-01 17:49:06.674 ai = a[i]
2025-07-01 17:49:06.674 if ai == bj:
2025-07-01 17:49:06.674 if eqi is None:
2025-07-01 17:49:06.674 eqi, eqj = i, j
2025-07-01 17:49:06.674 continue
2025-07-01 17:49:06.674 cruncher.set_seq1(ai)
2025-07-01 17:49:06.674 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.674 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.674 # compares by a factor of 3.
2025-07-01 17:49:06.674 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.674 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.675 # of the computation is cached by cruncher
2025-07-01 17:49:06.675 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.675 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.675 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.675 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.675 if best_ratio < cutoff:
2025-07-01 17:49:06.675 # no non-identical "pretty close" pair
2025-07-01 17:49:06.675 if eqi is None:
2025-07-01 17:49:06.675 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.675 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.675 return
2025-07-01 17:49:06.675 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.675 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.675 else:
2025-07-01 17:49:06.675 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.675 eqi = None
2025-07-01 17:49:06.676
2025-07-01 17:49:06.676 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.676 # identical
2025-07-01 17:49:06.676
2025-07-01 17:49:06.676 # pump out diffs from before the synch point
2025-07-01 17:49:06.676 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.676
2025-07-01 17:49:06.676 # do intraline marking on the synch pair
2025-07-01 17:49:06.676 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.676 if eqi is None:
2025-07-01 17:49:06.676 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.676 atags = btags = ""
2025-07-01 17:49:06.676 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.676 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.676 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.676 if tag == 'replace':
2025-07-01 17:49:06.676 atags += '^' * la
2025-07-01 17:49:06.677 btags += '^' * lb
2025-07-01 17:49:06.677 elif tag == 'delete':
2025-07-01 17:49:06.677 atags += '-' * la
2025-07-01 17:49:06.677 elif tag == 'insert':
2025-07-01 17:49:06.677 btags += '+' * lb
2025-07-01 17:49:06.677 elif tag == 'equal':
2025-07-01 17:49:06.677 atags += ' ' * la
2025-07-01 17:49:06.677 btags += ' ' * lb
2025-07-01 17:49:06.677 else:
2025-07-01 17:49:06.677 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.677 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.677 else:
2025-07-01 17:49:06.677 # the synch pair is identical
2025-07-01 17:49:06.677 yield ' ' + aelt
2025-07-01 17:49:06.677
2025-07-01 17:49:06.677 # pump out diffs from after the synch point
2025-07-01 17:49:06.678 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.678
2025-07-01 17:49:06.678 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.678 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.678
2025-07-01 17:49:06.678 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.678 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.678 alo = 173, ahi = 1101
2025-07-01 17:49:06.678 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.678 blo = 173, bhi = 1101
2025-07-01 17:49:06.678
2025-07-01 17:49:06.678 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.678 g = []
2025-07-01 17:49:06.678 if alo < ahi:
2025-07-01 17:49:06.678 if blo < bhi:
2025-07-01 17:49:06.678 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.679 else:
2025-07-01 17:49:06.679 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.679 elif blo < bhi:
2025-07-01 17:49:06.679 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.679
2025-07-01 17:49:06.679 > yield from g
2025-07-01 17:49:06.679
2025-07-01 17:49:06.679 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.679 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.679
2025-07-01 17:49:06.679 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.679 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.679 alo = 173, ahi = 1101
2025-07-01 17:49:06.679 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.679 blo = 173, bhi = 1101
2025-07-01 17:49:06.680
2025-07-01 17:49:06.680 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.680 r"""
2025-07-01 17:49:06.680 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.680 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.680 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.680 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.680
2025-07-01 17:49:06.680 Example:
2025-07-01 17:49:06.680
2025-07-01 17:49:06.680 >>> d = Differ()
2025-07-01 17:49:06.680 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.680 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.680 >>> print(''.join(results), end="")
2025-07-01 17:49:06.680 - abcDefghiJkl
2025-07-01 17:49:06.681 + abcdefGhijkl
2025-07-01 17:49:06.681 """
2025-07-01 17:49:06.681
2025-07-01 17:49:06.681 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.681 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.681 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.681 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.681 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.681
2025-07-01 17:49:06.681 # search for the pair that matches best without being identical
2025-07-01 17:49:06.681 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.681 # on junk -- unless we have to)
2025-07-01 17:49:06.681 for j in range(blo, bhi):
2025-07-01 17:49:06.681 bj = b[j]
2025-07-01 17:49:06.681 cruncher.set_seq2(bj)
2025-07-01 17:49:06.681 for i in range(alo, ahi):
2025-07-01 17:49:06.682 ai = a[i]
2025-07-01 17:49:06.685 if ai == bj:
2025-07-01 17:49:06.685 if eqi is None:
2025-07-01 17:49:06.685 eqi, eqj = i, j
2025-07-01 17:49:06.685 continue
2025-07-01 17:49:06.685 cruncher.set_seq1(ai)
2025-07-01 17:49:06.685 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.685 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.685 # compares by a factor of 3.
2025-07-01 17:49:06.685 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.686 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.686 # of the computation is cached by cruncher
2025-07-01 17:49:06.686 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.686 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.686 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.686 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.686 if best_ratio < cutoff:
2025-07-01 17:49:06.686 # no non-identical "pretty close" pair
2025-07-01 17:49:06.686 if eqi is None:
2025-07-01 17:49:06.686 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.686 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.686 return
2025-07-01 17:49:06.686 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.686 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.686 else:
2025-07-01 17:49:06.686 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.686 eqi = None
2025-07-01 17:49:06.687
2025-07-01 17:49:06.687 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.687 # identical
2025-07-01 17:49:06.687
2025-07-01 17:49:06.687 # pump out diffs from before the synch point
2025-07-01 17:49:06.687 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.687
2025-07-01 17:49:06.687 # do intraline marking on the synch pair
2025-07-01 17:49:06.687 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.687 if eqi is None:
2025-07-01 17:49:06.687 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.687 atags = btags = ""
2025-07-01 17:49:06.687 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.687 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.687 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.687 if tag == 'replace':
2025-07-01 17:49:06.688 atags += '^' * la
2025-07-01 17:49:06.688 btags += '^' * lb
2025-07-01 17:49:06.688 elif tag == 'delete':
2025-07-01 17:49:06.688 atags += '-' * la
2025-07-01 17:49:06.688 elif tag == 'insert':
2025-07-01 17:49:06.688 btags += '+' * lb
2025-07-01 17:49:06.688 elif tag == 'equal':
2025-07-01 17:49:06.688 atags += ' ' * la
2025-07-01 17:49:06.688 btags += ' ' * lb
2025-07-01 17:49:06.688 else:
2025-07-01 17:49:06.688 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.688 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.688 else:
2025-07-01 17:49:06.688 # the synch pair is identical
2025-07-01 17:49:06.688 yield ' ' + aelt
2025-07-01 17:49:06.688
2025-07-01 17:49:06.688 # pump out diffs from after the synch point
2025-07-01 17:49:06.689 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.689
2025-07-01 17:49:06.689 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.689 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.689
2025-07-01 17:49:06.689 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.689 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.689 alo = 174, ahi = 1101
2025-07-01 17:49:06.689 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.689 blo = 174, bhi = 1101
2025-07-01 17:49:06.689
2025-07-01 17:49:06.689 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.689 g = []
2025-07-01 17:49:06.689 if alo < ahi:
2025-07-01 17:49:06.689 if blo < bhi:
2025-07-01 17:49:06.689 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.690 else:
2025-07-01 17:49:06.690 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.690 elif blo < bhi:
2025-07-01 17:49:06.690 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.690
2025-07-01 17:49:06.690 > yield from g
2025-07-01 17:49:06.690
2025-07-01 17:49:06.690 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.690 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.690
2025-07-01 17:49:06.690 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.690 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.690 alo = 174, ahi = 1101
2025-07-01 17:49:06.690 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.690 blo = 174, bhi = 1101
2025-07-01 17:49:06.690
2025-07-01 17:49:06.691 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.691 r"""
2025-07-01 17:49:06.691 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.691 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.691 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.691 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.691
2025-07-01 17:49:06.691 Example:
2025-07-01 17:49:06.691
2025-07-01 17:49:06.691 >>> d = Differ()
2025-07-01 17:49:06.691 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.691 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.691 >>> print(''.join(results), end="")
2025-07-01 17:49:06.691 - abcDefghiJkl
2025-07-01 17:49:06.691 + abcdefGhijkl
2025-07-01 17:49:06.692 """
2025-07-01 17:49:06.692
2025-07-01 17:49:06.692 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.692 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.692 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.692 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.692 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.692
2025-07-01 17:49:06.692 # search for the pair that matches best without being identical
2025-07-01 17:49:06.692 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.692 # on junk -- unless we have to)
2025-07-01 17:49:06.692 for j in range(blo, bhi):
2025-07-01 17:49:06.692 bj = b[j]
2025-07-01 17:49:06.692 cruncher.set_seq2(bj)
2025-07-01 17:49:06.692 for i in range(alo, ahi):
2025-07-01 17:49:06.693 ai = a[i]
2025-07-01 17:49:06.693 if ai == bj:
2025-07-01 17:49:06.693 if eqi is None:
2025-07-01 17:49:06.693 eqi, eqj = i, j
2025-07-01 17:49:06.693 continue
2025-07-01 17:49:06.693 cruncher.set_seq1(ai)
2025-07-01 17:49:06.693 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.693 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.693 # compares by a factor of 3.
2025-07-01 17:49:06.693 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.693 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.693 # of the computation is cached by cruncher
2025-07-01 17:49:06.693 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.693 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.693 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.694 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.694 if best_ratio < cutoff:
2025-07-01 17:49:06.694 # no non-identical "pretty close" pair
2025-07-01 17:49:06.694 if eqi is None:
2025-07-01 17:49:06.694 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.694 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.694 return
2025-07-01 17:49:06.694 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.694 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.694 else:
2025-07-01 17:49:06.694 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.694 eqi = None
2025-07-01 17:49:06.694
2025-07-01 17:49:06.694 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.694 # identical
2025-07-01 17:49:06.694
2025-07-01 17:49:06.695 # pump out diffs from before the synch point
2025-07-01 17:49:06.695 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.695
2025-07-01 17:49:06.695 # do intraline marking on the synch pair
2025-07-01 17:49:06.695 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.695 if eqi is None:
2025-07-01 17:49:06.695 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.695 atags = btags = ""
2025-07-01 17:49:06.695 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.695 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.695 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.695 if tag == 'replace':
2025-07-01 17:49:06.695 atags += '^' * la
2025-07-01 17:49:06.695 btags += '^' * lb
2025-07-01 17:49:06.695 elif tag == 'delete':
2025-07-01 17:49:06.695 atags += '-' * la
2025-07-01 17:49:06.695 elif tag == 'insert':
2025-07-01 17:49:06.696 btags += '+' * lb
2025-07-01 17:49:06.696 elif tag == 'equal':
2025-07-01 17:49:06.696 atags += ' ' * la
2025-07-01 17:49:06.696 btags += ' ' * lb
2025-07-01 17:49:06.696 else:
2025-07-01 17:49:06.696 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.696 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.696 else:
2025-07-01 17:49:06.696 # the synch pair is identical
2025-07-01 17:49:06.696 yield ' ' + aelt
2025-07-01 17:49:06.696
2025-07-01 17:49:06.696 # pump out diffs from after the synch point
2025-07-01 17:49:06.696 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.696
2025-07-01 17:49:06.696 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.696 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.696
2025-07-01 17:49:06.696 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.697 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.697 alo = 175, ahi = 1101
2025-07-01 17:49:06.697 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.697 blo = 175, bhi = 1101
2025-07-01 17:49:06.697
2025-07-01 17:49:06.697 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.697 g = []
2025-07-01 17:49:06.697 if alo < ahi:
2025-07-01 17:49:06.697 if blo < bhi:
2025-07-01 17:49:06.697 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.697 else:
2025-07-01 17:49:06.697 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.697 elif blo < bhi:
2025-07-01 17:49:06.697 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.697
2025-07-01 17:49:06.697 > yield from g
2025-07-01 17:49:06.703
2025-07-01 17:49:06.703 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.703 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.703
2025-07-01 17:49:06.703 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.703 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.703 alo = 175, ahi = 1101
2025-07-01 17:49:06.703 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.703 blo = 175, bhi = 1101
2025-07-01 17:49:06.703
2025-07-01 17:49:06.703 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.703 r"""
2025-07-01 17:49:06.703 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.704 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.704 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.704 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.704
2025-07-01 17:49:06.704 Example:
2025-07-01 17:49:06.704
2025-07-01 17:49:06.704 >>> d = Differ()
2025-07-01 17:49:06.704 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.704 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.704 >>> print(''.join(results), end="")
2025-07-01 17:49:06.704 - abcDefghiJkl
2025-07-01 17:49:06.704 + abcdefGhijkl
2025-07-01 17:49:06.704 """
2025-07-01 17:49:06.704
2025-07-01 17:49:06.704 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.705 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.705 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.705 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.705 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.705
2025-07-01 17:49:06.705 # search for the pair that matches best without being identical
2025-07-01 17:49:06.705 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.705 # on junk -- unless we have to)
2025-07-01 17:49:06.705 for j in range(blo, bhi):
2025-07-01 17:49:06.705 bj = b[j]
2025-07-01 17:49:06.705 cruncher.set_seq2(bj)
2025-07-01 17:49:06.705 for i in range(alo, ahi):
2025-07-01 17:49:06.705 ai = a[i]
2025-07-01 17:49:06.705 if ai == bj:
2025-07-01 17:49:06.705 if eqi is None:
2025-07-01 17:49:06.706 eqi, eqj = i, j
2025-07-01 17:49:06.706 continue
2025-07-01 17:49:06.706 cruncher.set_seq1(ai)
2025-07-01 17:49:06.706 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.706 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.706 # compares by a factor of 3.
2025-07-01 17:49:06.706 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.706 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.706 # of the computation is cached by cruncher
2025-07-01 17:49:06.706 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.706 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.706 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.706 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.706 if best_ratio < cutoff:
2025-07-01 17:49:06.706 # no non-identical "pretty close" pair
2025-07-01 17:49:06.706 if eqi is None:
2025-07-01 17:49:06.707 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.707 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.707 return
2025-07-01 17:49:06.707 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.707 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.707 else:
2025-07-01 17:49:06.707 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.707 eqi = None
2025-07-01 17:49:06.707
2025-07-01 17:49:06.707 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.707 # identical
2025-07-01 17:49:06.707
2025-07-01 17:49:06.707 # pump out diffs from before the synch point
2025-07-01 17:49:06.707 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.707
2025-07-01 17:49:06.707 # do intraline marking on the synch pair
2025-07-01 17:49:06.708 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.708 if eqi is None:
2025-07-01 17:49:06.708 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.708 atags = btags = ""
2025-07-01 17:49:06.708 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.708 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.708 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.708 if tag == 'replace':
2025-07-01 17:49:06.708 atags += '^' * la
2025-07-01 17:49:06.708 btags += '^' * lb
2025-07-01 17:49:06.708 elif tag == 'delete':
2025-07-01 17:49:06.708 atags += '-' * la
2025-07-01 17:49:06.708 elif tag == 'insert':
2025-07-01 17:49:06.708 btags += '+' * lb
2025-07-01 17:49:06.708 elif tag == 'equal':
2025-07-01 17:49:06.708 atags += ' ' * la
2025-07-01 17:49:06.709 btags += ' ' * lb
2025-07-01 17:49:06.709 else:
2025-07-01 17:49:06.709 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.709 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.709 else:
2025-07-01 17:49:06.709 # the synch pair is identical
2025-07-01 17:49:06.709 yield ' ' + aelt
2025-07-01 17:49:06.709
2025-07-01 17:49:06.709 # pump out diffs from after the synch point
2025-07-01 17:49:06.709 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.709
2025-07-01 17:49:06.709 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.709 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.709
2025-07-01 17:49:06.709 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.710 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.710 alo = 176, ahi = 1101
2025-07-01 17:49:06.710 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.710 blo = 176, bhi = 1101
2025-07-01 17:49:06.710
2025-07-01 17:49:06.710 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.710 g = []
2025-07-01 17:49:06.710 if alo < ahi:
2025-07-01 17:49:06.710 if blo < bhi:
2025-07-01 17:49:06.710 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.710 else:
2025-07-01 17:49:06.710 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.710 elif blo < bhi:
2025-07-01 17:49:06.710 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.710
2025-07-01 17:49:06.710 > yield from g
2025-07-01 17:49:06.711
2025-07-01 17:49:06.711 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.711 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.711
2025-07-01 17:49:06.711 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.711 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.711 alo = 176, ahi = 1101
2025-07-01 17:49:06.711 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.711 blo = 176, bhi = 1101
2025-07-01 17:49:06.711
2025-07-01 17:49:06.711 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.711 r"""
2025-07-01 17:49:06.711 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.711 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.711 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.711 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.712
2025-07-01 17:49:06.712 Example:
2025-07-01 17:49:06.712
2025-07-01 17:49:06.712 >>> d = Differ()
2025-07-01 17:49:06.712 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.712 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.712 >>> print(''.join(results), end="")
2025-07-01 17:49:06.712 - abcDefghiJkl
2025-07-01 17:49:06.712 + abcdefGhijkl
2025-07-01 17:49:06.712 """
2025-07-01 17:49:06.712
2025-07-01 17:49:06.712 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.712 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.712 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.713 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.713 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.713
2025-07-01 17:49:06.713 # search for the pair that matches best without being identical
2025-07-01 17:49:06.713 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.713 # on junk -- unless we have to)
2025-07-01 17:49:06.713 for j in range(blo, bhi):
2025-07-01 17:49:06.713 bj = b[j]
2025-07-01 17:49:06.713 cruncher.set_seq2(bj)
2025-07-01 17:49:06.713 for i in range(alo, ahi):
2025-07-01 17:49:06.713 ai = a[i]
2025-07-01 17:49:06.713 if ai == bj:
2025-07-01 17:49:06.713 if eqi is None:
2025-07-01 17:49:06.713 eqi, eqj = i, j
2025-07-01 17:49:06.713 continue
2025-07-01 17:49:06.713 cruncher.set_seq1(ai)
2025-07-01 17:49:06.714 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.717 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.717 # compares by a factor of 3.
2025-07-01 17:49:06.717 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.717 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.717 # of the computation is cached by cruncher
2025-07-01 17:49:06.717 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.717 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.717 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.717 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.717 if best_ratio < cutoff:
2025-07-01 17:49:06.717 # no non-identical "pretty close" pair
2025-07-01 17:49:06.717 if eqi is None:
2025-07-01 17:49:06.718 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.718 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.718 return
2025-07-01 17:49:06.718 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.718 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.718 else:
2025-07-01 17:49:06.718 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.718 eqi = None
2025-07-01 17:49:06.718
2025-07-01 17:49:06.718 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.718 # identical
2025-07-01 17:49:06.718
2025-07-01 17:49:06.718 # pump out diffs from before the synch point
2025-07-01 17:49:06.718 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.718
2025-07-01 17:49:06.718 # do intraline marking on the synch pair
2025-07-01 17:49:06.719 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.719 if eqi is None:
2025-07-01 17:49:06.719 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.719 atags = btags = ""
2025-07-01 17:49:06.719 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.719 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.719 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.719 if tag == 'replace':
2025-07-01 17:49:06.719 atags += '^' * la
2025-07-01 17:49:06.719 btags += '^' * lb
2025-07-01 17:49:06.719 elif tag == 'delete':
2025-07-01 17:49:06.719 atags += '-' * la
2025-07-01 17:49:06.719 elif tag == 'insert':
2025-07-01 17:49:06.719 btags += '+' * lb
2025-07-01 17:49:06.719 elif tag == 'equal':
2025-07-01 17:49:06.719 atags += ' ' * la
2025-07-01 17:49:06.720 btags += ' ' * lb
2025-07-01 17:49:06.720 else:
2025-07-01 17:49:06.720 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.720 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.720 else:
2025-07-01 17:49:06.720 # the synch pair is identical
2025-07-01 17:49:06.720 yield ' ' + aelt
2025-07-01 17:49:06.720
2025-07-01 17:49:06.720 # pump out diffs from after the synch point
2025-07-01 17:49:06.720 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.720
2025-07-01 17:49:06.720 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.720 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.720
2025-07-01 17:49:06.720 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.720 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.721 alo = 177, ahi = 1101
2025-07-01 17:49:06.721 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.721 blo = 177, bhi = 1101
2025-07-01 17:49:06.721
2025-07-01 17:49:06.721 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.721 g = []
2025-07-01 17:49:06.721 if alo < ahi:
2025-07-01 17:49:06.721 if blo < bhi:
2025-07-01 17:49:06.721 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.721 else:
2025-07-01 17:49:06.721 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.721 elif blo < bhi:
2025-07-01 17:49:06.721 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.721
2025-07-01 17:49:06.721 > yield from g
2025-07-01 17:49:06.721
2025-07-01 17:49:06.721 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.722 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.722
2025-07-01 17:49:06.722 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.722 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.722 alo = 177, ahi = 1101
2025-07-01 17:49:06.722 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.722 blo = 177, bhi = 1101
2025-07-01 17:49:06.722
2025-07-01 17:49:06.722 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.722 r"""
2025-07-01 17:49:06.722 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.722 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.722 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.722 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.722
2025-07-01 17:49:06.723 Example:
2025-07-01 17:49:06.723
2025-07-01 17:49:06.723 >>> d = Differ()
2025-07-01 17:49:06.723 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.723 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.723 >>> print(''.join(results), end="")
2025-07-01 17:49:06.723 - abcDefghiJkl
2025-07-01 17:49:06.723 + abcdefGhijkl
2025-07-01 17:49:06.723 """
2025-07-01 17:49:06.723
2025-07-01 17:49:06.723 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.723 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.723 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.723 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.723 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.724
2025-07-01 17:49:06.724 # search for the pair that matches best without being identical
2025-07-01 17:49:06.724 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.724 # on junk -- unless we have to)
2025-07-01 17:49:06.724 for j in range(blo, bhi):
2025-07-01 17:49:06.724 bj = b[j]
2025-07-01 17:49:06.724 cruncher.set_seq2(bj)
2025-07-01 17:49:06.724 for i in range(alo, ahi):
2025-07-01 17:49:06.724 ai = a[i]
2025-07-01 17:49:06.724 if ai == bj:
2025-07-01 17:49:06.724 if eqi is None:
2025-07-01 17:49:06.724 eqi, eqj = i, j
2025-07-01 17:49:06.724 continue
2025-07-01 17:49:06.724 cruncher.set_seq1(ai)
2025-07-01 17:49:06.724 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.724 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.724 # compares by a factor of 3.
2025-07-01 17:49:06.725 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.725 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.725 # of the computation is cached by cruncher
2025-07-01 17:49:06.725 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.725 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.725 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.725 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.725 if best_ratio < cutoff:
2025-07-01 17:49:06.725 # no non-identical "pretty close" pair
2025-07-01 17:49:06.725 if eqi is None:
2025-07-01 17:49:06.725 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.725 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.725 return
2025-07-01 17:49:06.725 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.725 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.725 else:
2025-07-01 17:49:06.725 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.726 eqi = None
2025-07-01 17:49:06.726
2025-07-01 17:49:06.726 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.726 # identical
2025-07-01 17:49:06.726
2025-07-01 17:49:06.726 # pump out diffs from before the synch point
2025-07-01 17:49:06.726 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.726
2025-07-01 17:49:06.726 # do intraline marking on the synch pair
2025-07-01 17:49:06.726 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.726 if eqi is None:
2025-07-01 17:49:06.726 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.726 atags = btags = ""
2025-07-01 17:49:06.726 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.726 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.726 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.727 if tag == 'replace':
2025-07-01 17:49:06.727 atags += '^' * la
2025-07-01 17:49:06.727 btags += '^' * lb
2025-07-01 17:49:06.727 elif tag == 'delete':
2025-07-01 17:49:06.727 atags += '-' * la
2025-07-01 17:49:06.727 elif tag == 'insert':
2025-07-01 17:49:06.727 btags += '+' * lb
2025-07-01 17:49:06.727 elif tag == 'equal':
2025-07-01 17:49:06.727 atags += ' ' * la
2025-07-01 17:49:06.727 btags += ' ' * lb
2025-07-01 17:49:06.727 else:
2025-07-01 17:49:06.727 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.727 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.727 else:
2025-07-01 17:49:06.727 # the synch pair is identical
2025-07-01 17:49:06.727 yield ' ' + aelt
2025-07-01 17:49:06.728
2025-07-01 17:49:06.728 # pump out diffs from after the synch point
2025-07-01 17:49:06.728 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.728
2025-07-01 17:49:06.728 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.728 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.728
2025-07-01 17:49:06.728 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.728 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.728 alo = 180, ahi = 1101
2025-07-01 17:49:06.728 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.728 blo = 180, bhi = 1101
2025-07-01 17:49:06.728
2025-07-01 17:49:06.728 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.728 g = []
2025-07-01 17:49:06.728 if alo < ahi:
2025-07-01 17:49:06.729 if blo < bhi:
2025-07-01 17:49:06.729 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.729 else:
2025-07-01 17:49:06.729 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.729 elif blo < bhi:
2025-07-01 17:49:06.729 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.729
2025-07-01 17:49:06.729 > yield from g
2025-07-01 17:49:06.729
2025-07-01 17:49:06.729 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.729 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.729
2025-07-01 17:49:06.729 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.729 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.729 alo = 180, ahi = 1101
2025-07-01 17:49:06.729 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.729 blo = 180, bhi = 1101
2025-07-01 17:49:06.734
2025-07-01 17:49:06.734 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.735 r"""
2025-07-01 17:49:06.735 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.735 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.735 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.735 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.735
2025-07-01 17:49:06.735 Example:
2025-07-01 17:49:06.735
2025-07-01 17:49:06.735 >>> d = Differ()
2025-07-01 17:49:06.735 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.735 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.735 >>> print(''.join(results), end="")
2025-07-01 17:49:06.735 - abcDefghiJkl
2025-07-01 17:49:06.735 + abcdefGhijkl
2025-07-01 17:49:06.735 """
2025-07-01 17:49:06.736
2025-07-01 17:49:06.736 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.736 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.736 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.736 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.736 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.736
2025-07-01 17:49:06.736 # search for the pair that matches best without being identical
2025-07-01 17:49:06.736 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.736 # on junk -- unless we have to)
2025-07-01 17:49:06.736 for j in range(blo, bhi):
2025-07-01 17:49:06.736 bj = b[j]
2025-07-01 17:49:06.736 cruncher.set_seq2(bj)
2025-07-01 17:49:06.736 for i in range(alo, ahi):
2025-07-01 17:49:06.736 ai = a[i]
2025-07-01 17:49:06.736 if ai == bj:
2025-07-01 17:49:06.736 if eqi is None:
2025-07-01 17:49:06.737 eqi, eqj = i, j
2025-07-01 17:49:06.737 continue
2025-07-01 17:49:06.737 cruncher.set_seq1(ai)
2025-07-01 17:49:06.737 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.737 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.737 # compares by a factor of 3.
2025-07-01 17:49:06.737 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.737 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.737 # of the computation is cached by cruncher
2025-07-01 17:49:06.737 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.737 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.737 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.737 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.737 if best_ratio < cutoff:
2025-07-01 17:49:06.737 # no non-identical "pretty close" pair
2025-07-01 17:49:06.737 if eqi is None:
2025-07-01 17:49:06.738 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.738 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.738 return
2025-07-01 17:49:06.738 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.738 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.738 else:
2025-07-01 17:49:06.738 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.738 eqi = None
2025-07-01 17:49:06.738
2025-07-01 17:49:06.738 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.738 # identical
2025-07-01 17:49:06.738
2025-07-01 17:49:06.738 # pump out diffs from before the synch point
2025-07-01 17:49:06.738 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.739
2025-07-01 17:49:06.739 # do intraline marking on the synch pair
2025-07-01 17:49:06.739 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.739 if eqi is None:
2025-07-01 17:49:06.739 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.739 atags = btags = ""
2025-07-01 17:49:06.739 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.739 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.739 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.739 if tag == 'replace':
2025-07-01 17:49:06.739 atags += '^' * la
2025-07-01 17:49:06.739 btags += '^' * lb
2025-07-01 17:49:06.739 elif tag == 'delete':
2025-07-01 17:49:06.739 atags += '-' * la
2025-07-01 17:49:06.739 elif tag == 'insert':
2025-07-01 17:49:06.740 btags += '+' * lb
2025-07-01 17:49:06.740 elif tag == 'equal':
2025-07-01 17:49:06.740 atags += ' ' * la
2025-07-01 17:49:06.740 btags += ' ' * lb
2025-07-01 17:49:06.740 else:
2025-07-01 17:49:06.740 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.740 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.740 else:
2025-07-01 17:49:06.740 # the synch pair is identical
2025-07-01 17:49:06.740 yield ' ' + aelt
2025-07-01 17:49:06.740
2025-07-01 17:49:06.740 # pump out diffs from after the synch point
2025-07-01 17:49:06.740 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.740
2025-07-01 17:49:06.740 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.740 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.741
2025-07-01 17:49:06.741 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.741 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.741 alo = 181, ahi = 1101
2025-07-01 17:49:06.741 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.741 blo = 181, bhi = 1101
2025-07-01 17:49:06.741
2025-07-01 17:49:06.741 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.741 g = []
2025-07-01 17:49:06.741 if alo < ahi:
2025-07-01 17:49:06.741 if blo < bhi:
2025-07-01 17:49:06.741 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.741 else:
2025-07-01 17:49:06.741 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.741 elif blo < bhi:
2025-07-01 17:49:06.741 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.741
2025-07-01 17:49:06.742 > yield from g
2025-07-01 17:49:06.742
2025-07-01 17:49:06.742 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.742 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.742
2025-07-01 17:49:06.742 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.742 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.742 alo = 181, ahi = 1101
2025-07-01 17:49:06.742 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.742 blo = 181, bhi = 1101
2025-07-01 17:49:06.742
2025-07-01 17:49:06.742 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.742 r"""
2025-07-01 17:49:06.742 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.742 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.742 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.742 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.743
2025-07-01 17:49:06.743 Example:
2025-07-01 17:49:06.743
2025-07-01 17:49:06.743 >>> d = Differ()
2025-07-01 17:49:06.743 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.743 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.743 >>> print(''.join(results), end="")
2025-07-01 17:49:06.743 - abcDefghiJkl
2025-07-01 17:49:06.743 + abcdefGhijkl
2025-07-01 17:49:06.743 """
2025-07-01 17:49:06.743
2025-07-01 17:49:06.743 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.743 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.743 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.743 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.744 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.744
2025-07-01 17:49:06.744 # search for the pair that matches best without being identical
2025-07-01 17:49:06.744 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.744 # on junk -- unless we have to)
2025-07-01 17:49:06.744 for j in range(blo, bhi):
2025-07-01 17:49:06.744 bj = b[j]
2025-07-01 17:49:06.744 cruncher.set_seq2(bj)
2025-07-01 17:49:06.744 for i in range(alo, ahi):
2025-07-01 17:49:06.744 ai = a[i]
2025-07-01 17:49:06.744 if ai == bj:
2025-07-01 17:49:06.744 if eqi is None:
2025-07-01 17:49:06.744 eqi, eqj = i, j
2025-07-01 17:49:06.744 continue
2025-07-01 17:49:06.744 cruncher.set_seq1(ai)
2025-07-01 17:49:06.744 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.745 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.745 # compares by a factor of 3.
2025-07-01 17:49:06.745 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.745 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.745 # of the computation is cached by cruncher
2025-07-01 17:49:06.745 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.745 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.745 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.745 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.745 if best_ratio < cutoff:
2025-07-01 17:49:06.745 # no non-identical "pretty close" pair
2025-07-01 17:49:06.745 if eqi is None:
2025-07-01 17:49:06.745 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.745 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.745 return
2025-07-01 17:49:06.745 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.745 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.749 else:
2025-07-01 17:49:06.749 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.749 eqi = None
2025-07-01 17:49:06.749
2025-07-01 17:49:06.749 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.749 # identical
2025-07-01 17:49:06.749
2025-07-01 17:49:06.749 # pump out diffs from before the synch point
2025-07-01 17:49:06.749 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.749
2025-07-01 17:49:06.749 # do intraline marking on the synch pair
2025-07-01 17:49:06.750 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.750 if eqi is None:
2025-07-01 17:49:06.750 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.750 atags = btags = ""
2025-07-01 17:49:06.750 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.750 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.750 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.750 if tag == 'replace':
2025-07-01 17:49:06.750 atags += '^' * la
2025-07-01 17:49:06.750 btags += '^' * lb
2025-07-01 17:49:06.750 elif tag == 'delete':
2025-07-01 17:49:06.750 atags += '-' * la
2025-07-01 17:49:06.750 elif tag == 'insert':
2025-07-01 17:49:06.750 btags += '+' * lb
2025-07-01 17:49:06.750 elif tag == 'equal':
2025-07-01 17:49:06.750 atags += ' ' * la
2025-07-01 17:49:06.750 btags += ' ' * lb
2025-07-01 17:49:06.751 else:
2025-07-01 17:49:06.751 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.751 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.751 else:
2025-07-01 17:49:06.751 # the synch pair is identical
2025-07-01 17:49:06.751 yield ' ' + aelt
2025-07-01 17:49:06.751
2025-07-01 17:49:06.751 # pump out diffs from after the synch point
2025-07-01 17:49:06.751 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.751
2025-07-01 17:49:06.751 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.751 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.751
2025-07-01 17:49:06.751 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.751 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.751 alo = 182, ahi = 1101
2025-07-01 17:49:06.751 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.752 blo = 182, bhi = 1101
2025-07-01 17:49:06.752
2025-07-01 17:49:06.752 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.752 g = []
2025-07-01 17:49:06.752 if alo < ahi:
2025-07-01 17:49:06.752 if blo < bhi:
2025-07-01 17:49:06.752 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.752 else:
2025-07-01 17:49:06.752 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.752 elif blo < bhi:
2025-07-01 17:49:06.752 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.752
2025-07-01 17:49:06.752 > yield from g
2025-07-01 17:49:06.752
2025-07-01 17:49:06.752 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.753 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.753
2025-07-01 17:49:06.753 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.753 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.753 alo = 182, ahi = 1101
2025-07-01 17:49:06.753 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.753 blo = 182, bhi = 1101
2025-07-01 17:49:06.753
2025-07-01 17:49:06.753 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.753 r"""
2025-07-01 17:49:06.753 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.753 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.753 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.753 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.753
2025-07-01 17:49:06.754 Example:
2025-07-01 17:49:06.754
2025-07-01 17:49:06.754 >>> d = Differ()
2025-07-01 17:49:06.754 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.754 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.754 >>> print(''.join(results), end="")
2025-07-01 17:49:06.754 - abcDefghiJkl
2025-07-01 17:49:06.754 + abcdefGhijkl
2025-07-01 17:49:06.754 """
2025-07-01 17:49:06.754
2025-07-01 17:49:06.754 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.754 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.754 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.754 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.754 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.755
2025-07-01 17:49:06.755 # search for the pair that matches best without being identical
2025-07-01 17:49:06.755 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.755 # on junk -- unless we have to)
2025-07-01 17:49:06.755 for j in range(blo, bhi):
2025-07-01 17:49:06.755 bj = b[j]
2025-07-01 17:49:06.755 cruncher.set_seq2(bj)
2025-07-01 17:49:06.755 for i in range(alo, ahi):
2025-07-01 17:49:06.755 ai = a[i]
2025-07-01 17:49:06.755 if ai == bj:
2025-07-01 17:49:06.755 if eqi is None:
2025-07-01 17:49:06.755 eqi, eqj = i, j
2025-07-01 17:49:06.755 continue
2025-07-01 17:49:06.755 cruncher.set_seq1(ai)
2025-07-01 17:49:06.755 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.755 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.755 # compares by a factor of 3.
2025-07-01 17:49:06.756 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.756 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.756 # of the computation is cached by cruncher
2025-07-01 17:49:06.756 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.756 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.756 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.756 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.756 if best_ratio < cutoff:
2025-07-01 17:49:06.756 # no non-identical "pretty close" pair
2025-07-01 17:49:06.756 if eqi is None:
2025-07-01 17:49:06.756 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.756 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.756 return
2025-07-01 17:49:06.756 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.756 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.756 else:
2025-07-01 17:49:06.756 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.757 eqi = None
2025-07-01 17:49:06.757
2025-07-01 17:49:06.757 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.757 # identical
2025-07-01 17:49:06.757
2025-07-01 17:49:06.757 # pump out diffs from before the synch point
2025-07-01 17:49:06.757 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.757
2025-07-01 17:49:06.757 # do intraline marking on the synch pair
2025-07-01 17:49:06.757 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.757 if eqi is None:
2025-07-01 17:49:06.757 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.757 atags = btags = ""
2025-07-01 17:49:06.757 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.757 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.757 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.757 if tag == 'replace':
2025-07-01 17:49:06.758 atags += '^' * la
2025-07-01 17:49:06.758 btags += '^' * lb
2025-07-01 17:49:06.758 elif tag == 'delete':
2025-07-01 17:49:06.758 atags += '-' * la
2025-07-01 17:49:06.758 elif tag == 'insert':
2025-07-01 17:49:06.758 btags += '+' * lb
2025-07-01 17:49:06.758 elif tag == 'equal':
2025-07-01 17:49:06.758 atags += ' ' * la
2025-07-01 17:49:06.758 btags += ' ' * lb
2025-07-01 17:49:06.758 else:
2025-07-01 17:49:06.758 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.758 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.758 else:
2025-07-01 17:49:06.758 # the synch pair is identical
2025-07-01 17:49:06.758 yield ' ' + aelt
2025-07-01 17:49:06.758
2025-07-01 17:49:06.758 # pump out diffs from after the synch point
2025-07-01 17:49:06.759 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.759
2025-07-01 17:49:06.759 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.759 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.759
2025-07-01 17:49:06.759 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.759 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.759 alo = 183, ahi = 1101
2025-07-01 17:49:06.759 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.759 blo = 183, bhi = 1101
2025-07-01 17:49:06.759
2025-07-01 17:49:06.759 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.759 g = []
2025-07-01 17:49:06.759 if alo < ahi:
2025-07-01 17:49:06.759 if blo < bhi:
2025-07-01 17:49:06.760 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.760 else:
2025-07-01 17:49:06.760 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.760 elif blo < bhi:
2025-07-01 17:49:06.760 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.760
2025-07-01 17:49:06.760 > yield from g
2025-07-01 17:49:06.760
2025-07-01 17:49:06.760 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.760 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.760
2025-07-01 17:49:06.760 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.760 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.760 alo = 183, ahi = 1101
2025-07-01 17:49:06.760 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.760 blo = 183, bhi = 1101
2025-07-01 17:49:06.761
2025-07-01 17:49:06.761 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.761 r"""
2025-07-01 17:49:06.761 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.761 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.761 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.761 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.761
2025-07-01 17:49:06.761 Example:
2025-07-01 17:49:06.761
2025-07-01 17:49:06.761 >>> d = Differ()
2025-07-01 17:49:06.761 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.761 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.761 >>> print(''.join(results), end="")
2025-07-01 17:49:06.761 - abcDefghiJkl
2025-07-01 17:49:06.761 + abcdefGhijkl
2025-07-01 17:49:06.767 """
2025-07-01 17:49:06.767
2025-07-01 17:49:06.767 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.767 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.767 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.767 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.767 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.767
2025-07-01 17:49:06.767 # search for the pair that matches best without being identical
2025-07-01 17:49:06.767 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.767 # on junk -- unless we have to)
2025-07-01 17:49:06.767 for j in range(blo, bhi):
2025-07-01 17:49:06.768 bj = b[j]
2025-07-01 17:49:06.768 cruncher.set_seq2(bj)
2025-07-01 17:49:06.768 for i in range(alo, ahi):
2025-07-01 17:49:06.768 ai = a[i]
2025-07-01 17:49:06.768 if ai == bj:
2025-07-01 17:49:06.768 if eqi is None:
2025-07-01 17:49:06.768 eqi, eqj = i, j
2025-07-01 17:49:06.768 continue
2025-07-01 17:49:06.768 cruncher.set_seq1(ai)
2025-07-01 17:49:06.768 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.768 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.768 # compares by a factor of 3.
2025-07-01 17:49:06.768 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.768 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.768 # of the computation is cached by cruncher
2025-07-01 17:49:06.768 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.769 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.769 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.769 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.769 if best_ratio < cutoff:
2025-07-01 17:49:06.769 # no non-identical "pretty close" pair
2025-07-01 17:49:06.769 if eqi is None:
2025-07-01 17:49:06.769 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.769 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.769 return
2025-07-01 17:49:06.769 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.769 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.769 else:
2025-07-01 17:49:06.769 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.769 eqi = None
2025-07-01 17:49:06.769
2025-07-01 17:49:06.769 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.769 # identical
2025-07-01 17:49:06.770
2025-07-01 17:49:06.770 # pump out diffs from before the synch point
2025-07-01 17:49:06.770 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.770
2025-07-01 17:49:06.770 # do intraline marking on the synch pair
2025-07-01 17:49:06.770 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.770 if eqi is None:
2025-07-01 17:49:06.770 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.770 atags = btags = ""
2025-07-01 17:49:06.770 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.770 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.770 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.770 if tag == 'replace':
2025-07-01 17:49:06.770 atags += '^' * la
2025-07-01 17:49:06.770 btags += '^' * lb
2025-07-01 17:49:06.770 elif tag == 'delete':
2025-07-01 17:49:06.771 atags += '-' * la
2025-07-01 17:49:06.771 elif tag == 'insert':
2025-07-01 17:49:06.771 btags += '+' * lb
2025-07-01 17:49:06.771 elif tag == 'equal':
2025-07-01 17:49:06.771 atags += ' ' * la
2025-07-01 17:49:06.771 btags += ' ' * lb
2025-07-01 17:49:06.771 else:
2025-07-01 17:49:06.771 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.771 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.771 else:
2025-07-01 17:49:06.771 # the synch pair is identical
2025-07-01 17:49:06.771 yield ' ' + aelt
2025-07-01 17:49:06.771
2025-07-01 17:49:06.771 # pump out diffs from after the synch point
2025-07-01 17:49:06.771 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.771
2025-07-01 17:49:06.771 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.771 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.772
2025-07-01 17:49:06.772 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.772 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.772 alo = 184, ahi = 1101
2025-07-01 17:49:06.772 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.772 blo = 184, bhi = 1101
2025-07-01 17:49:06.772
2025-07-01 17:49:06.772 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.772 g = []
2025-07-01 17:49:06.772 if alo < ahi:
2025-07-01 17:49:06.772 if blo < bhi:
2025-07-01 17:49:06.772 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.772 else:
2025-07-01 17:49:06.772 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.772 elif blo < bhi:
2025-07-01 17:49:06.772 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.772
2025-07-01 17:49:06.772 > yield from g
2025-07-01 17:49:06.772
2025-07-01 17:49:06.772 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.773 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.773
2025-07-01 17:49:06.773 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.773 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.773 alo = 184, ahi = 1101
2025-07-01 17:49:06.773 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.773 blo = 184, bhi = 1101
2025-07-01 17:49:06.773
2025-07-01 17:49:06.773 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.773 r"""
2025-07-01 17:49:06.773 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.773 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.773 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.773 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.773
2025-07-01 17:49:06.773 Example:
2025-07-01 17:49:06.773
2025-07-01 17:49:06.773 >>> d = Differ()
2025-07-01 17:49:06.773 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.774 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.774 >>> print(''.join(results), end="")
2025-07-01 17:49:06.774 - abcDefghiJkl
2025-07-01 17:49:06.774 + abcdefGhijkl
2025-07-01 17:49:06.774 """
2025-07-01 17:49:06.774
2025-07-01 17:49:06.774 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.774 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.774 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.774 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.774 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.774
2025-07-01 17:49:06.774 # search for the pair that matches best without being identical
2025-07-01 17:49:06.774 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.774 # on junk -- unless we have to)
2025-07-01 17:49:06.774 for j in range(blo, bhi):
2025-07-01 17:49:06.774 bj = b[j]
2025-07-01 17:49:06.774 cruncher.set_seq2(bj)
2025-07-01 17:49:06.775 for i in range(alo, ahi):
2025-07-01 17:49:06.775 ai = a[i]
2025-07-01 17:49:06.775 if ai == bj:
2025-07-01 17:49:06.775 if eqi is None:
2025-07-01 17:49:06.775 eqi, eqj = i, j
2025-07-01 17:49:06.775 continue
2025-07-01 17:49:06.775 cruncher.set_seq1(ai)
2025-07-01 17:49:06.775 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.775 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.775 # compares by a factor of 3.
2025-07-01 17:49:06.775 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.775 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.775 # of the computation is cached by cruncher
2025-07-01 17:49:06.775 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.775 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.775 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.775 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.775 if best_ratio < cutoff:
2025-07-01 17:49:06.775 # no non-identical "pretty close" pair
2025-07-01 17:49:06.775 if eqi is None:
2025-07-01 17:49:06.776 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.776 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.776 return
2025-07-01 17:49:06.776 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.776 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.776 else:
2025-07-01 17:49:06.776 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.776 eqi = None
2025-07-01 17:49:06.776
2025-07-01 17:49:06.776 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.776 # identical
2025-07-01 17:49:06.776
2025-07-01 17:49:06.776 # pump out diffs from before the synch point
2025-07-01 17:49:06.776 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.776
2025-07-01 17:49:06.776 # do intraline marking on the synch pair
2025-07-01 17:49:06.776 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.776 if eqi is None:
2025-07-01 17:49:06.776 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.776 atags = btags = ""
2025-07-01 17:49:06.777 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.777 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.777 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.777 if tag == 'replace':
2025-07-01 17:49:06.777 atags += '^' * la
2025-07-01 17:49:06.777 btags += '^' * lb
2025-07-01 17:49:06.777 elif tag == 'delete':
2025-07-01 17:49:06.777 atags += '-' * la
2025-07-01 17:49:06.777 elif tag == 'insert':
2025-07-01 17:49:06.777 btags += '+' * lb
2025-07-01 17:49:06.777 elif tag == 'equal':
2025-07-01 17:49:06.777 atags += ' ' * la
2025-07-01 17:49:06.777 btags += ' ' * lb
2025-07-01 17:49:06.777 else:
2025-07-01 17:49:06.777 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.777 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.777 else:
2025-07-01 17:49:06.777 # the synch pair is identical
2025-07-01 17:49:06.777 yield ' ' + aelt
2025-07-01 17:49:06.780
2025-07-01 17:49:06.781 # pump out diffs from after the synch point
2025-07-01 17:49:06.781 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.781
2025-07-01 17:49:06.781 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.781 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.781
2025-07-01 17:49:06.781 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.781 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.781 alo = 185, ahi = 1101
2025-07-01 17:49:06.781 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.781 blo = 185, bhi = 1101
2025-07-01 17:49:06.781
2025-07-01 17:49:06.781 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.781 g = []
2025-07-01 17:49:06.781 if alo < ahi:
2025-07-01 17:49:06.781 if blo < bhi:
2025-07-01 17:49:06.781 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.781 else:
2025-07-01 17:49:06.781 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.781 elif blo < bhi:
2025-07-01 17:49:06.782 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.782
2025-07-01 17:49:06.782 > yield from g
2025-07-01 17:49:06.782
2025-07-01 17:49:06.782 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.782 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.782
2025-07-01 17:49:06.782 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.782 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.782 alo = 185, ahi = 1101
2025-07-01 17:49:06.782 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.782 blo = 185, bhi = 1101
2025-07-01 17:49:06.782
2025-07-01 17:49:06.782 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.782 r"""
2025-07-01 17:49:06.782 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.782 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.782 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.782 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.782
2025-07-01 17:49:06.783 Example:
2025-07-01 17:49:06.783
2025-07-01 17:49:06.783 >>> d = Differ()
2025-07-01 17:49:06.783 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.783 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.783 >>> print(''.join(results), end="")
2025-07-01 17:49:06.783 - abcDefghiJkl
2025-07-01 17:49:06.783 + abcdefGhijkl
2025-07-01 17:49:06.783 """
2025-07-01 17:49:06.783
2025-07-01 17:49:06.783 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.783 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.783 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.783 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.783 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.783
2025-07-01 17:49:06.783 # search for the pair that matches best without being identical
2025-07-01 17:49:06.783 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.784 # on junk -- unless we have to)
2025-07-01 17:49:06.784 for j in range(blo, bhi):
2025-07-01 17:49:06.784 bj = b[j]
2025-07-01 17:49:06.784 cruncher.set_seq2(bj)
2025-07-01 17:49:06.784 for i in range(alo, ahi):
2025-07-01 17:49:06.784 ai = a[i]
2025-07-01 17:49:06.784 if ai == bj:
2025-07-01 17:49:06.784 if eqi is None:
2025-07-01 17:49:06.784 eqi, eqj = i, j
2025-07-01 17:49:06.784 continue
2025-07-01 17:49:06.784 cruncher.set_seq1(ai)
2025-07-01 17:49:06.784 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.784 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.784 # compares by a factor of 3.
2025-07-01 17:49:06.784 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.784 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.784 # of the computation is cached by cruncher
2025-07-01 17:49:06.784 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.784 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.785 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.785 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.785 if best_ratio < cutoff:
2025-07-01 17:49:06.785 # no non-identical "pretty close" pair
2025-07-01 17:49:06.785 if eqi is None:
2025-07-01 17:49:06.785 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.785 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.785 return
2025-07-01 17:49:06.785 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.785 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.785 else:
2025-07-01 17:49:06.785 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.785 eqi = None
2025-07-01 17:49:06.785
2025-07-01 17:49:06.785 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.785 # identical
2025-07-01 17:49:06.785
2025-07-01 17:49:06.785 # pump out diffs from before the synch point
2025-07-01 17:49:06.785 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.785
2025-07-01 17:49:06.786 # do intraline marking on the synch pair
2025-07-01 17:49:06.786 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.786 if eqi is None:
2025-07-01 17:49:06.786 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.786 atags = btags = ""
2025-07-01 17:49:06.786 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.786 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.786 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.786 if tag == 'replace':
2025-07-01 17:49:06.786 atags += '^' * la
2025-07-01 17:49:06.786 btags += '^' * lb
2025-07-01 17:49:06.786 elif tag == 'delete':
2025-07-01 17:49:06.786 atags += '-' * la
2025-07-01 17:49:06.786 elif tag == 'insert':
2025-07-01 17:49:06.786 btags += '+' * lb
2025-07-01 17:49:06.786 elif tag == 'equal':
2025-07-01 17:49:06.786 atags += ' ' * la
2025-07-01 17:49:06.786 btags += ' ' * lb
2025-07-01 17:49:06.786 else:
2025-07-01 17:49:06.786 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.787 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.787 else:
2025-07-01 17:49:06.787 # the synch pair is identical
2025-07-01 17:49:06.787 yield ' ' + aelt
2025-07-01 17:49:06.787
2025-07-01 17:49:06.787 # pump out diffs from after the synch point
2025-07-01 17:49:06.787 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.787
2025-07-01 17:49:06.787 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.787 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.787
2025-07-01 17:49:06.787 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.787 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.787 alo = 186, ahi = 1101
2025-07-01 17:49:06.787 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.787 blo = 186, bhi = 1101
2025-07-01 17:49:06.787
2025-07-01 17:49:06.787 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.787 g = []
2025-07-01 17:49:06.787 if alo < ahi:
2025-07-01 17:49:06.787 if blo < bhi:
2025-07-01 17:49:06.788 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.788 else:
2025-07-01 17:49:06.788 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.788 elif blo < bhi:
2025-07-01 17:49:06.788 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.788
2025-07-01 17:49:06.788 > yield from g
2025-07-01 17:49:06.788
2025-07-01 17:49:06.788 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.788 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.788
2025-07-01 17:49:06.788 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.788 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.788 alo = 186, ahi = 1101
2025-07-01 17:49:06.788 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.788 blo = 186, bhi = 1101
2025-07-01 17:49:06.788
2025-07-01 17:49:06.788 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.788 r"""
2025-07-01 17:49:06.789 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.789 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.789 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.789 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.789
2025-07-01 17:49:06.789 Example:
2025-07-01 17:49:06.789
2025-07-01 17:49:06.789 >>> d = Differ()
2025-07-01 17:49:06.789 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.789 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.789 >>> print(''.join(results), end="")
2025-07-01 17:49:06.789 - abcDefghiJkl
2025-07-01 17:49:06.789 + abcdefGhijkl
2025-07-01 17:49:06.789 """
2025-07-01 17:49:06.789
2025-07-01 17:49:06.789 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.789 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.789 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.790 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.790 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.790
2025-07-01 17:49:06.790 # search for the pair that matches best without being identical
2025-07-01 17:49:06.790 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.790 # on junk -- unless we have to)
2025-07-01 17:49:06.790 for j in range(blo, bhi):
2025-07-01 17:49:06.790 bj = b[j]
2025-07-01 17:49:06.790 cruncher.set_seq2(bj)
2025-07-01 17:49:06.790 for i in range(alo, ahi):
2025-07-01 17:49:06.790 ai = a[i]
2025-07-01 17:49:06.790 if ai == bj:
2025-07-01 17:49:06.790 if eqi is None:
2025-07-01 17:49:06.790 eqi, eqj = i, j
2025-07-01 17:49:06.790 continue
2025-07-01 17:49:06.790 cruncher.set_seq1(ai)
2025-07-01 17:49:06.790 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.790 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.790 # compares by a factor of 3.
2025-07-01 17:49:06.790 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.791 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.791 # of the computation is cached by cruncher
2025-07-01 17:49:06.791 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.791 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.791 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.791 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.791 if best_ratio < cutoff:
2025-07-01 17:49:06.791 # no non-identical "pretty close" pair
2025-07-01 17:49:06.791 if eqi is None:
2025-07-01 17:49:06.791 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.791 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.791 return
2025-07-01 17:49:06.791 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.791 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.791 else:
2025-07-01 17:49:06.791 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.791 eqi = None
2025-07-01 17:49:06.791
2025-07-01 17:49:06.791 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.791 # identical
2025-07-01 17:49:06.791
2025-07-01 17:49:06.792 # pump out diffs from before the synch point
2025-07-01 17:49:06.792 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.792
2025-07-01 17:49:06.792 # do intraline marking on the synch pair
2025-07-01 17:49:06.792 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.792 if eqi is None:
2025-07-01 17:49:06.792 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.792 atags = btags = ""
2025-07-01 17:49:06.792 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.792 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.792 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.792 if tag == 'replace':
2025-07-01 17:49:06.792 atags += '^' * la
2025-07-01 17:49:06.792 btags += '^' * lb
2025-07-01 17:49:06.792 elif tag == 'delete':
2025-07-01 17:49:06.792 atags += '-' * la
2025-07-01 17:49:06.792 elif tag == 'insert':
2025-07-01 17:49:06.792 btags += '+' * lb
2025-07-01 17:49:06.792 elif tag == 'equal':
2025-07-01 17:49:06.792 atags += ' ' * la
2025-07-01 17:49:06.793 btags += ' ' * lb
2025-07-01 17:49:06.793 else:
2025-07-01 17:49:06.793 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.793 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.793 else:
2025-07-01 17:49:06.793 # the synch pair is identical
2025-07-01 17:49:06.793 yield ' ' + aelt
2025-07-01 17:49:06.793
2025-07-01 17:49:06.793 # pump out diffs from after the synch point
2025-07-01 17:49:06.793 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.793
2025-07-01 17:49:06.793 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.793 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.793
2025-07-01 17:49:06.793 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.793 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.793 alo = 187, ahi = 1101
2025-07-01 17:49:06.793 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.793 blo = 187, bhi = 1101
2025-07-01 17:49:06.793
2025-07-01 17:49:06.794 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.798 g = []
2025-07-01 17:49:06.798 if alo < ahi:
2025-07-01 17:49:06.799 if blo < bhi:
2025-07-01 17:49:06.799 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.799 else:
2025-07-01 17:49:06.799 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.799 elif blo < bhi:
2025-07-01 17:49:06.799 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.799
2025-07-01 17:49:06.799 > yield from g
2025-07-01 17:49:06.799
2025-07-01 17:49:06.799 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.799 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.799
2025-07-01 17:49:06.799 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.799 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.799 alo = 187, ahi = 1101
2025-07-01 17:49:06.799 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.799 blo = 187, bhi = 1101
2025-07-01 17:49:06.799
2025-07-01 17:49:06.799 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.799 r"""
2025-07-01 17:49:06.800 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.800 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.800 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.800 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.800
2025-07-01 17:49:06.800 Example:
2025-07-01 17:49:06.800
2025-07-01 17:49:06.800 >>> d = Differ()
2025-07-01 17:49:06.800 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.800 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.800 >>> print(''.join(results), end="")
2025-07-01 17:49:06.800 - abcDefghiJkl
2025-07-01 17:49:06.800 + abcdefGhijkl
2025-07-01 17:49:06.800 """
2025-07-01 17:49:06.800
2025-07-01 17:49:06.800 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.800 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.801 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.801 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.801 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.801
2025-07-01 17:49:06.801 # search for the pair that matches best without being identical
2025-07-01 17:49:06.801 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.801 # on junk -- unless we have to)
2025-07-01 17:49:06.801 for j in range(blo, bhi):
2025-07-01 17:49:06.801 bj = b[j]
2025-07-01 17:49:06.801 cruncher.set_seq2(bj)
2025-07-01 17:49:06.801 for i in range(alo, ahi):
2025-07-01 17:49:06.801 ai = a[i]
2025-07-01 17:49:06.801 if ai == bj:
2025-07-01 17:49:06.801 if eqi is None:
2025-07-01 17:49:06.801 eqi, eqj = i, j
2025-07-01 17:49:06.801 continue
2025-07-01 17:49:06.801 cruncher.set_seq1(ai)
2025-07-01 17:49:06.801 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.801 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.801 # compares by a factor of 3.
2025-07-01 17:49:06.802 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.802 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.802 # of the computation is cached by cruncher
2025-07-01 17:49:06.802 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.802 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.802 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.802 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.802 if best_ratio < cutoff:
2025-07-01 17:49:06.802 # no non-identical "pretty close" pair
2025-07-01 17:49:06.802 if eqi is None:
2025-07-01 17:49:06.802 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.802 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.802 return
2025-07-01 17:49:06.802 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.802 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.802 else:
2025-07-01 17:49:06.802 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.802 eqi = None
2025-07-01 17:49:06.802
2025-07-01 17:49:06.803 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.803 # identical
2025-07-01 17:49:06.803
2025-07-01 17:49:06.803 # pump out diffs from before the synch point
2025-07-01 17:49:06.803 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.803
2025-07-01 17:49:06.803 # do intraline marking on the synch pair
2025-07-01 17:49:06.803 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.803 if eqi is None:
2025-07-01 17:49:06.803 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.803 atags = btags = ""
2025-07-01 17:49:06.803 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.803 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.803 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.803 if tag == 'replace':
2025-07-01 17:49:06.803 atags += '^' * la
2025-07-01 17:49:06.803 btags += '^' * lb
2025-07-01 17:49:06.803 elif tag == 'delete':
2025-07-01 17:49:06.803 atags += '-' * la
2025-07-01 17:49:06.804 elif tag == 'insert':
2025-07-01 17:49:06.804 btags += '+' * lb
2025-07-01 17:49:06.804 elif tag == 'equal':
2025-07-01 17:49:06.804 atags += ' ' * la
2025-07-01 17:49:06.804 btags += ' ' * lb
2025-07-01 17:49:06.804 else:
2025-07-01 17:49:06.804 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.804 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.804 else:
2025-07-01 17:49:06.804 # the synch pair is identical
2025-07-01 17:49:06.804 yield ' ' + aelt
2025-07-01 17:49:06.804
2025-07-01 17:49:06.804 # pump out diffs from after the synch point
2025-07-01 17:49:06.804 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.804
2025-07-01 17:49:06.804 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.804 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.804
2025-07-01 17:49:06.804 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.804 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.804 alo = 188, ahi = 1101
2025-07-01 17:49:06.805 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.805 blo = 188, bhi = 1101
2025-07-01 17:49:06.805
2025-07-01 17:49:06.805 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.805 g = []
2025-07-01 17:49:06.805 if alo < ahi:
2025-07-01 17:49:06.805 if blo < bhi:
2025-07-01 17:49:06.805 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.805 else:
2025-07-01 17:49:06.805 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.805 elif blo < bhi:
2025-07-01 17:49:06.805 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.805
2025-07-01 17:49:06.805 > yield from g
2025-07-01 17:49:06.805
2025-07-01 17:49:06.805 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.805 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.805
2025-07-01 17:49:06.805 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.805 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.806 alo = 188, ahi = 1101
2025-07-01 17:49:06.806 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.806 blo = 188, bhi = 1101
2025-07-01 17:49:06.806
2025-07-01 17:49:06.806 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.806 r"""
2025-07-01 17:49:06.806 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.806 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.806 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.806 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.806
2025-07-01 17:49:06.806 Example:
2025-07-01 17:49:06.806
2025-07-01 17:49:06.806 >>> d = Differ()
2025-07-01 17:49:06.806 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.806 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.806 >>> print(''.join(results), end="")
2025-07-01 17:49:06.806 - abcDefghiJkl
2025-07-01 17:49:06.806 + abcdefGhijkl
2025-07-01 17:49:06.807 """
2025-07-01 17:49:06.807
2025-07-01 17:49:06.807 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.807 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.807 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.807 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.807 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.807
2025-07-01 17:49:06.807 # search for the pair that matches best without being identical
2025-07-01 17:49:06.807 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.807 # on junk -- unless we have to)
2025-07-01 17:49:06.807 for j in range(blo, bhi):
2025-07-01 17:49:06.807 bj = b[j]
2025-07-01 17:49:06.807 cruncher.set_seq2(bj)
2025-07-01 17:49:06.807 for i in range(alo, ahi):
2025-07-01 17:49:06.807 ai = a[i]
2025-07-01 17:49:06.807 if ai == bj:
2025-07-01 17:49:06.807 if eqi is None:
2025-07-01 17:49:06.807 eqi, eqj = i, j
2025-07-01 17:49:06.807 continue
2025-07-01 17:49:06.808 cruncher.set_seq1(ai)
2025-07-01 17:49:06.808 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.808 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.808 # compares by a factor of 3.
2025-07-01 17:49:06.808 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.808 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.808 # of the computation is cached by cruncher
2025-07-01 17:49:06.808 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.808 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.808 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.808 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.808 if best_ratio < cutoff:
2025-07-01 17:49:06.808 # no non-identical "pretty close" pair
2025-07-01 17:49:06.808 if eqi is None:
2025-07-01 17:49:06.808 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.808 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.808 return
2025-07-01 17:49:06.808 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.808 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.808 else:
2025-07-01 17:49:06.809 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.809 eqi = None
2025-07-01 17:49:06.809
2025-07-01 17:49:06.809 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.809 # identical
2025-07-01 17:49:06.809
2025-07-01 17:49:06.809 # pump out diffs from before the synch point
2025-07-01 17:49:06.809 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.809
2025-07-01 17:49:06.809 # do intraline marking on the synch pair
2025-07-01 17:49:06.809 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.809 if eqi is None:
2025-07-01 17:49:06.809 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.809 atags = btags = ""
2025-07-01 17:49:06.809 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.809 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.809 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.809 if tag == 'replace':
2025-07-01 17:49:06.809 atags += '^' * la
2025-07-01 17:49:06.809 btags += '^' * lb
2025-07-01 17:49:06.809 elif tag == 'delete':
2025-07-01 17:49:06.812 atags += '-' * la
2025-07-01 17:49:06.813 elif tag == 'insert':
2025-07-01 17:49:06.813 btags += '+' * lb
2025-07-01 17:49:06.813 elif tag == 'equal':
2025-07-01 17:49:06.813 atags += ' ' * la
2025-07-01 17:49:06.813 btags += ' ' * lb
2025-07-01 17:49:06.813 else:
2025-07-01 17:49:06.813 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.813 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.813 else:
2025-07-01 17:49:06.813 # the synch pair is identical
2025-07-01 17:49:06.813 yield ' ' + aelt
2025-07-01 17:49:06.813
2025-07-01 17:49:06.813 # pump out diffs from after the synch point
2025-07-01 17:49:06.813 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.813
2025-07-01 17:49:06.813 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.813 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.813
2025-07-01 17:49:06.813 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.813 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.814 alo = 189, ahi = 1101
2025-07-01 17:49:06.814 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.814 blo = 189, bhi = 1101
2025-07-01 17:49:06.814
2025-07-01 17:49:06.814 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.814 g = []
2025-07-01 17:49:06.814 if alo < ahi:
2025-07-01 17:49:06.814 if blo < bhi:
2025-07-01 17:49:06.814 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.814 else:
2025-07-01 17:49:06.814 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.814 elif blo < bhi:
2025-07-01 17:49:06.814 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.814
2025-07-01 17:49:06.814 > yield from g
2025-07-01 17:49:06.814
2025-07-01 17:49:06.814 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.814 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.814
2025-07-01 17:49:06.814 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.815 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.815 alo = 189, ahi = 1101
2025-07-01 17:49:06.815 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.815 blo = 189, bhi = 1101
2025-07-01 17:49:06.815
2025-07-01 17:49:06.815 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.815 r"""
2025-07-01 17:49:06.815 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.815 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.815 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.815 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.815
2025-07-01 17:49:06.815 Example:
2025-07-01 17:49:06.815
2025-07-01 17:49:06.815 >>> d = Differ()
2025-07-01 17:49:06.815 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.815 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.815 >>> print(''.join(results), end="")
2025-07-01 17:49:06.815 - abcDefghiJkl
2025-07-01 17:49:06.816 + abcdefGhijkl
2025-07-01 17:49:06.816 """
2025-07-01 17:49:06.816
2025-07-01 17:49:06.816 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.816 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.816 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.816 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.816 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.816
2025-07-01 17:49:06.816 # search for the pair that matches best without being identical
2025-07-01 17:49:06.816 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.816 # on junk -- unless we have to)
2025-07-01 17:49:06.816 for j in range(blo, bhi):
2025-07-01 17:49:06.816 bj = b[j]
2025-07-01 17:49:06.816 cruncher.set_seq2(bj)
2025-07-01 17:49:06.816 for i in range(alo, ahi):
2025-07-01 17:49:06.816 ai = a[i]
2025-07-01 17:49:06.816 if ai == bj:
2025-07-01 17:49:06.816 if eqi is None:
2025-07-01 17:49:06.816 eqi, eqj = i, j
2025-07-01 17:49:06.817 continue
2025-07-01 17:49:06.817 cruncher.set_seq1(ai)
2025-07-01 17:49:06.817 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.817 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.817 # compares by a factor of 3.
2025-07-01 17:49:06.817 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.817 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.817 # of the computation is cached by cruncher
2025-07-01 17:49:06.817 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.817 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.817 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.817 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.817 if best_ratio < cutoff:
2025-07-01 17:49:06.817 # no non-identical "pretty close" pair
2025-07-01 17:49:06.817 if eqi is None:
2025-07-01 17:49:06.817 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.817 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.817 return
2025-07-01 17:49:06.817 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.817 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.818 else:
2025-07-01 17:49:06.818 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.818 eqi = None
2025-07-01 17:49:06.818
2025-07-01 17:49:06.818 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.818 # identical
2025-07-01 17:49:06.818
2025-07-01 17:49:06.818 # pump out diffs from before the synch point
2025-07-01 17:49:06.818 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.818
2025-07-01 17:49:06.818 # do intraline marking on the synch pair
2025-07-01 17:49:06.818 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.818 if eqi is None:
2025-07-01 17:49:06.818 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.818 atags = btags = ""
2025-07-01 17:49:06.818 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.818 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.818 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.818 if tag == 'replace':
2025-07-01 17:49:06.818 atags += '^' * la
2025-07-01 17:49:06.818 btags += '^' * lb
2025-07-01 17:49:06.818 elif tag == 'delete':
2025-07-01 17:49:06.819 atags += '-' * la
2025-07-01 17:49:06.819 elif tag == 'insert':
2025-07-01 17:49:06.819 btags += '+' * lb
2025-07-01 17:49:06.819 elif tag == 'equal':
2025-07-01 17:49:06.819 atags += ' ' * la
2025-07-01 17:49:06.819 btags += ' ' * lb
2025-07-01 17:49:06.819 else:
2025-07-01 17:49:06.819 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.819 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.819 else:
2025-07-01 17:49:06.819 # the synch pair is identical
2025-07-01 17:49:06.819 yield ' ' + aelt
2025-07-01 17:49:06.819
2025-07-01 17:49:06.819 # pump out diffs from after the synch point
2025-07-01 17:49:06.819 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.819
2025-07-01 17:49:06.819 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.819 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.819
2025-07-01 17:49:06.819 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.819 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.820 alo = 190, ahi = 1101
2025-07-01 17:49:06.820 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.820 blo = 190, bhi = 1101
2025-07-01 17:49:06.820
2025-07-01 17:49:06.820 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.820 g = []
2025-07-01 17:49:06.820 if alo < ahi:
2025-07-01 17:49:06.820 if blo < bhi:
2025-07-01 17:49:06.820 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.820 else:
2025-07-01 17:49:06.820 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.820 elif blo < bhi:
2025-07-01 17:49:06.820 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.820
2025-07-01 17:49:06.820 > yield from g
2025-07-01 17:49:06.820
2025-07-01 17:49:06.820 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.820 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.820
2025-07-01 17:49:06.820 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.820 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.821 alo = 190, ahi = 1101
2025-07-01 17:49:06.821 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.821 blo = 190, bhi = 1101
2025-07-01 17:49:06.821
2025-07-01 17:49:06.821 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.821 r"""
2025-07-01 17:49:06.821 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.821 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.821 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.821 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.821
2025-07-01 17:49:06.821 Example:
2025-07-01 17:49:06.821
2025-07-01 17:49:06.821 >>> d = Differ()
2025-07-01 17:49:06.821 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.821 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.821 >>> print(''.join(results), end="")
2025-07-01 17:49:06.821 - abcDefghiJkl
2025-07-01 17:49:06.821 + abcdefGhijkl
2025-07-01 17:49:06.822 """
2025-07-01 17:49:06.822
2025-07-01 17:49:06.822 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.822 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.822 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.822 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.822 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.822
2025-07-01 17:49:06.822 # search for the pair that matches best without being identical
2025-07-01 17:49:06.822 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.822 # on junk -- unless we have to)
2025-07-01 17:49:06.822 for j in range(blo, bhi):
2025-07-01 17:49:06.822 bj = b[j]
2025-07-01 17:49:06.822 cruncher.set_seq2(bj)
2025-07-01 17:49:06.822 for i in range(alo, ahi):
2025-07-01 17:49:06.822 ai = a[i]
2025-07-01 17:49:06.822 if ai == bj:
2025-07-01 17:49:06.822 if eqi is None:
2025-07-01 17:49:06.822 eqi, eqj = i, j
2025-07-01 17:49:06.823 continue
2025-07-01 17:49:06.823 cruncher.set_seq1(ai)
2025-07-01 17:49:06.823 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.823 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.823 # compares by a factor of 3.
2025-07-01 17:49:06.823 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.823 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.823 # of the computation is cached by cruncher
2025-07-01 17:49:06.823 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.823 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.823 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.823 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.823 if best_ratio < cutoff:
2025-07-01 17:49:06.823 # no non-identical "pretty close" pair
2025-07-01 17:49:06.823 if eqi is None:
2025-07-01 17:49:06.823 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.823 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.823 return
2025-07-01 17:49:06.823 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.823 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.824 else:
2025-07-01 17:49:06.824 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.824 eqi = None
2025-07-01 17:49:06.824
2025-07-01 17:49:06.824 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.824 # identical
2025-07-01 17:49:06.824
2025-07-01 17:49:06.824 # pump out diffs from before the synch point
2025-07-01 17:49:06.824 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.824
2025-07-01 17:49:06.824 # do intraline marking on the synch pair
2025-07-01 17:49:06.824 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.824 if eqi is None:
2025-07-01 17:49:06.824 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.824 atags = btags = ""
2025-07-01 17:49:06.824 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.824 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.824 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.824 if tag == 'replace':
2025-07-01 17:49:06.824 atags += '^' * la
2025-07-01 17:49:06.824 btags += '^' * lb
2025-07-01 17:49:06.825 elif tag == 'delete':
2025-07-01 17:49:06.825 atags += '-' * la
2025-07-01 17:49:06.825 elif tag == 'insert':
2025-07-01 17:49:06.825 btags += '+' * lb
2025-07-01 17:49:06.825 elif tag == 'equal':
2025-07-01 17:49:06.825 atags += ' ' * la
2025-07-01 17:49:06.825 btags += ' ' * lb
2025-07-01 17:49:06.825 else:
2025-07-01 17:49:06.825 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.825 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.825 else:
2025-07-01 17:49:06.825 # the synch pair is identical
2025-07-01 17:49:06.825 yield ' ' + aelt
2025-07-01 17:49:06.825
2025-07-01 17:49:06.825 # pump out diffs from after the synch point
2025-07-01 17:49:06.825 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.825
2025-07-01 17:49:06.825 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.825 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.825
2025-07-01 17:49:06.825 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.826 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.831 alo = 191, ahi = 1101
2025-07-01 17:49:06.831 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.831 blo = 191, bhi = 1101
2025-07-01 17:49:06.831
2025-07-01 17:49:06.831 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.831 g = []
2025-07-01 17:49:06.831 if alo < ahi:
2025-07-01 17:49:06.831 if blo < bhi:
2025-07-01 17:49:06.831 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.831 else:
2025-07-01 17:49:06.831 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.831 elif blo < bhi:
2025-07-01 17:49:06.831 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.831
2025-07-01 17:49:06.831 > yield from g
2025-07-01 17:49:06.831
2025-07-01 17:49:06.831 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.831 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.832
2025-07-01 17:49:06.832 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.832 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.832 alo = 191, ahi = 1101
2025-07-01 17:49:06.832 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.832 blo = 191, bhi = 1101
2025-07-01 17:49:06.832
2025-07-01 17:49:06.832 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.832 r"""
2025-07-01 17:49:06.832 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.832 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.832 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.832 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.832
2025-07-01 17:49:06.832 Example:
2025-07-01 17:49:06.832
2025-07-01 17:49:06.832 >>> d = Differ()
2025-07-01 17:49:06.832 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.832 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.832 >>> print(''.join(results), end="")
2025-07-01 17:49:06.832 - abcDefghiJkl
2025-07-01 17:49:06.833 + abcdefGhijkl
2025-07-01 17:49:06.833 """
2025-07-01 17:49:06.833
2025-07-01 17:49:06.833 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.833 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.833 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.833 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.833 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.833
2025-07-01 17:49:06.833 # search for the pair that matches best without being identical
2025-07-01 17:49:06.833 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.833 # on junk -- unless we have to)
2025-07-01 17:49:06.833 for j in range(blo, bhi):
2025-07-01 17:49:06.833 bj = b[j]
2025-07-01 17:49:06.833 cruncher.set_seq2(bj)
2025-07-01 17:49:06.833 for i in range(alo, ahi):
2025-07-01 17:49:06.833 ai = a[i]
2025-07-01 17:49:06.833 if ai == bj:
2025-07-01 17:49:06.833 if eqi is None:
2025-07-01 17:49:06.833 eqi, eqj = i, j
2025-07-01 17:49:06.834 continue
2025-07-01 17:49:06.834 cruncher.set_seq1(ai)
2025-07-01 17:49:06.834 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.834 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.834 # compares by a factor of 3.
2025-07-01 17:49:06.834 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.834 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.834 # of the computation is cached by cruncher
2025-07-01 17:49:06.834 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.834 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.834 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.834 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.834 if best_ratio < cutoff:
2025-07-01 17:49:06.834 # no non-identical "pretty close" pair
2025-07-01 17:49:06.834 if eqi is None:
2025-07-01 17:49:06.834 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.834 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.834 return
2025-07-01 17:49:06.834 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.834 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.834 else:
2025-07-01 17:49:06.835 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.835 eqi = None
2025-07-01 17:49:06.835
2025-07-01 17:49:06.835 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.835 # identical
2025-07-01 17:49:06.835
2025-07-01 17:49:06.835 # pump out diffs from before the synch point
2025-07-01 17:49:06.835 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.835
2025-07-01 17:49:06.835 # do intraline marking on the synch pair
2025-07-01 17:49:06.835 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.835 if eqi is None:
2025-07-01 17:49:06.835 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.835 atags = btags = ""
2025-07-01 17:49:06.835 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.835 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.835 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.835 if tag == 'replace':
2025-07-01 17:49:06.835 atags += '^' * la
2025-07-01 17:49:06.836 btags += '^' * lb
2025-07-01 17:49:06.836 elif tag == 'delete':
2025-07-01 17:49:06.836 atags += '-' * la
2025-07-01 17:49:06.836 elif tag == 'insert':
2025-07-01 17:49:06.836 btags += '+' * lb
2025-07-01 17:49:06.836 elif tag == 'equal':
2025-07-01 17:49:06.836 atags += ' ' * la
2025-07-01 17:49:06.836 btags += ' ' * lb
2025-07-01 17:49:06.836 else:
2025-07-01 17:49:06.836 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.836 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.836 else:
2025-07-01 17:49:06.836 # the synch pair is identical
2025-07-01 17:49:06.836 yield ' ' + aelt
2025-07-01 17:49:06.836
2025-07-01 17:49:06.836 # pump out diffs from after the synch point
2025-07-01 17:49:06.836 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.836
2025-07-01 17:49:06.836 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.836 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.836
2025-07-01 17:49:06.837 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.837 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.837 alo = 192, ahi = 1101
2025-07-01 17:49:06.837 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.837 blo = 192, bhi = 1101
2025-07-01 17:49:06.837
2025-07-01 17:49:06.837 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.837 g = []
2025-07-01 17:49:06.837 if alo < ahi:
2025-07-01 17:49:06.837 if blo < bhi:
2025-07-01 17:49:06.837 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.837 else:
2025-07-01 17:49:06.837 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.837 elif blo < bhi:
2025-07-01 17:49:06.837 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.837
2025-07-01 17:49:06.837 > yield from g
2025-07-01 17:49:06.837
2025-07-01 17:49:06.837 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.837 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.837
2025-07-01 17:49:06.838 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.838 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.838 alo = 192, ahi = 1101
2025-07-01 17:49:06.838 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.838 blo = 192, bhi = 1101
2025-07-01 17:49:06.838
2025-07-01 17:49:06.838 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.838 r"""
2025-07-01 17:49:06.838 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.838 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.838 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.838 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.838
2025-07-01 17:49:06.838 Example:
2025-07-01 17:49:06.838
2025-07-01 17:49:06.838 >>> d = Differ()
2025-07-01 17:49:06.838 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.838 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.838 >>> print(''.join(results), end="")
2025-07-01 17:49:06.838 - abcDefghiJkl
2025-07-01 17:49:06.839 + abcdefGhijkl
2025-07-01 17:49:06.839 """
2025-07-01 17:49:06.839
2025-07-01 17:49:06.839 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.839 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.839 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.839 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.839 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.839
2025-07-01 17:49:06.839 # search for the pair that matches best without being identical
2025-07-01 17:49:06.839 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.839 # on junk -- unless we have to)
2025-07-01 17:49:06.839 for j in range(blo, bhi):
2025-07-01 17:49:06.839 bj = b[j]
2025-07-01 17:49:06.839 cruncher.set_seq2(bj)
2025-07-01 17:49:06.839 for i in range(alo, ahi):
2025-07-01 17:49:06.839 ai = a[i]
2025-07-01 17:49:06.839 if ai == bj:
2025-07-01 17:49:06.839 if eqi is None:
2025-07-01 17:49:06.839 eqi, eqj = i, j
2025-07-01 17:49:06.839 continue
2025-07-01 17:49:06.840 cruncher.set_seq1(ai)
2025-07-01 17:49:06.840 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.840 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.840 # compares by a factor of 3.
2025-07-01 17:49:06.840 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.840 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.840 # of the computation is cached by cruncher
2025-07-01 17:49:06.840 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.840 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.840 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.840 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.840 if best_ratio < cutoff:
2025-07-01 17:49:06.840 # no non-identical "pretty close" pair
2025-07-01 17:49:06.840 if eqi is None:
2025-07-01 17:49:06.840 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.840 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.840 return
2025-07-01 17:49:06.840 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.840 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.840 else:
2025-07-01 17:49:06.840 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.840 eqi = None
2025-07-01 17:49:06.841
2025-07-01 17:49:06.841 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.841 # identical
2025-07-01 17:49:06.841
2025-07-01 17:49:06.841 # pump out diffs from before the synch point
2025-07-01 17:49:06.841 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.841
2025-07-01 17:49:06.841 # do intraline marking on the synch pair
2025-07-01 17:49:06.841 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.841 if eqi is None:
2025-07-01 17:49:06.841 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.841 atags = btags = ""
2025-07-01 17:49:06.841 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.841 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.841 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.841 if tag == 'replace':
2025-07-01 17:49:06.841 atags += '^' * la
2025-07-01 17:49:06.841 btags += '^' * lb
2025-07-01 17:49:06.841 elif tag == 'delete':
2025-07-01 17:49:06.841 atags += '-' * la
2025-07-01 17:49:06.841 elif tag == 'insert':
2025-07-01 17:49:06.841 btags += '+' * lb
2025-07-01 17:49:06.845 elif tag == 'equal':
2025-07-01 17:49:06.845 atags += ' ' * la
2025-07-01 17:49:06.845 btags += ' ' * lb
2025-07-01 17:49:06.845 else:
2025-07-01 17:49:06.845 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.845 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.845 else:
2025-07-01 17:49:06.845 # the synch pair is identical
2025-07-01 17:49:06.845 yield ' ' + aelt
2025-07-01 17:49:06.845
2025-07-01 17:49:06.845 # pump out diffs from after the synch point
2025-07-01 17:49:06.845 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.845
2025-07-01 17:49:06.845 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.845 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.845
2025-07-01 17:49:06.845 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.845 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.845 alo = 193, ahi = 1101
2025-07-01 17:49:06.846 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.846 blo = 193, bhi = 1101
2025-07-01 17:49:06.846
2025-07-01 17:49:06.846 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.846 g = []
2025-07-01 17:49:06.846 if alo < ahi:
2025-07-01 17:49:06.846 if blo < bhi:
2025-07-01 17:49:06.846 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.846 else:
2025-07-01 17:49:06.846 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.846 elif blo < bhi:
2025-07-01 17:49:06.846 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.846
2025-07-01 17:49:06.846 > yield from g
2025-07-01 17:49:06.846
2025-07-01 17:49:06.846 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.846 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.846
2025-07-01 17:49:06.846 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.847 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.847 alo = 193, ahi = 1101
2025-07-01 17:49:06.847 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.847 blo = 193, bhi = 1101
2025-07-01 17:49:06.847
2025-07-01 17:49:06.847 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.847 r"""
2025-07-01 17:49:06.847 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.847 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.847 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.847 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.847
2025-07-01 17:49:06.847 Example:
2025-07-01 17:49:06.847
2025-07-01 17:49:06.847 >>> d = Differ()
2025-07-01 17:49:06.847 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.847 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.847 >>> print(''.join(results), end="")
2025-07-01 17:49:06.847 - abcDefghiJkl
2025-07-01 17:49:06.848 + abcdefGhijkl
2025-07-01 17:49:06.848 """
2025-07-01 17:49:06.848
2025-07-01 17:49:06.848 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.848 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.848 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.848 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.848 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.848
2025-07-01 17:49:06.848 # search for the pair that matches best without being identical
2025-07-01 17:49:06.848 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.848 # on junk -- unless we have to)
2025-07-01 17:49:06.848 for j in range(blo, bhi):
2025-07-01 17:49:06.848 bj = b[j]
2025-07-01 17:49:06.848 cruncher.set_seq2(bj)
2025-07-01 17:49:06.848 for i in range(alo, ahi):
2025-07-01 17:49:06.848 ai = a[i]
2025-07-01 17:49:06.848 if ai == bj:
2025-07-01 17:49:06.848 if eqi is None:
2025-07-01 17:49:06.849 eqi, eqj = i, j
2025-07-01 17:49:06.849 continue
2025-07-01 17:49:06.849 cruncher.set_seq1(ai)
2025-07-01 17:49:06.849 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.849 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.849 # compares by a factor of 3.
2025-07-01 17:49:06.849 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.849 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.849 # of the computation is cached by cruncher
2025-07-01 17:49:06.849 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.849 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.849 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.849 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.849 if best_ratio < cutoff:
2025-07-01 17:49:06.849 # no non-identical "pretty close" pair
2025-07-01 17:49:06.849 if eqi is None:
2025-07-01 17:49:06.849 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.849 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.849 return
2025-07-01 17:49:06.849 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.849 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.850 else:
2025-07-01 17:49:06.850 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.850 eqi = None
2025-07-01 17:49:06.850
2025-07-01 17:49:06.850 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.850 # identical
2025-07-01 17:49:06.850
2025-07-01 17:49:06.850 # pump out diffs from before the synch point
2025-07-01 17:49:06.850 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.850
2025-07-01 17:49:06.850 # do intraline marking on the synch pair
2025-07-01 17:49:06.850 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.850 if eqi is None:
2025-07-01 17:49:06.850 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.850 atags = btags = ""
2025-07-01 17:49:06.850 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.850 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.850 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.850 if tag == 'replace':
2025-07-01 17:49:06.850 atags += '^' * la
2025-07-01 17:49:06.850 btags += '^' * lb
2025-07-01 17:49:06.851 elif tag == 'delete':
2025-07-01 17:49:06.851 atags += '-' * la
2025-07-01 17:49:06.851 elif tag == 'insert':
2025-07-01 17:49:06.851 btags += '+' * lb
2025-07-01 17:49:06.851 elif tag == 'equal':
2025-07-01 17:49:06.851 atags += ' ' * la
2025-07-01 17:49:06.851 btags += ' ' * lb
2025-07-01 17:49:06.851 else:
2025-07-01 17:49:06.851 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.851 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.851 else:
2025-07-01 17:49:06.851 # the synch pair is identical
2025-07-01 17:49:06.851 yield ' ' + aelt
2025-07-01 17:49:06.851
2025-07-01 17:49:06.851 # pump out diffs from after the synch point
2025-07-01 17:49:06.851 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.851
2025-07-01 17:49:06.851 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.851 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.851
2025-07-01 17:49:06.851 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.851 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.852 alo = 194, ahi = 1101
2025-07-01 17:49:06.852 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.852 blo = 194, bhi = 1101
2025-07-01 17:49:06.852
2025-07-01 17:49:06.852 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.852 g = []
2025-07-01 17:49:06.852 if alo < ahi:
2025-07-01 17:49:06.852 if blo < bhi:
2025-07-01 17:49:06.852 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.852 else:
2025-07-01 17:49:06.852 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.852 elif blo < bhi:
2025-07-01 17:49:06.852 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.852
2025-07-01 17:49:06.852 > yield from g
2025-07-01 17:49:06.852
2025-07-01 17:49:06.852 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.852 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.852
2025-07-01 17:49:06.852 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.852 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.853 alo = 194, ahi = 1101
2025-07-01 17:49:06.853 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.853 blo = 194, bhi = 1101
2025-07-01 17:49:06.853
2025-07-01 17:49:06.853 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.853 r"""
2025-07-01 17:49:06.853 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.853 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.853 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.853 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.853
2025-07-01 17:49:06.853 Example:
2025-07-01 17:49:06.853
2025-07-01 17:49:06.853 >>> d = Differ()
2025-07-01 17:49:06.853 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.853 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.853 >>> print(''.join(results), end="")
2025-07-01 17:49:06.853 - abcDefghiJkl
2025-07-01 17:49:06.853 + abcdefGhijkl
2025-07-01 17:49:06.854 """
2025-07-01 17:49:06.854
2025-07-01 17:49:06.854 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.854 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.854 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.854 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.854 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.854
2025-07-01 17:49:06.854 # search for the pair that matches best without being identical
2025-07-01 17:49:06.854 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.854 # on junk -- unless we have to)
2025-07-01 17:49:06.854 for j in range(blo, bhi):
2025-07-01 17:49:06.854 bj = b[j]
2025-07-01 17:49:06.854 cruncher.set_seq2(bj)
2025-07-01 17:49:06.854 for i in range(alo, ahi):
2025-07-01 17:49:06.854 ai = a[i]
2025-07-01 17:49:06.854 if ai == bj:
2025-07-01 17:49:06.854 if eqi is None:
2025-07-01 17:49:06.854 eqi, eqj = i, j
2025-07-01 17:49:06.854 continue
2025-07-01 17:49:06.855 cruncher.set_seq1(ai)
2025-07-01 17:49:06.855 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.855 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.855 # compares by a factor of 3.
2025-07-01 17:49:06.855 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.855 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.855 # of the computation is cached by cruncher
2025-07-01 17:49:06.855 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.855 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.855 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.855 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.855 if best_ratio < cutoff:
2025-07-01 17:49:06.855 # no non-identical "pretty close" pair
2025-07-01 17:49:06.855 if eqi is None:
2025-07-01 17:49:06.855 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.855 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.855 return
2025-07-01 17:49:06.855 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.855 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.855 else:
2025-07-01 17:49:06.855 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.856 eqi = None
2025-07-01 17:49:06.856
2025-07-01 17:49:06.856 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.856 # identical
2025-07-01 17:49:06.856
2025-07-01 17:49:06.856 # pump out diffs from before the synch point
2025-07-01 17:49:06.856 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.856
2025-07-01 17:49:06.856 # do intraline marking on the synch pair
2025-07-01 17:49:06.856 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.856 if eqi is None:
2025-07-01 17:49:06.856 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.856 atags = btags = ""
2025-07-01 17:49:06.856 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.856 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.856 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.856 if tag == 'replace':
2025-07-01 17:49:06.856 atags += '^' * la
2025-07-01 17:49:06.856 btags += '^' * lb
2025-07-01 17:49:06.856 elif tag == 'delete':
2025-07-01 17:49:06.856 atags += '-' * la
2025-07-01 17:49:06.857 elif tag == 'insert':
2025-07-01 17:49:06.857 btags += '+' * lb
2025-07-01 17:49:06.857 elif tag == 'equal':
2025-07-01 17:49:06.857 atags += ' ' * la
2025-07-01 17:49:06.857 btags += ' ' * lb
2025-07-01 17:49:06.857 else:
2025-07-01 17:49:06.857 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.857 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.857 else:
2025-07-01 17:49:06.857 # the synch pair is identical
2025-07-01 17:49:06.857 yield ' ' + aelt
2025-07-01 17:49:06.857
2025-07-01 17:49:06.857 # pump out diffs from after the synch point
2025-07-01 17:49:06.857 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.857
2025-07-01 17:49:06.857 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.857 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.857
2025-07-01 17:49:06.857 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.857 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.857 alo = 195, ahi = 1101
2025-07-01 17:49:06.862 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.863 blo = 195, bhi = 1101
2025-07-01 17:49:06.863
2025-07-01 17:49:06.863 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.863 g = []
2025-07-01 17:49:06.863 if alo < ahi:
2025-07-01 17:49:06.863 if blo < bhi:
2025-07-01 17:49:06.863 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.863 else:
2025-07-01 17:49:06.863 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.863 elif blo < bhi:
2025-07-01 17:49:06.863 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.863
2025-07-01 17:49:06.863 > yield from g
2025-07-01 17:49:06.863
2025-07-01 17:49:06.863 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.863 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.863
2025-07-01 17:49:06.863 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.863 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.864 alo = 195, ahi = 1101
2025-07-01 17:49:06.864 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.864 blo = 195, bhi = 1101
2025-07-01 17:49:06.864
2025-07-01 17:49:06.864 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.864 r"""
2025-07-01 17:49:06.864 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.864 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.864 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.864 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.864
2025-07-01 17:49:06.864 Example:
2025-07-01 17:49:06.864
2025-07-01 17:49:06.864 >>> d = Differ()
2025-07-01 17:49:06.864 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.864 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.864 >>> print(''.join(results), end="")
2025-07-01 17:49:06.864 - abcDefghiJkl
2025-07-01 17:49:06.864 + abcdefGhijkl
2025-07-01 17:49:06.865 """
2025-07-01 17:49:06.865
2025-07-01 17:49:06.865 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.865 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.865 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.865 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.865 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.865
2025-07-01 17:49:06.865 # search for the pair that matches best without being identical
2025-07-01 17:49:06.865 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.865 # on junk -- unless we have to)
2025-07-01 17:49:06.865 for j in range(blo, bhi):
2025-07-01 17:49:06.865 bj = b[j]
2025-07-01 17:49:06.865 cruncher.set_seq2(bj)
2025-07-01 17:49:06.865 for i in range(alo, ahi):
2025-07-01 17:49:06.865 ai = a[i]
2025-07-01 17:49:06.865 if ai == bj:
2025-07-01 17:49:06.865 if eqi is None:
2025-07-01 17:49:06.865 eqi, eqj = i, j
2025-07-01 17:49:06.865 continue
2025-07-01 17:49:06.865 cruncher.set_seq1(ai)
2025-07-01 17:49:06.866 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.866 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.866 # compares by a factor of 3.
2025-07-01 17:49:06.866 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.866 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.866 # of the computation is cached by cruncher
2025-07-01 17:49:06.866 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.866 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.866 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.866 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.866 if best_ratio < cutoff:
2025-07-01 17:49:06.866 # no non-identical "pretty close" pair
2025-07-01 17:49:06.866 if eqi is None:
2025-07-01 17:49:06.866 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.866 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.866 return
2025-07-01 17:49:06.866 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.866 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.866 else:
2025-07-01 17:49:06.866 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.867 eqi = None
2025-07-01 17:49:06.867
2025-07-01 17:49:06.867 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.867 # identical
2025-07-01 17:49:06.867
2025-07-01 17:49:06.867 # pump out diffs from before the synch point
2025-07-01 17:49:06.867 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.867
2025-07-01 17:49:06.867 # do intraline marking on the synch pair
2025-07-01 17:49:06.867 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.867 if eqi is None:
2025-07-01 17:49:06.867 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.867 atags = btags = ""
2025-07-01 17:49:06.867 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.867 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.867 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.867 if tag == 'replace':
2025-07-01 17:49:06.867 atags += '^' * la
2025-07-01 17:49:06.867 btags += '^' * lb
2025-07-01 17:49:06.867 elif tag == 'delete':
2025-07-01 17:49:06.867 atags += '-' * la
2025-07-01 17:49:06.868 elif tag == 'insert':
2025-07-01 17:49:06.868 btags += '+' * lb
2025-07-01 17:49:06.868 elif tag == 'equal':
2025-07-01 17:49:06.868 atags += ' ' * la
2025-07-01 17:49:06.868 btags += ' ' * lb
2025-07-01 17:49:06.868 else:
2025-07-01 17:49:06.868 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.868 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.868 else:
2025-07-01 17:49:06.868 # the synch pair is identical
2025-07-01 17:49:06.868 yield ' ' + aelt
2025-07-01 17:49:06.868
2025-07-01 17:49:06.868 # pump out diffs from after the synch point
2025-07-01 17:49:06.868 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.868
2025-07-01 17:49:06.868 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.868 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.868
2025-07-01 17:49:06.869 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.869 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.869 alo = 196, ahi = 1101
2025-07-01 17:49:06.869 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.869 blo = 196, bhi = 1101
2025-07-01 17:49:06.869
2025-07-01 17:49:06.869 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.869 g = []
2025-07-01 17:49:06.869 if alo < ahi:
2025-07-01 17:49:06.869 if blo < bhi:
2025-07-01 17:49:06.869 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.869 else:
2025-07-01 17:49:06.869 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.869 elif blo < bhi:
2025-07-01 17:49:06.869 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.869
2025-07-01 17:49:06.869 > yield from g
2025-07-01 17:49:06.869
2025-07-01 17:49:06.869 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.869 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.870
2025-07-01 17:49:06.870 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.870 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.870 alo = 196, ahi = 1101
2025-07-01 17:49:06.870 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.870 blo = 196, bhi = 1101
2025-07-01 17:49:06.870
2025-07-01 17:49:06.870 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.870 r"""
2025-07-01 17:49:06.870 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.870 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.870 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.870 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.870
2025-07-01 17:49:06.870 Example:
2025-07-01 17:49:06.870
2025-07-01 17:49:06.870 >>> d = Differ()
2025-07-01 17:49:06.870 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.870 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.870 >>> print(''.join(results), end="")
2025-07-01 17:49:06.870 - abcDefghiJkl
2025-07-01 17:49:06.871 + abcdefGhijkl
2025-07-01 17:49:06.871 """
2025-07-01 17:49:06.871
2025-07-01 17:49:06.871 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.871 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.871 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.871 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.871 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.871
2025-07-01 17:49:06.871 # search for the pair that matches best without being identical
2025-07-01 17:49:06.871 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.871 # on junk -- unless we have to)
2025-07-01 17:49:06.871 for j in range(blo, bhi):
2025-07-01 17:49:06.871 bj = b[j]
2025-07-01 17:49:06.871 cruncher.set_seq2(bj)
2025-07-01 17:49:06.871 for i in range(alo, ahi):
2025-07-01 17:49:06.871 ai = a[i]
2025-07-01 17:49:06.871 if ai == bj:
2025-07-01 17:49:06.871 if eqi is None:
2025-07-01 17:49:06.872 eqi, eqj = i, j
2025-07-01 17:49:06.872 continue
2025-07-01 17:49:06.872 cruncher.set_seq1(ai)
2025-07-01 17:49:06.872 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.872 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.872 # compares by a factor of 3.
2025-07-01 17:49:06.872 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.872 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.872 # of the computation is cached by cruncher
2025-07-01 17:49:06.872 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.872 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.872 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.872 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.872 if best_ratio < cutoff:
2025-07-01 17:49:06.872 # no non-identical "pretty close" pair
2025-07-01 17:49:06.872 if eqi is None:
2025-07-01 17:49:06.872 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.872 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.872 return
2025-07-01 17:49:06.872 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.873 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.875 else:
2025-07-01 17:49:06.876 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.876 eqi = None
2025-07-01 17:49:06.876
2025-07-01 17:49:06.876 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.876 # identical
2025-07-01 17:49:06.876
2025-07-01 17:49:06.876 # pump out diffs from before the synch point
2025-07-01 17:49:06.876 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.876
2025-07-01 17:49:06.876 # do intraline marking on the synch pair
2025-07-01 17:49:06.876 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.876 if eqi is None:
2025-07-01 17:49:06.876 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.876 atags = btags = ""
2025-07-01 17:49:06.876 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.876 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.876 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.876 if tag == 'replace':
2025-07-01 17:49:06.876 atags += '^' * la
2025-07-01 17:49:06.876 btags += '^' * lb
2025-07-01 17:49:06.876 elif tag == 'delete':
2025-07-01 17:49:06.877 atags += '-' * la
2025-07-01 17:49:06.877 elif tag == 'insert':
2025-07-01 17:49:06.877 btags += '+' * lb
2025-07-01 17:49:06.877 elif tag == 'equal':
2025-07-01 17:49:06.877 atags += ' ' * la
2025-07-01 17:49:06.877 btags += ' ' * lb
2025-07-01 17:49:06.877 else:
2025-07-01 17:49:06.877 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.877 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.877 else:
2025-07-01 17:49:06.877 # the synch pair is identical
2025-07-01 17:49:06.877 yield ' ' + aelt
2025-07-01 17:49:06.877
2025-07-01 17:49:06.877 # pump out diffs from after the synch point
2025-07-01 17:49:06.877 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.877
2025-07-01 17:49:06.877 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.877 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.877
2025-07-01 17:49:06.877 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.877 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.878 alo = 197, ahi = 1101
2025-07-01 17:49:06.878 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.878 blo = 197, bhi = 1101
2025-07-01 17:49:06.878
2025-07-01 17:49:06.878 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.878 g = []
2025-07-01 17:49:06.878 if alo < ahi:
2025-07-01 17:49:06.878 if blo < bhi:
2025-07-01 17:49:06.878 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.878 else:
2025-07-01 17:49:06.878 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.878 elif blo < bhi:
2025-07-01 17:49:06.878 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.878
2025-07-01 17:49:06.878 > yield from g
2025-07-01 17:49:06.878
2025-07-01 17:49:06.878 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.878 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.878
2025-07-01 17:49:06.878 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.878 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.878 alo = 197, ahi = 1101
2025-07-01 17:49:06.879 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.879 blo = 197, bhi = 1101
2025-07-01 17:49:06.879
2025-07-01 17:49:06.879 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.879 r"""
2025-07-01 17:49:06.879 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.879 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.879 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.879 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.879
2025-07-01 17:49:06.879 Example:
2025-07-01 17:49:06.879
2025-07-01 17:49:06.879 >>> d = Differ()
2025-07-01 17:49:06.879 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.879 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.879 >>> print(''.join(results), end="")
2025-07-01 17:49:06.879 - abcDefghiJkl
2025-07-01 17:49:06.879 + abcdefGhijkl
2025-07-01 17:49:06.880 """
2025-07-01 17:49:06.880
2025-07-01 17:49:06.880 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.880 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.880 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.880 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.880 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.880
2025-07-01 17:49:06.880 # search for the pair that matches best without being identical
2025-07-01 17:49:06.880 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.880 # on junk -- unless we have to)
2025-07-01 17:49:06.880 for j in range(blo, bhi):
2025-07-01 17:49:06.880 bj = b[j]
2025-07-01 17:49:06.880 cruncher.set_seq2(bj)
2025-07-01 17:49:06.880 for i in range(alo, ahi):
2025-07-01 17:49:06.880 ai = a[i]
2025-07-01 17:49:06.880 if ai == bj:
2025-07-01 17:49:06.880 if eqi is None:
2025-07-01 17:49:06.880 eqi, eqj = i, j
2025-07-01 17:49:06.881 continue
2025-07-01 17:49:06.881 cruncher.set_seq1(ai)
2025-07-01 17:49:06.881 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.881 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.881 # compares by a factor of 3.
2025-07-01 17:49:06.881 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.881 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.881 # of the computation is cached by cruncher
2025-07-01 17:49:06.881 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.881 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.881 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.881 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.881 if best_ratio < cutoff:
2025-07-01 17:49:06.881 # no non-identical "pretty close" pair
2025-07-01 17:49:06.881 if eqi is None:
2025-07-01 17:49:06.881 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.881 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.881 return
2025-07-01 17:49:06.881 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.881 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.881 else:
2025-07-01 17:49:06.882 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.882 eqi = None
2025-07-01 17:49:06.882
2025-07-01 17:49:06.882 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.882 # identical
2025-07-01 17:49:06.882
2025-07-01 17:49:06.882 # pump out diffs from before the synch point
2025-07-01 17:49:06.882 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.882
2025-07-01 17:49:06.882 # do intraline marking on the synch pair
2025-07-01 17:49:06.882 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.882 if eqi is None:
2025-07-01 17:49:06.882 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.882 atags = btags = ""
2025-07-01 17:49:06.882 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.882 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.882 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.882 if tag == 'replace':
2025-07-01 17:49:06.882 atags += '^' * la
2025-07-01 17:49:06.882 btags += '^' * lb
2025-07-01 17:49:06.882 elif tag == 'delete':
2025-07-01 17:49:06.883 atags += '-' * la
2025-07-01 17:49:06.883 elif tag == 'insert':
2025-07-01 17:49:06.883 btags += '+' * lb
2025-07-01 17:49:06.883 elif tag == 'equal':
2025-07-01 17:49:06.883 atags += ' ' * la
2025-07-01 17:49:06.883 btags += ' ' * lb
2025-07-01 17:49:06.883 else:
2025-07-01 17:49:06.883 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.883 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.883 else:
2025-07-01 17:49:06.883 # the synch pair is identical
2025-07-01 17:49:06.883 yield ' ' + aelt
2025-07-01 17:49:06.883
2025-07-01 17:49:06.883 # pump out diffs from after the synch point
2025-07-01 17:49:06.883 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.883
2025-07-01 17:49:06.883 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.883 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.883
2025-07-01 17:49:06.883 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.883 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.884 alo = 198, ahi = 1101
2025-07-01 17:49:06.884 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.884 blo = 198, bhi = 1101
2025-07-01 17:49:06.884
2025-07-01 17:49:06.884 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.884 g = []
2025-07-01 17:49:06.884 if alo < ahi:
2025-07-01 17:49:06.884 if blo < bhi:
2025-07-01 17:49:06.884 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.884 else:
2025-07-01 17:49:06.884 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.884 elif blo < bhi:
2025-07-01 17:49:06.884 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.884
2025-07-01 17:49:06.884 > yield from g
2025-07-01 17:49:06.884
2025-07-01 17:49:06.884 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.884 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.884
2025-07-01 17:49:06.884 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.884 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.884 alo = 198, ahi = 1101
2025-07-01 17:49:06.885 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.885 blo = 198, bhi = 1101
2025-07-01 17:49:06.885
2025-07-01 17:49:06.885 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.885 r"""
2025-07-01 17:49:06.885 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.885 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.885 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.885 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.885
2025-07-01 17:49:06.885 Example:
2025-07-01 17:49:06.885
2025-07-01 17:49:06.885 >>> d = Differ()
2025-07-01 17:49:06.885 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.885 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.885 >>> print(''.join(results), end="")
2025-07-01 17:49:06.885 - abcDefghiJkl
2025-07-01 17:49:06.885 + abcdefGhijkl
2025-07-01 17:49:06.885 """
2025-07-01 17:49:06.886
2025-07-01 17:49:06.886 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.886 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.886 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.886 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.886 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.886
2025-07-01 17:49:06.886 # search for the pair that matches best without being identical
2025-07-01 17:49:06.886 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.886 # on junk -- unless we have to)
2025-07-01 17:49:06.886 for j in range(blo, bhi):
2025-07-01 17:49:06.886 bj = b[j]
2025-07-01 17:49:06.886 cruncher.set_seq2(bj)
2025-07-01 17:49:06.886 for i in range(alo, ahi):
2025-07-01 17:49:06.886 ai = a[i]
2025-07-01 17:49:06.886 if ai == bj:
2025-07-01 17:49:06.886 if eqi is None:
2025-07-01 17:49:06.886 eqi, eqj = i, j
2025-07-01 17:49:06.886 continue
2025-07-01 17:49:06.886 cruncher.set_seq1(ai)
2025-07-01 17:49:06.886 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.886 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.887 # compares by a factor of 3.
2025-07-01 17:49:06.887 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.887 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.887 # of the computation is cached by cruncher
2025-07-01 17:49:06.887 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.887 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.887 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.887 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.887 if best_ratio < cutoff:
2025-07-01 17:49:06.887 # no non-identical "pretty close" pair
2025-07-01 17:49:06.887 if eqi is None:
2025-07-01 17:49:06.887 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.887 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.887 return
2025-07-01 17:49:06.887 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.887 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.887 else:
2025-07-01 17:49:06.887 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.887 eqi = None
2025-07-01 17:49:06.887
2025-07-01 17:49:06.888 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.888 # identical
2025-07-01 17:49:06.888
2025-07-01 17:49:06.888 # pump out diffs from before the synch point
2025-07-01 17:49:06.888 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.888
2025-07-01 17:49:06.888 # do intraline marking on the synch pair
2025-07-01 17:49:06.888 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.888 if eqi is None:
2025-07-01 17:49:06.888 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.888 atags = btags = ""
2025-07-01 17:49:06.888 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.888 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.888 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.888 if tag == 'replace':
2025-07-01 17:49:06.888 atags += '^' * la
2025-07-01 17:49:06.888 btags += '^' * lb
2025-07-01 17:49:06.888 elif tag == 'delete':
2025-07-01 17:49:06.888 atags += '-' * la
2025-07-01 17:49:06.888 elif tag == 'insert':
2025-07-01 17:49:06.888 btags += '+' * lb
2025-07-01 17:49:06.889 elif tag == 'equal':
2025-07-01 17:49:06.894 atags += ' ' * la
2025-07-01 17:49:06.894 btags += ' ' * lb
2025-07-01 17:49:06.894 else:
2025-07-01 17:49:06.894 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.894 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.894 else:
2025-07-01 17:49:06.894 # the synch pair is identical
2025-07-01 17:49:06.895 yield ' ' + aelt
2025-07-01 17:49:06.895
2025-07-01 17:49:06.895 # pump out diffs from after the synch point
2025-07-01 17:49:06.895 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.895
2025-07-01 17:49:06.895 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.895 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.895
2025-07-01 17:49:06.895 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.895 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.895 alo = 199, ahi = 1101
2025-07-01 17:49:06.895 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.895 blo = 199, bhi = 1101
2025-07-01 17:49:06.895
2025-07-01 17:49:06.895 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.895 g = []
2025-07-01 17:49:06.895 if alo < ahi:
2025-07-01 17:49:06.895 if blo < bhi:
2025-07-01 17:49:06.896 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.896 else:
2025-07-01 17:49:06.896 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.896 elif blo < bhi:
2025-07-01 17:49:06.896 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.896
2025-07-01 17:49:06.896 > yield from g
2025-07-01 17:49:06.896
2025-07-01 17:49:06.896 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.896 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.896
2025-07-01 17:49:06.896 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.896 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.896 alo = 199, ahi = 1101
2025-07-01 17:49:06.896 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.896 blo = 199, bhi = 1101
2025-07-01 17:49:06.896
2025-07-01 17:49:06.896 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.896 r"""
2025-07-01 17:49:06.896 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.896 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.897 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.897 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.897
2025-07-01 17:49:06.897 Example:
2025-07-01 17:49:06.897
2025-07-01 17:49:06.897 >>> d = Differ()
2025-07-01 17:49:06.897 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.897 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.897 >>> print(''.join(results), end="")
2025-07-01 17:49:06.897 - abcDefghiJkl
2025-07-01 17:49:06.897 + abcdefGhijkl
2025-07-01 17:49:06.897 """
2025-07-01 17:49:06.897
2025-07-01 17:49:06.897 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.897 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.897 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.897 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.897 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.897
2025-07-01 17:49:06.898 # search for the pair that matches best without being identical
2025-07-01 17:49:06.898 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.898 # on junk -- unless we have to)
2025-07-01 17:49:06.898 for j in range(blo, bhi):
2025-07-01 17:49:06.898 bj = b[j]
2025-07-01 17:49:06.898 cruncher.set_seq2(bj)
2025-07-01 17:49:06.898 for i in range(alo, ahi):
2025-07-01 17:49:06.898 ai = a[i]
2025-07-01 17:49:06.898 if ai == bj:
2025-07-01 17:49:06.898 if eqi is None:
2025-07-01 17:49:06.898 eqi, eqj = i, j
2025-07-01 17:49:06.898 continue
2025-07-01 17:49:06.898 cruncher.set_seq1(ai)
2025-07-01 17:49:06.898 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.898 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.898 # compares by a factor of 3.
2025-07-01 17:49:06.898 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.898 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.898 # of the computation is cached by cruncher
2025-07-01 17:49:06.898 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.898 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.898 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.899 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.899 if best_ratio < cutoff:
2025-07-01 17:49:06.899 # no non-identical "pretty close" pair
2025-07-01 17:49:06.899 if eqi is None:
2025-07-01 17:49:06.899 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.899 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.899 return
2025-07-01 17:49:06.899 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.899 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.899 else:
2025-07-01 17:49:06.899 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.899 eqi = None
2025-07-01 17:49:06.899
2025-07-01 17:49:06.899 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.899 # identical
2025-07-01 17:49:06.899
2025-07-01 17:49:06.899 # pump out diffs from before the synch point
2025-07-01 17:49:06.899 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.899
2025-07-01 17:49:06.899 # do intraline marking on the synch pair
2025-07-01 17:49:06.899 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.900 if eqi is None:
2025-07-01 17:49:06.900 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.900 atags = btags = ""
2025-07-01 17:49:06.900 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.900 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.900 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.900 if tag == 'replace':
2025-07-01 17:49:06.900 atags += '^' * la
2025-07-01 17:49:06.900 btags += '^' * lb
2025-07-01 17:49:06.900 elif tag == 'delete':
2025-07-01 17:49:06.900 atags += '-' * la
2025-07-01 17:49:06.900 elif tag == 'insert':
2025-07-01 17:49:06.900 btags += '+' * lb
2025-07-01 17:49:06.900 elif tag == 'equal':
2025-07-01 17:49:06.900 atags += ' ' * la
2025-07-01 17:49:06.900 btags += ' ' * lb
2025-07-01 17:49:06.900 else:
2025-07-01 17:49:06.900 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.900 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.900 else:
2025-07-01 17:49:06.900 # the synch pair is identical
2025-07-01 17:49:06.900 yield ' ' + aelt
2025-07-01 17:49:06.901
2025-07-01 17:49:06.901 # pump out diffs from after the synch point
2025-07-01 17:49:06.901 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.901
2025-07-01 17:49:06.901 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.901 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.901
2025-07-01 17:49:06.901 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.901 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.901 alo = 202, ahi = 1101
2025-07-01 17:49:06.901 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.901 blo = 202, bhi = 1101
2025-07-01 17:49:06.901
2025-07-01 17:49:06.901 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.901 g = []
2025-07-01 17:49:06.901 if alo < ahi:
2025-07-01 17:49:06.901 if blo < bhi:
2025-07-01 17:49:06.901 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.901 else:
2025-07-01 17:49:06.902 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.902 elif blo < bhi:
2025-07-01 17:49:06.902 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.902
2025-07-01 17:49:06.902 > yield from g
2025-07-01 17:49:06.902
2025-07-01 17:49:06.902 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.902 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.902
2025-07-01 17:49:06.902 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.902 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.902 alo = 202, ahi = 1101
2025-07-01 17:49:06.902 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.902 blo = 202, bhi = 1101
2025-07-01 17:49:06.902
2025-07-01 17:49:06.902 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.903 r"""
2025-07-01 17:49:06.903 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.903 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.903 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.903 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.903
2025-07-01 17:49:06.903 Example:
2025-07-01 17:49:06.903
2025-07-01 17:49:06.903 >>> d = Differ()
2025-07-01 17:49:06.903 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.903 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.903 >>> print(''.join(results), end="")
2025-07-01 17:49:06.903 - abcDefghiJkl
2025-07-01 17:49:06.903 + abcdefGhijkl
2025-07-01 17:49:06.903 """
2025-07-01 17:49:06.903
2025-07-01 17:49:06.903 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.903 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.903 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.904 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.907 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.907
2025-07-01 17:49:06.907 # search for the pair that matches best without being identical
2025-07-01 17:49:06.907 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.907 # on junk -- unless we have to)
2025-07-01 17:49:06.907 for j in range(blo, bhi):
2025-07-01 17:49:06.907 bj = b[j]
2025-07-01 17:49:06.907 cruncher.set_seq2(bj)
2025-07-01 17:49:06.907 for i in range(alo, ahi):
2025-07-01 17:49:06.907 ai = a[i]
2025-07-01 17:49:06.907 if ai == bj:
2025-07-01 17:49:06.907 if eqi is None:
2025-07-01 17:49:06.907 eqi, eqj = i, j
2025-07-01 17:49:06.907 continue
2025-07-01 17:49:06.907 cruncher.set_seq1(ai)
2025-07-01 17:49:06.907 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.907 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.907 # compares by a factor of 3.
2025-07-01 17:49:06.907 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.908 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.908 # of the computation is cached by cruncher
2025-07-01 17:49:06.908 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.908 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.908 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.908 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.908 if best_ratio < cutoff:
2025-07-01 17:49:06.908 # no non-identical "pretty close" pair
2025-07-01 17:49:06.908 if eqi is None:
2025-07-01 17:49:06.908 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.908 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.908 return
2025-07-01 17:49:06.908 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.908 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.908 else:
2025-07-01 17:49:06.908 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.908 eqi = None
2025-07-01 17:49:06.908
2025-07-01 17:49:06.908 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.909 # identical
2025-07-01 17:49:06.909
2025-07-01 17:49:06.909 # pump out diffs from before the synch point
2025-07-01 17:49:06.909 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.909
2025-07-01 17:49:06.909 # do intraline marking on the synch pair
2025-07-01 17:49:06.909 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.909 if eqi is None:
2025-07-01 17:49:06.909 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.909 atags = btags = ""
2025-07-01 17:49:06.909 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.909 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.909 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.909 if tag == 'replace':
2025-07-01 17:49:06.909 atags += '^' * la
2025-07-01 17:49:06.909 btags += '^' * lb
2025-07-01 17:49:06.909 elif tag == 'delete':
2025-07-01 17:49:06.909 atags += '-' * la
2025-07-01 17:49:06.909 elif tag == 'insert':
2025-07-01 17:49:06.909 btags += '+' * lb
2025-07-01 17:49:06.909 elif tag == 'equal':
2025-07-01 17:49:06.910 atags += ' ' * la
2025-07-01 17:49:06.910 btags += ' ' * lb
2025-07-01 17:49:06.910 else:
2025-07-01 17:49:06.910 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.910 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.910 else:
2025-07-01 17:49:06.910 # the synch pair is identical
2025-07-01 17:49:06.910 yield ' ' + aelt
2025-07-01 17:49:06.910
2025-07-01 17:49:06.910 # pump out diffs from after the synch point
2025-07-01 17:49:06.910 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.910
2025-07-01 17:49:06.910 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.910 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.910
2025-07-01 17:49:06.910 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.910 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.910 alo = 203, ahi = 1101
2025-07-01 17:49:06.910 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.911 blo = 203, bhi = 1101
2025-07-01 17:49:06.911
2025-07-01 17:49:06.911 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.911 g = []
2025-07-01 17:49:06.911 if alo < ahi:
2025-07-01 17:49:06.911 if blo < bhi:
2025-07-01 17:49:06.911 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.911 else:
2025-07-01 17:49:06.911 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.911 elif blo < bhi:
2025-07-01 17:49:06.911 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.911
2025-07-01 17:49:06.911 > yield from g
2025-07-01 17:49:06.911
2025-07-01 17:49:06.911 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.911 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.911
2025-07-01 17:49:06.911 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.911 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.912 alo = 203, ahi = 1101
2025-07-01 17:49:06.912 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.912 blo = 203, bhi = 1101
2025-07-01 17:49:06.912
2025-07-01 17:49:06.912 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.912 r"""
2025-07-01 17:49:06.912 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.912 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.912 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.912 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.912
2025-07-01 17:49:06.912 Example:
2025-07-01 17:49:06.912
2025-07-01 17:49:06.912 >>> d = Differ()
2025-07-01 17:49:06.912 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.912 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.912 >>> print(''.join(results), end="")
2025-07-01 17:49:06.912 - abcDefghiJkl
2025-07-01 17:49:06.912 + abcdefGhijkl
2025-07-01 17:49:06.913 """
2025-07-01 17:49:06.913
2025-07-01 17:49:06.913 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.913 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.913 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.913 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.913 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.913
2025-07-01 17:49:06.913 # search for the pair that matches best without being identical
2025-07-01 17:49:06.913 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.913 # on junk -- unless we have to)
2025-07-01 17:49:06.913 for j in range(blo, bhi):
2025-07-01 17:49:06.913 bj = b[j]
2025-07-01 17:49:06.913 cruncher.set_seq2(bj)
2025-07-01 17:49:06.913 for i in range(alo, ahi):
2025-07-01 17:49:06.913 ai = a[i]
2025-07-01 17:49:06.913 if ai == bj:
2025-07-01 17:49:06.913 if eqi is None:
2025-07-01 17:49:06.913 eqi, eqj = i, j
2025-07-01 17:49:06.913 continue
2025-07-01 17:49:06.914 cruncher.set_seq1(ai)
2025-07-01 17:49:06.914 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.914 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.914 # compares by a factor of 3.
2025-07-01 17:49:06.914 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.914 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.914 # of the computation is cached by cruncher
2025-07-01 17:49:06.914 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.914 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.914 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.914 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.914 if best_ratio < cutoff:
2025-07-01 17:49:06.914 # no non-identical "pretty close" pair
2025-07-01 17:49:06.914 if eqi is None:
2025-07-01 17:49:06.914 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.914 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.914 return
2025-07-01 17:49:06.914 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.914 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.915 else:
2025-07-01 17:49:06.915 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.915 eqi = None
2025-07-01 17:49:06.915
2025-07-01 17:49:06.915 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.915 # identical
2025-07-01 17:49:06.915
2025-07-01 17:49:06.915 # pump out diffs from before the synch point
2025-07-01 17:49:06.915 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.915
2025-07-01 17:49:06.915 # do intraline marking on the synch pair
2025-07-01 17:49:06.915 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.915 if eqi is None:
2025-07-01 17:49:06.915 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.915 atags = btags = ""
2025-07-01 17:49:06.915 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.915 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.915 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.915 if tag == 'replace':
2025-07-01 17:49:06.915 atags += '^' * la
2025-07-01 17:49:06.915 btags += '^' * lb
2025-07-01 17:49:06.915 elif tag == 'delete':
2025-07-01 17:49:06.916 atags += '-' * la
2025-07-01 17:49:06.916 elif tag == 'insert':
2025-07-01 17:49:06.916 btags += '+' * lb
2025-07-01 17:49:06.916 elif tag == 'equal':
2025-07-01 17:49:06.916 atags += ' ' * la
2025-07-01 17:49:06.916 btags += ' ' * lb
2025-07-01 17:49:06.916 else:
2025-07-01 17:49:06.916 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.916 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.916 else:
2025-07-01 17:49:06.916 # the synch pair is identical
2025-07-01 17:49:06.916 yield ' ' + aelt
2025-07-01 17:49:06.916
2025-07-01 17:49:06.916 # pump out diffs from after the synch point
2025-07-01 17:49:06.916 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.916
2025-07-01 17:49:06.916 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.916 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.916
2025-07-01 17:49:06.916 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.916 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.917 alo = 204, ahi = 1101
2025-07-01 17:49:06.917 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.917 blo = 204, bhi = 1101
2025-07-01 17:49:06.917
2025-07-01 17:49:06.917 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.917 g = []
2025-07-01 17:49:06.917 if alo < ahi:
2025-07-01 17:49:06.917 if blo < bhi:
2025-07-01 17:49:06.917 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.917 else:
2025-07-01 17:49:06.917 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.917 elif blo < bhi:
2025-07-01 17:49:06.917 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.917
2025-07-01 17:49:06.917 > yield from g
2025-07-01 17:49:06.917
2025-07-01 17:49:06.917 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.917 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.917
2025-07-01 17:49:06.917 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.917 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.918 alo = 204, ahi = 1101
2025-07-01 17:49:06.918 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.918 blo = 204, bhi = 1101
2025-07-01 17:49:06.918
2025-07-01 17:49:06.918 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.918 r"""
2025-07-01 17:49:06.918 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.918 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.918 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.918 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.918
2025-07-01 17:49:06.918 Example:
2025-07-01 17:49:06.918
2025-07-01 17:49:06.918 >>> d = Differ()
2025-07-01 17:49:06.918 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.918 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.918 >>> print(''.join(results), end="")
2025-07-01 17:49:06.918 - abcDefghiJkl
2025-07-01 17:49:06.918 + abcdefGhijkl
2025-07-01 17:49:06.919 """
2025-07-01 17:49:06.919
2025-07-01 17:49:06.919 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.919 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.919 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.919 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.919 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.919
2025-07-01 17:49:06.919 # search for the pair that matches best without being identical
2025-07-01 17:49:06.919 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.919 # on junk -- unless we have to)
2025-07-01 17:49:06.919 for j in range(blo, bhi):
2025-07-01 17:49:06.919 bj = b[j]
2025-07-01 17:49:06.919 cruncher.set_seq2(bj)
2025-07-01 17:49:06.919 for i in range(alo, ahi):
2025-07-01 17:49:06.919 ai = a[i]
2025-07-01 17:49:06.919 if ai == bj:
2025-07-01 17:49:06.919 if eqi is None:
2025-07-01 17:49:06.919 eqi, eqj = i, j
2025-07-01 17:49:06.919 continue
2025-07-01 17:49:06.919 cruncher.set_seq1(ai)
2025-07-01 17:49:06.920 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.924 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.924 # compares by a factor of 3.
2025-07-01 17:49:06.924 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.925 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.925 # of the computation is cached by cruncher
2025-07-01 17:49:06.925 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.925 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.925 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.925 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.925 if best_ratio < cutoff:
2025-07-01 17:49:06.925 # no non-identical "pretty close" pair
2025-07-01 17:49:06.925 if eqi is None:
2025-07-01 17:49:06.925 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.925 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.925 return
2025-07-01 17:49:06.925 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.925 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.925 else:
2025-07-01 17:49:06.925 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.925 eqi = None
2025-07-01 17:49:06.925
2025-07-01 17:49:06.925 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.925 # identical
2025-07-01 17:49:06.925
2025-07-01 17:49:06.926 # pump out diffs from before the synch point
2025-07-01 17:49:06.926 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.926
2025-07-01 17:49:06.926 # do intraline marking on the synch pair
2025-07-01 17:49:06.926 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.926 if eqi is None:
2025-07-01 17:49:06.926 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.926 atags = btags = ""
2025-07-01 17:49:06.926 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.926 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.926 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.926 if tag == 'replace':
2025-07-01 17:49:06.926 atags += '^' * la
2025-07-01 17:49:06.926 btags += '^' * lb
2025-07-01 17:49:06.926 elif tag == 'delete':
2025-07-01 17:49:06.926 atags += '-' * la
2025-07-01 17:49:06.926 elif tag == 'insert':
2025-07-01 17:49:06.926 btags += '+' * lb
2025-07-01 17:49:06.926 elif tag == 'equal':
2025-07-01 17:49:06.927 atags += ' ' * la
2025-07-01 17:49:06.927 btags += ' ' * lb
2025-07-01 17:49:06.927 else:
2025-07-01 17:49:06.927 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.927 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.927 else:
2025-07-01 17:49:06.927 # the synch pair is identical
2025-07-01 17:49:06.927 yield ' ' + aelt
2025-07-01 17:49:06.927
2025-07-01 17:49:06.927 # pump out diffs from after the synch point
2025-07-01 17:49:06.927 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.927
2025-07-01 17:49:06.927 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.927 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.927
2025-07-01 17:49:06.927 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.927 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.927 alo = 205, ahi = 1101
2025-07-01 17:49:06.927 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.927 blo = 205, bhi = 1101
2025-07-01 17:49:06.927
2025-07-01 17:49:06.928 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.928 g = []
2025-07-01 17:49:06.928 if alo < ahi:
2025-07-01 17:49:06.928 if blo < bhi:
2025-07-01 17:49:06.928 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.928 else:
2025-07-01 17:49:06.928 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.928 elif blo < bhi:
2025-07-01 17:49:06.928 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.928
2025-07-01 17:49:06.928 > yield from g
2025-07-01 17:49:06.928
2025-07-01 17:49:06.928 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.928 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.928
2025-07-01 17:49:06.928 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.928 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.928 alo = 205, ahi = 1101
2025-07-01 17:49:06.928 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.928 blo = 205, bhi = 1101
2025-07-01 17:49:06.929
2025-07-01 17:49:06.929 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.929 r"""
2025-07-01 17:49:06.929 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.929 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.929 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.929 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.929
2025-07-01 17:49:06.929 Example:
2025-07-01 17:49:06.929
2025-07-01 17:49:06.929 >>> d = Differ()
2025-07-01 17:49:06.929 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.929 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.929 >>> print(''.join(results), end="")
2025-07-01 17:49:06.929 - abcDefghiJkl
2025-07-01 17:49:06.929 + abcdefGhijkl
2025-07-01 17:49:06.929 """
2025-07-01 17:49:06.929
2025-07-01 17:49:06.929 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.929 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.930 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.930 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.930 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.930
2025-07-01 17:49:06.930 # search for the pair that matches best without being identical
2025-07-01 17:49:06.930 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.930 # on junk -- unless we have to)
2025-07-01 17:49:06.930 for j in range(blo, bhi):
2025-07-01 17:49:06.930 bj = b[j]
2025-07-01 17:49:06.930 cruncher.set_seq2(bj)
2025-07-01 17:49:06.930 for i in range(alo, ahi):
2025-07-01 17:49:06.930 ai = a[i]
2025-07-01 17:49:06.930 if ai == bj:
2025-07-01 17:49:06.930 if eqi is None:
2025-07-01 17:49:06.930 eqi, eqj = i, j
2025-07-01 17:49:06.930 continue
2025-07-01 17:49:06.930 cruncher.set_seq1(ai)
2025-07-01 17:49:06.930 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.930 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.930 # compares by a factor of 3.
2025-07-01 17:49:06.930 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.931 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.931 # of the computation is cached by cruncher
2025-07-01 17:49:06.931 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.931 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.931 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.931 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.931 if best_ratio < cutoff:
2025-07-01 17:49:06.931 # no non-identical "pretty close" pair
2025-07-01 17:49:06.931 if eqi is None:
2025-07-01 17:49:06.931 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.931 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.931 return
2025-07-01 17:49:06.931 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.931 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.931 else:
2025-07-01 17:49:06.931 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.931 eqi = None
2025-07-01 17:49:06.931
2025-07-01 17:49:06.931 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.931 # identical
2025-07-01 17:49:06.931
2025-07-01 17:49:06.931 # pump out diffs from before the synch point
2025-07-01 17:49:06.932 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.932
2025-07-01 17:49:06.932 # do intraline marking on the synch pair
2025-07-01 17:49:06.932 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.932 if eqi is None:
2025-07-01 17:49:06.932 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.932 atags = btags = ""
2025-07-01 17:49:06.932 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.932 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.932 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.932 if tag == 'replace':
2025-07-01 17:49:06.932 atags += '^' * la
2025-07-01 17:49:06.932 btags += '^' * lb
2025-07-01 17:49:06.932 elif tag == 'delete':
2025-07-01 17:49:06.932 atags += '-' * la
2025-07-01 17:49:06.932 elif tag == 'insert':
2025-07-01 17:49:06.932 btags += '+' * lb
2025-07-01 17:49:06.932 elif tag == 'equal':
2025-07-01 17:49:06.932 atags += ' ' * la
2025-07-01 17:49:06.932 btags += ' ' * lb
2025-07-01 17:49:06.932 else:
2025-07-01 17:49:06.932 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.933 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.933 else:
2025-07-01 17:49:06.933 # the synch pair is identical
2025-07-01 17:49:06.933 yield ' ' + aelt
2025-07-01 17:49:06.933
2025-07-01 17:49:06.933 # pump out diffs from after the synch point
2025-07-01 17:49:06.933 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.933
2025-07-01 17:49:06.933 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.933 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.933
2025-07-01 17:49:06.933 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.933 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.933 alo = 206, ahi = 1101
2025-07-01 17:49:06.933 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.933 blo = 206, bhi = 1101
2025-07-01 17:49:06.933
2025-07-01 17:49:06.933 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.933 g = []
2025-07-01 17:49:06.933 if alo < ahi:
2025-07-01 17:49:06.933 if blo < bhi:
2025-07-01 17:49:06.934 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.934 else:
2025-07-01 17:49:06.934 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.934 elif blo < bhi:
2025-07-01 17:49:06.934 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.934
2025-07-01 17:49:06.934 > yield from g
2025-07-01 17:49:06.934
2025-07-01 17:49:06.934 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.934 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.934
2025-07-01 17:49:06.934 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.934 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.934 alo = 206, ahi = 1101
2025-07-01 17:49:06.934 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.934 blo = 206, bhi = 1101
2025-07-01 17:49:06.934
2025-07-01 17:49:06.934 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.934 r"""
2025-07-01 17:49:06.934 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.938 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.938 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.938 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.938
2025-07-01 17:49:06.938 Example:
2025-07-01 17:49:06.938
2025-07-01 17:49:06.938 >>> d = Differ()
2025-07-01 17:49:06.938 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.938 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.938 >>> print(''.join(results), end="")
2025-07-01 17:49:06.938 - abcDefghiJkl
2025-07-01 17:49:06.938 + abcdefGhijkl
2025-07-01 17:49:06.939 """
2025-07-01 17:49:06.939
2025-07-01 17:49:06.939 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.939 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.939 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.939 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.939 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.939
2025-07-01 17:49:06.939 # search for the pair that matches best without being identical
2025-07-01 17:49:06.939 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.939 # on junk -- unless we have to)
2025-07-01 17:49:06.939 for j in range(blo, bhi):
2025-07-01 17:49:06.939 bj = b[j]
2025-07-01 17:49:06.939 cruncher.set_seq2(bj)
2025-07-01 17:49:06.939 for i in range(alo, ahi):
2025-07-01 17:49:06.939 ai = a[i]
2025-07-01 17:49:06.940 if ai == bj:
2025-07-01 17:49:06.940 if eqi is None:
2025-07-01 17:49:06.940 eqi, eqj = i, j
2025-07-01 17:49:06.940 continue
2025-07-01 17:49:06.940 cruncher.set_seq1(ai)
2025-07-01 17:49:06.940 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.940 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.940 # compares by a factor of 3.
2025-07-01 17:49:06.940 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.940 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.940 # of the computation is cached by cruncher
2025-07-01 17:49:06.940 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.940 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.940 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.940 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.940 if best_ratio < cutoff:
2025-07-01 17:49:06.941 # no non-identical "pretty close" pair
2025-07-01 17:49:06.941 if eqi is None:
2025-07-01 17:49:06.941 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.941 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.941 return
2025-07-01 17:49:06.941 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.941 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.941 else:
2025-07-01 17:49:06.941 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.941 eqi = None
2025-07-01 17:49:06.941
2025-07-01 17:49:06.941 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.941 # identical
2025-07-01 17:49:06.941
2025-07-01 17:49:06.941 # pump out diffs from before the synch point
2025-07-01 17:49:06.941 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.941
2025-07-01 17:49:06.942 # do intraline marking on the synch pair
2025-07-01 17:49:06.942 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.942 if eqi is None:
2025-07-01 17:49:06.942 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.942 atags = btags = ""
2025-07-01 17:49:06.942 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.942 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.942 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.942 if tag == 'replace':
2025-07-01 17:49:06.942 atags += '^' * la
2025-07-01 17:49:06.942 btags += '^' * lb
2025-07-01 17:49:06.942 elif tag == 'delete':
2025-07-01 17:49:06.942 atags += '-' * la
2025-07-01 17:49:06.942 elif tag == 'insert':
2025-07-01 17:49:06.942 btags += '+' * lb
2025-07-01 17:49:06.942 elif tag == 'equal':
2025-07-01 17:49:06.943 atags += ' ' * la
2025-07-01 17:49:06.943 btags += ' ' * lb
2025-07-01 17:49:06.943 else:
2025-07-01 17:49:06.943 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.943 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.943 else:
2025-07-01 17:49:06.943 # the synch pair is identical
2025-07-01 17:49:06.943 yield ' ' + aelt
2025-07-01 17:49:06.943
2025-07-01 17:49:06.943 # pump out diffs from after the synch point
2025-07-01 17:49:06.943 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.943
2025-07-01 17:49:06.943 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.943 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.943
2025-07-01 17:49:06.943 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.943 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.944 alo = 207, ahi = 1101
2025-07-01 17:49:06.944 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.944 blo = 207, bhi = 1101
2025-07-01 17:49:06.944
2025-07-01 17:49:06.944 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.944 g = []
2025-07-01 17:49:06.944 if alo < ahi:
2025-07-01 17:49:06.944 if blo < bhi:
2025-07-01 17:49:06.944 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.944 else:
2025-07-01 17:49:06.944 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.944 elif blo < bhi:
2025-07-01 17:49:06.944 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.944
2025-07-01 17:49:06.944 > yield from g
2025-07-01 17:49:06.944
2025-07-01 17:49:06.944 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.945 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.945
2025-07-01 17:49:06.945 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.945 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.945 alo = 207, ahi = 1101
2025-07-01 17:49:06.945 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.945 blo = 207, bhi = 1101
2025-07-01 17:49:06.945
2025-07-01 17:49:06.945 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.945 r"""
2025-07-01 17:49:06.945 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.945 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.945 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.945 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.945
2025-07-01 17:49:06.945 Example:
2025-07-01 17:49:06.945
2025-07-01 17:49:06.946 >>> d = Differ()
2025-07-01 17:49:06.946 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.946 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.946 >>> print(''.join(results), end="")
2025-07-01 17:49:06.946 - abcDefghiJkl
2025-07-01 17:49:06.946 + abcdefGhijkl
2025-07-01 17:49:06.946 """
2025-07-01 17:49:06.946
2025-07-01 17:49:06.946 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.946 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.946 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.946 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.946 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.946
2025-07-01 17:49:06.947 # search for the pair that matches best without being identical
2025-07-01 17:49:06.947 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.947 # on junk -- unless we have to)
2025-07-01 17:49:06.947 for j in range(blo, bhi):
2025-07-01 17:49:06.947 bj = b[j]
2025-07-01 17:49:06.947 cruncher.set_seq2(bj)
2025-07-01 17:49:06.947 for i in range(alo, ahi):
2025-07-01 17:49:06.947 ai = a[i]
2025-07-01 17:49:06.947 if ai == bj:
2025-07-01 17:49:06.947 if eqi is None:
2025-07-01 17:49:06.947 eqi, eqj = i, j
2025-07-01 17:49:06.947 continue
2025-07-01 17:49:06.947 cruncher.set_seq1(ai)
2025-07-01 17:49:06.947 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.947 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.947 # compares by a factor of 3.
2025-07-01 17:49:06.948 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.948 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.948 # of the computation is cached by cruncher
2025-07-01 17:49:06.948 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.948 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.948 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.948 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.948 if best_ratio < cutoff:
2025-07-01 17:49:06.948 # no non-identical "pretty close" pair
2025-07-01 17:49:06.948 if eqi is None:
2025-07-01 17:49:06.948 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.948 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.948 return
2025-07-01 17:49:06.948 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.948 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.948 else:
2025-07-01 17:49:06.949 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.949 eqi = None
2025-07-01 17:49:06.949
2025-07-01 17:49:06.949 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.949 # identical
2025-07-01 17:49:06.949
2025-07-01 17:49:06.949 # pump out diffs from before the synch point
2025-07-01 17:49:06.949 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.949
2025-07-01 17:49:06.949 # do intraline marking on the synch pair
2025-07-01 17:49:06.949 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.949 if eqi is None:
2025-07-01 17:49:06.949 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.949 atags = btags = ""
2025-07-01 17:49:06.949 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.949 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.950 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.950 if tag == 'replace':
2025-07-01 17:49:06.950 atags += '^' * la
2025-07-01 17:49:06.950 btags += '^' * lb
2025-07-01 17:49:06.950 elif tag == 'delete':
2025-07-01 17:49:06.950 atags += '-' * la
2025-07-01 17:49:06.950 elif tag == 'insert':
2025-07-01 17:49:06.950 btags += '+' * lb
2025-07-01 17:49:06.950 elif tag == 'equal':
2025-07-01 17:49:06.950 atags += ' ' * la
2025-07-01 17:49:06.950 btags += ' ' * lb
2025-07-01 17:49:06.950 else:
2025-07-01 17:49:06.950 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.950 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.950 else:
2025-07-01 17:49:06.950 # the synch pair is identical
2025-07-01 17:49:06.950 yield ' ' + aelt
2025-07-01 17:49:06.955
2025-07-01 17:49:06.956 # pump out diffs from after the synch point
2025-07-01 17:49:06.956 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.956
2025-07-01 17:49:06.956 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.956 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.956
2025-07-01 17:49:06.956 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.956 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.956 alo = 208, ahi = 1101
2025-07-01 17:49:06.956 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.956 blo = 208, bhi = 1101
2025-07-01 17:49:06.956
2025-07-01 17:49:06.956 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.956 g = []
2025-07-01 17:49:06.956 if alo < ahi:
2025-07-01 17:49:06.956 if blo < bhi:
2025-07-01 17:49:06.957 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.957 else:
2025-07-01 17:49:06.957 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.957 elif blo < bhi:
2025-07-01 17:49:06.957 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.957
2025-07-01 17:49:06.957 > yield from g
2025-07-01 17:49:06.957
2025-07-01 17:49:06.957 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.957 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.957
2025-07-01 17:49:06.957 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.957 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.957 alo = 208, ahi = 1101
2025-07-01 17:49:06.957 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.957 blo = 208, bhi = 1101
2025-07-01 17:49:06.958
2025-07-01 17:49:06.958 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.958 r"""
2025-07-01 17:49:06.958 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.958 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.958 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.958 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.958
2025-07-01 17:49:06.958 Example:
2025-07-01 17:49:06.958
2025-07-01 17:49:06.958 >>> d = Differ()
2025-07-01 17:49:06.958 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.958 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.958 >>> print(''.join(results), end="")
2025-07-01 17:49:06.958 - abcDefghiJkl
2025-07-01 17:49:06.958 + abcdefGhijkl
2025-07-01 17:49:06.959 """
2025-07-01 17:49:06.959
2025-07-01 17:49:06.959 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.959 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.959 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.959 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.959 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.959
2025-07-01 17:49:06.959 # search for the pair that matches best without being identical
2025-07-01 17:49:06.959 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.959 # on junk -- unless we have to)
2025-07-01 17:49:06.959 for j in range(blo, bhi):
2025-07-01 17:49:06.959 bj = b[j]
2025-07-01 17:49:06.959 cruncher.set_seq2(bj)
2025-07-01 17:49:06.959 for i in range(alo, ahi):
2025-07-01 17:49:06.959 ai = a[i]
2025-07-01 17:49:06.960 if ai == bj:
2025-07-01 17:49:06.960 if eqi is None:
2025-07-01 17:49:06.960 eqi, eqj = i, j
2025-07-01 17:49:06.960 continue
2025-07-01 17:49:06.960 cruncher.set_seq1(ai)
2025-07-01 17:49:06.960 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.960 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.960 # compares by a factor of 3.
2025-07-01 17:49:06.960 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.960 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.960 # of the computation is cached by cruncher
2025-07-01 17:49:06.960 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.960 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.960 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.960 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.960 if best_ratio < cutoff:
2025-07-01 17:49:06.960 # no non-identical "pretty close" pair
2025-07-01 17:49:06.961 if eqi is None:
2025-07-01 17:49:06.961 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.961 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.961 return
2025-07-01 17:49:06.961 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.961 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.961 else:
2025-07-01 17:49:06.961 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.961 eqi = None
2025-07-01 17:49:06.961
2025-07-01 17:49:06.961 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.961 # identical
2025-07-01 17:49:06.961
2025-07-01 17:49:06.961 # pump out diffs from before the synch point
2025-07-01 17:49:06.961 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.961
2025-07-01 17:49:06.962 # do intraline marking on the synch pair
2025-07-01 17:49:06.962 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.962 if eqi is None:
2025-07-01 17:49:06.962 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.962 atags = btags = ""
2025-07-01 17:49:06.962 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.962 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.962 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.962 if tag == 'replace':
2025-07-01 17:49:06.962 atags += '^' * la
2025-07-01 17:49:06.962 btags += '^' * lb
2025-07-01 17:49:06.962 elif tag == 'delete':
2025-07-01 17:49:06.962 atags += '-' * la
2025-07-01 17:49:06.962 elif tag == 'insert':
2025-07-01 17:49:06.962 btags += '+' * lb
2025-07-01 17:49:06.962 elif tag == 'equal':
2025-07-01 17:49:06.962 atags += ' ' * la
2025-07-01 17:49:06.963 btags += ' ' * lb
2025-07-01 17:49:06.963 else:
2025-07-01 17:49:06.963 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.963 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.963 else:
2025-07-01 17:49:06.963 # the synch pair is identical
2025-07-01 17:49:06.963 yield ' ' + aelt
2025-07-01 17:49:06.963
2025-07-01 17:49:06.963 # pump out diffs from after the synch point
2025-07-01 17:49:06.963 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.963
2025-07-01 17:49:06.963 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.963 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.963
2025-07-01 17:49:06.963 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.963 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.964 alo = 209, ahi = 1101
2025-07-01 17:49:06.964 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.964 blo = 209, bhi = 1101
2025-07-01 17:49:06.964
2025-07-01 17:49:06.964 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.964 g = []
2025-07-01 17:49:06.964 if alo < ahi:
2025-07-01 17:49:06.964 if blo < bhi:
2025-07-01 17:49:06.964 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.964 else:
2025-07-01 17:49:06.964 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.964 elif blo < bhi:
2025-07-01 17:49:06.964 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.964
2025-07-01 17:49:06.964 > yield from g
2025-07-01 17:49:06.964
2025-07-01 17:49:06.965 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.965 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.965
2025-07-01 17:49:06.965 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.965 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.965 alo = 209, ahi = 1101
2025-07-01 17:49:06.965 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.965 blo = 209, bhi = 1101
2025-07-01 17:49:06.965
2025-07-01 17:49:06.965 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.965 r"""
2025-07-01 17:49:06.965 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.965 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.965 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.965 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.966
2025-07-01 17:49:06.966 Example:
2025-07-01 17:49:06.966
2025-07-01 17:49:06.966 >>> d = Differ()
2025-07-01 17:49:06.966 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.966 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.966 >>> print(''.join(results), end="")
2025-07-01 17:49:06.966 - abcDefghiJkl
2025-07-01 17:49:06.966 + abcdefGhijkl
2025-07-01 17:49:06.966 """
2025-07-01 17:49:06.966
2025-07-01 17:49:06.966 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.966 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.966 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.966 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.967 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.970
2025-07-01 17:49:06.970 # search for the pair that matches best without being identical
2025-07-01 17:49:06.970 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.970 # on junk -- unless we have to)
2025-07-01 17:49:06.970 for j in range(blo, bhi):
2025-07-01 17:49:06.970 bj = b[j]
2025-07-01 17:49:06.970 cruncher.set_seq2(bj)
2025-07-01 17:49:06.970 for i in range(alo, ahi):
2025-07-01 17:49:06.970 ai = a[i]
2025-07-01 17:49:06.970 if ai == bj:
2025-07-01 17:49:06.970 if eqi is None:
2025-07-01 17:49:06.971 eqi, eqj = i, j
2025-07-01 17:49:06.971 continue
2025-07-01 17:49:06.971 cruncher.set_seq1(ai)
2025-07-01 17:49:06.971 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.971 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.971 # compares by a factor of 3.
2025-07-01 17:49:06.971 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.971 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.971 # of the computation is cached by cruncher
2025-07-01 17:49:06.971 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.971 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.971 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.971 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.971 if best_ratio < cutoff:
2025-07-01 17:49:06.971 # no non-identical "pretty close" pair
2025-07-01 17:49:06.971 if eqi is None:
2025-07-01 17:49:06.972 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.972 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.972 return
2025-07-01 17:49:06.972 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.972 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.972 else:
2025-07-01 17:49:06.972 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.972 eqi = None
2025-07-01 17:49:06.972
2025-07-01 17:49:06.972 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.972 # identical
2025-07-01 17:49:06.972
2025-07-01 17:49:06.972 # pump out diffs from before the synch point
2025-07-01 17:49:06.972 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.972
2025-07-01 17:49:06.972 # do intraline marking on the synch pair
2025-07-01 17:49:06.972 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.972 if eqi is None:
2025-07-01 17:49:06.973 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.973 atags = btags = ""
2025-07-01 17:49:06.973 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.973 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.973 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.973 if tag == 'replace':
2025-07-01 17:49:06.973 atags += '^' * la
2025-07-01 17:49:06.973 btags += '^' * lb
2025-07-01 17:49:06.973 elif tag == 'delete':
2025-07-01 17:49:06.973 atags += '-' * la
2025-07-01 17:49:06.973 elif tag == 'insert':
2025-07-01 17:49:06.973 btags += '+' * lb
2025-07-01 17:49:06.973 elif tag == 'equal':
2025-07-01 17:49:06.973 atags += ' ' * la
2025-07-01 17:49:06.973 btags += ' ' * lb
2025-07-01 17:49:06.973 else:
2025-07-01 17:49:06.974 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.974 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.974 else:
2025-07-01 17:49:06.974 # the synch pair is identical
2025-07-01 17:49:06.974 yield ' ' + aelt
2025-07-01 17:49:06.974
2025-07-01 17:49:06.974 # pump out diffs from after the synch point
2025-07-01 17:49:06.974 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.974
2025-07-01 17:49:06.974 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.974 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.974
2025-07-01 17:49:06.974 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.974 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.974 alo = 210, ahi = 1101
2025-07-01 17:49:06.974 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.974 blo = 210, bhi = 1101
2025-07-01 17:49:06.975
2025-07-01 17:49:06.975 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.975 g = []
2025-07-01 17:49:06.975 if alo < ahi:
2025-07-01 17:49:06.975 if blo < bhi:
2025-07-01 17:49:06.975 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.975 else:
2025-07-01 17:49:06.975 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.975 elif blo < bhi:
2025-07-01 17:49:06.975 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.975
2025-07-01 17:49:06.975 > yield from g
2025-07-01 17:49:06.975
2025-07-01 17:49:06.975 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.975 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.975
2025-07-01 17:49:06.975 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.976 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.976 alo = 210, ahi = 1101
2025-07-01 17:49:06.976 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.976 blo = 210, bhi = 1101
2025-07-01 17:49:06.976
2025-07-01 17:49:06.976 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.976 r"""
2025-07-01 17:49:06.976 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.976 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.976 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.976 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.976
2025-07-01 17:49:06.976 Example:
2025-07-01 17:49:06.976
2025-07-01 17:49:06.976 >>> d = Differ()
2025-07-01 17:49:06.976 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.977 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.977 >>> print(''.join(results), end="")
2025-07-01 17:49:06.977 - abcDefghiJkl
2025-07-01 17:49:06.977 + abcdefGhijkl
2025-07-01 17:49:06.977 """
2025-07-01 17:49:06.977
2025-07-01 17:49:06.977 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.977 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.977 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.977 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.977 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.977
2025-07-01 17:49:06.977 # search for the pair that matches best without being identical
2025-07-01 17:49:06.977 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.978 # on junk -- unless we have to)
2025-07-01 17:49:06.978 for j in range(blo, bhi):
2025-07-01 17:49:06.978 bj = b[j]
2025-07-01 17:49:06.978 cruncher.set_seq2(bj)
2025-07-01 17:49:06.978 for i in range(alo, ahi):
2025-07-01 17:49:06.978 ai = a[i]
2025-07-01 17:49:06.978 if ai == bj:
2025-07-01 17:49:06.978 if eqi is None:
2025-07-01 17:49:06.978 eqi, eqj = i, j
2025-07-01 17:49:06.978 continue
2025-07-01 17:49:06.978 cruncher.set_seq1(ai)
2025-07-01 17:49:06.978 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.978 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.978 # compares by a factor of 3.
2025-07-01 17:49:06.978 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.978 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.978 # of the computation is cached by cruncher
2025-07-01 17:49:06.979 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.979 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.979 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.979 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.979 if best_ratio < cutoff:
2025-07-01 17:49:06.979 # no non-identical "pretty close" pair
2025-07-01 17:49:06.979 if eqi is None:
2025-07-01 17:49:06.979 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.979 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.979 return
2025-07-01 17:49:06.979 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.979 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.979 else:
2025-07-01 17:49:06.979 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.979 eqi = None
2025-07-01 17:49:06.979
2025-07-01 17:49:06.979 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.980 # identical
2025-07-01 17:49:06.980
2025-07-01 17:49:06.980 # pump out diffs from before the synch point
2025-07-01 17:49:06.980 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.980
2025-07-01 17:49:06.980 # do intraline marking on the synch pair
2025-07-01 17:49:06.980 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.980 if eqi is None:
2025-07-01 17:49:06.980 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.980 atags = btags = ""
2025-07-01 17:49:06.980 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.980 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.980 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.980 if tag == 'replace':
2025-07-01 17:49:06.980 atags += '^' * la
2025-07-01 17:49:06.980 btags += '^' * lb
2025-07-01 17:49:06.980 elif tag == 'delete':
2025-07-01 17:49:06.981 atags += '-' * la
2025-07-01 17:49:06.981 elif tag == 'insert':
2025-07-01 17:49:06.981 btags += '+' * lb
2025-07-01 17:49:06.981 elif tag == 'equal':
2025-07-01 17:49:06.981 atags += ' ' * la
2025-07-01 17:49:06.981 btags += ' ' * lb
2025-07-01 17:49:06.981 else:
2025-07-01 17:49:06.981 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.981 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.981 else:
2025-07-01 17:49:06.981 # the synch pair is identical
2025-07-01 17:49:06.981 yield ' ' + aelt
2025-07-01 17:49:06.981
2025-07-01 17:49:06.981 # pump out diffs from after the synch point
2025-07-01 17:49:06.981 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.981
2025-07-01 17:49:06.981 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.982 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.982
2025-07-01 17:49:06.982 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.982 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.982 alo = 211, ahi = 1101
2025-07-01 17:49:06.982 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.982 blo = 211, bhi = 1101
2025-07-01 17:49:06.982
2025-07-01 17:49:06.982 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.982 g = []
2025-07-01 17:49:06.982 if alo < ahi:
2025-07-01 17:49:06.982 if blo < bhi:
2025-07-01 17:49:06.982 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.982 else:
2025-07-01 17:49:06.982 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.982 elif blo < bhi:
2025-07-01 17:49:06.983 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.988
2025-07-01 17:49:06.988 > yield from g
2025-07-01 17:49:06.988
2025-07-01 17:49:06.988 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.988 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.988
2025-07-01 17:49:06.988 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.988 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.988 alo = 211, ahi = 1101
2025-07-01 17:49:06.988 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.988 blo = 211, bhi = 1101
2025-07-01 17:49:06.988
2025-07-01 17:49:06.988 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.988 r"""
2025-07-01 17:49:06.988 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.988 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.989 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.989 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.989
2025-07-01 17:49:06.989 Example:
2025-07-01 17:49:06.989
2025-07-01 17:49:06.989 >>> d = Differ()
2025-07-01 17:49:06.989 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.989 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.989 >>> print(''.join(results), end="")
2025-07-01 17:49:06.989 - abcDefghiJkl
2025-07-01 17:49:06.989 + abcdefGhijkl
2025-07-01 17:49:06.989 """
2025-07-01 17:49:06.989
2025-07-01 17:49:06.989 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.989 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.990 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.990 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.990 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.990
2025-07-01 17:49:06.990 # search for the pair that matches best without being identical
2025-07-01 17:49:06.990 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.990 # on junk -- unless we have to)
2025-07-01 17:49:06.990 for j in range(blo, bhi):
2025-07-01 17:49:06.990 bj = b[j]
2025-07-01 17:49:06.990 cruncher.set_seq2(bj)
2025-07-01 17:49:06.990 for i in range(alo, ahi):
2025-07-01 17:49:06.990 ai = a[i]
2025-07-01 17:49:06.990 if ai == bj:
2025-07-01 17:49:06.990 if eqi is None:
2025-07-01 17:49:06.990 eqi, eqj = i, j
2025-07-01 17:49:06.990 continue
2025-07-01 17:49:06.990 cruncher.set_seq1(ai)
2025-07-01 17:49:06.991 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.991 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.991 # compares by a factor of 3.
2025-07-01 17:49:06.991 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.991 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.991 # of the computation is cached by cruncher
2025-07-01 17:49:06.991 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.991 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.991 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.991 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.991 if best_ratio < cutoff:
2025-07-01 17:49:06.991 # no non-identical "pretty close" pair
2025-07-01 17:49:06.991 if eqi is None:
2025-07-01 17:49:06.991 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:06.991 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.991 return
2025-07-01 17:49:06.991 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:06.992 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:06.992 else:
2025-07-01 17:49:06.992 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:06.992 eqi = None
2025-07-01 17:49:06.992
2025-07-01 17:49:06.992 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:06.992 # identical
2025-07-01 17:49:06.992
2025-07-01 17:49:06.992 # pump out diffs from before the synch point
2025-07-01 17:49:06.992 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:06.992
2025-07-01 17:49:06.992 # do intraline marking on the synch pair
2025-07-01 17:49:06.992 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:06.992 if eqi is None:
2025-07-01 17:49:06.992 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:06.992 atags = btags = ""
2025-07-01 17:49:06.993 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:06.993 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:06.993 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:06.993 if tag == 'replace':
2025-07-01 17:49:06.993 atags += '^' * la
2025-07-01 17:49:06.993 btags += '^' * lb
2025-07-01 17:49:06.993 elif tag == 'delete':
2025-07-01 17:49:06.993 atags += '-' * la
2025-07-01 17:49:06.993 elif tag == 'insert':
2025-07-01 17:49:06.993 btags += '+' * lb
2025-07-01 17:49:06.993 elif tag == 'equal':
2025-07-01 17:49:06.993 atags += ' ' * la
2025-07-01 17:49:06.993 btags += ' ' * lb
2025-07-01 17:49:06.993 else:
2025-07-01 17:49:06.993 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:06.993 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:06.993 else:
2025-07-01 17:49:06.994 # the synch pair is identical
2025-07-01 17:49:06.994 yield ' ' + aelt
2025-07-01 17:49:06.994
2025-07-01 17:49:06.994 # pump out diffs from after the synch point
2025-07-01 17:49:06.994 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:06.994
2025-07-01 17:49:06.994 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:06.994 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.994
2025-07-01 17:49:06.994 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.994 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.994 alo = 212, ahi = 1101
2025-07-01 17:49:06.994 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.994 blo = 212, bhi = 1101
2025-07-01 17:49:06.994
2025-07-01 17:49:06.994 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.994 g = []
2025-07-01 17:49:06.995 if alo < ahi:
2025-07-01 17:49:06.995 if blo < bhi:
2025-07-01 17:49:06.995 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:06.995 else:
2025-07-01 17:49:06.995 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:06.995 elif blo < bhi:
2025-07-01 17:49:06.995 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:06.995
2025-07-01 17:49:06.995 > yield from g
2025-07-01 17:49:06.995
2025-07-01 17:49:06.995 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:06.995 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:06.995
2025-07-01 17:49:06.995 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:06.995 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:06.995 alo = 212, ahi = 1101
2025-07-01 17:49:06.995 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:06.996 blo = 212, bhi = 1101
2025-07-01 17:49:06.996
2025-07-01 17:49:06.996 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:06.996 r"""
2025-07-01 17:49:06.996 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:06.996 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:06.996 synch point, and intraline difference marking is done on the
2025-07-01 17:49:06.996 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:06.996
2025-07-01 17:49:06.996 Example:
2025-07-01 17:49:06.996
2025-07-01 17:49:06.996 >>> d = Differ()
2025-07-01 17:49:06.996 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:06.996 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:06.996 >>> print(''.join(results), end="")
2025-07-01 17:49:06.996 - abcDefghiJkl
2025-07-01 17:49:06.996 + abcdefGhijkl
2025-07-01 17:49:06.997 """
2025-07-01 17:49:06.997
2025-07-01 17:49:06.997 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:06.997 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:06.997 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:06.997 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:06.997 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:06.997
2025-07-01 17:49:06.997 # search for the pair that matches best without being identical
2025-07-01 17:49:06.997 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:06.997 # on junk -- unless we have to)
2025-07-01 17:49:06.997 for j in range(blo, bhi):
2025-07-01 17:49:06.997 bj = b[j]
2025-07-01 17:49:06.997 cruncher.set_seq2(bj)
2025-07-01 17:49:06.997 for i in range(alo, ahi):
2025-07-01 17:49:06.998 ai = a[i]
2025-07-01 17:49:06.998 if ai == bj:
2025-07-01 17:49:06.998 if eqi is None:
2025-07-01 17:49:06.998 eqi, eqj = i, j
2025-07-01 17:49:06.998 continue
2025-07-01 17:49:06.998 cruncher.set_seq1(ai)
2025-07-01 17:49:06.998 # computing similarity is expensive, so use the quick
2025-07-01 17:49:06.998 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:06.998 # compares by a factor of 3.
2025-07-01 17:49:06.998 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:06.998 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:06.998 # of the computation is cached by cruncher
2025-07-01 17:49:06.998 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:06.998 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:06.998 cruncher.ratio() > best_ratio:
2025-07-01 17:49:06.998 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:06.998 if best_ratio < cutoff:
2025-07-01 17:49:06.999 # no non-identical "pretty close" pair
2025-07-01 17:49:07.004 if eqi is None:
2025-07-01 17:49:07.004 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.004 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.004 return
2025-07-01 17:49:07.004 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.004 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.004 else:
2025-07-01 17:49:07.004 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.004 eqi = None
2025-07-01 17:49:07.004
2025-07-01 17:49:07.004 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.004 # identical
2025-07-01 17:49:07.004
2025-07-01 17:49:07.004 # pump out diffs from before the synch point
2025-07-01 17:49:07.005 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.005
2025-07-01 17:49:07.005 # do intraline marking on the synch pair
2025-07-01 17:49:07.005 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.005 if eqi is None:
2025-07-01 17:49:07.005 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.005 atags = btags = ""
2025-07-01 17:49:07.005 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.005 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.005 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.005 if tag == 'replace':
2025-07-01 17:49:07.005 atags += '^' * la
2025-07-01 17:49:07.005 btags += '^' * lb
2025-07-01 17:49:07.005 elif tag == 'delete':
2025-07-01 17:49:07.005 atags += '-' * la
2025-07-01 17:49:07.005 elif tag == 'insert':
2025-07-01 17:49:07.006 btags += '+' * lb
2025-07-01 17:49:07.006 elif tag == 'equal':
2025-07-01 17:49:07.006 atags += ' ' * la
2025-07-01 17:49:07.006 btags += ' ' * lb
2025-07-01 17:49:07.006 else:
2025-07-01 17:49:07.006 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.006 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.006 else:
2025-07-01 17:49:07.006 # the synch pair is identical
2025-07-01 17:49:07.006 yield ' ' + aelt
2025-07-01 17:49:07.006
2025-07-01 17:49:07.006 # pump out diffs from after the synch point
2025-07-01 17:49:07.006 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.006
2025-07-01 17:49:07.006 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.006 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.006
2025-07-01 17:49:07.007 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.007 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.007 alo = 213, ahi = 1101
2025-07-01 17:49:07.007 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.007 blo = 213, bhi = 1101
2025-07-01 17:49:07.007
2025-07-01 17:49:07.007 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.007 g = []
2025-07-01 17:49:07.007 if alo < ahi:
2025-07-01 17:49:07.007 if blo < bhi:
2025-07-01 17:49:07.007 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.007 else:
2025-07-01 17:49:07.007 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.007 elif blo < bhi:
2025-07-01 17:49:07.007 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.007
2025-07-01 17:49:07.008 > yield from g
2025-07-01 17:49:07.008
2025-07-01 17:49:07.008 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.008 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.008
2025-07-01 17:49:07.008 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.008 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.008 alo = 213, ahi = 1101
2025-07-01 17:49:07.008 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.008 blo = 213, bhi = 1101
2025-07-01 17:49:07.008
2025-07-01 17:49:07.008 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.008 r"""
2025-07-01 17:49:07.008 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.009 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.009 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.009 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.009
2025-07-01 17:49:07.009 Example:
2025-07-01 17:49:07.009
2025-07-01 17:49:07.009 >>> d = Differ()
2025-07-01 17:49:07.009 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.009 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.009 >>> print(''.join(results), end="")
2025-07-01 17:49:07.009 - abcDefghiJkl
2025-07-01 17:49:07.009 + abcdefGhijkl
2025-07-01 17:49:07.009 """
2025-07-01 17:49:07.009
2025-07-01 17:49:07.009 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.010 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.010 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.010 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.010 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.010
2025-07-01 17:49:07.010 # search for the pair that matches best without being identical
2025-07-01 17:49:07.010 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.010 # on junk -- unless we have to)
2025-07-01 17:49:07.010 for j in range(blo, bhi):
2025-07-01 17:49:07.010 bj = b[j]
2025-07-01 17:49:07.010 cruncher.set_seq2(bj)
2025-07-01 17:49:07.010 for i in range(alo, ahi):
2025-07-01 17:49:07.010 ai = a[i]
2025-07-01 17:49:07.010 if ai == bj:
2025-07-01 17:49:07.010 if eqi is None:
2025-07-01 17:49:07.010 eqi, eqj = i, j
2025-07-01 17:49:07.010 continue
2025-07-01 17:49:07.011 cruncher.set_seq1(ai)
2025-07-01 17:49:07.011 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.011 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.011 # compares by a factor of 3.
2025-07-01 17:49:07.011 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.011 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.011 # of the computation is cached by cruncher
2025-07-01 17:49:07.011 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.011 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.011 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.011 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.011 if best_ratio < cutoff:
2025-07-01 17:49:07.011 # no non-identical "pretty close" pair
2025-07-01 17:49:07.011 if eqi is None:
2025-07-01 17:49:07.011 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.011 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.011 return
2025-07-01 17:49:07.011 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.012 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.012 else:
2025-07-01 17:49:07.012 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.012 eqi = None
2025-07-01 17:49:07.012
2025-07-01 17:49:07.012 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.012 # identical
2025-07-01 17:49:07.012
2025-07-01 17:49:07.012 # pump out diffs from before the synch point
2025-07-01 17:49:07.012 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.012
2025-07-01 17:49:07.012 # do intraline marking on the synch pair
2025-07-01 17:49:07.012 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.012 if eqi is None:
2025-07-01 17:49:07.012 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.012 atags = btags = ""
2025-07-01 17:49:07.013 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.013 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.013 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.013 if tag == 'replace':
2025-07-01 17:49:07.013 atags += '^' * la
2025-07-01 17:49:07.013 btags += '^' * lb
2025-07-01 17:49:07.013 elif tag == 'delete':
2025-07-01 17:49:07.013 atags += '-' * la
2025-07-01 17:49:07.013 elif tag == 'insert':
2025-07-01 17:49:07.013 btags += '+' * lb
2025-07-01 17:49:07.013 elif tag == 'equal':
2025-07-01 17:49:07.013 atags += ' ' * la
2025-07-01 17:49:07.013 btags += ' ' * lb
2025-07-01 17:49:07.013 else:
2025-07-01 17:49:07.013 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.013 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.013 else:
2025-07-01 17:49:07.014 # the synch pair is identical
2025-07-01 17:49:07.014 yield ' ' + aelt
2025-07-01 17:49:07.014
2025-07-01 17:49:07.014 # pump out diffs from after the synch point
2025-07-01 17:49:07.014 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.014
2025-07-01 17:49:07.014 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.014 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.014
2025-07-01 17:49:07.014 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.014 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.014 alo = 214, ahi = 1101
2025-07-01 17:49:07.014 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.014 blo = 214, bhi = 1101
2025-07-01 17:49:07.014
2025-07-01 17:49:07.019 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.019 g = []
2025-07-01 17:49:07.019 if alo < ahi:
2025-07-01 17:49:07.020 if blo < bhi:
2025-07-01 17:49:07.020 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.020 else:
2025-07-01 17:49:07.020 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.020 elif blo < bhi:
2025-07-01 17:49:07.020 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.020
2025-07-01 17:49:07.020 > yield from g
2025-07-01 17:49:07.020
2025-07-01 17:49:07.020 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.020 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.020
2025-07-01 17:49:07.020 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.020 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.020 alo = 214, ahi = 1101
2025-07-01 17:49:07.020 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.021 blo = 214, bhi = 1101
2025-07-01 17:49:07.021
2025-07-01 17:49:07.021 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.021 r"""
2025-07-01 17:49:07.021 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.021 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.021 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.021 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.021
2025-07-01 17:49:07.021 Example:
2025-07-01 17:49:07.021
2025-07-01 17:49:07.021 >>> d = Differ()
2025-07-01 17:49:07.021 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.021 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.021 >>> print(''.join(results), end="")
2025-07-01 17:49:07.021 - abcDefghiJkl
2025-07-01 17:49:07.022 + abcdefGhijkl
2025-07-01 17:49:07.022 """
2025-07-01 17:49:07.022
2025-07-01 17:49:07.022 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.022 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.022 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.022 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.022 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.022
2025-07-01 17:49:07.022 # search for the pair that matches best without being identical
2025-07-01 17:49:07.022 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.022 # on junk -- unless we have to)
2025-07-01 17:49:07.022 for j in range(blo, bhi):
2025-07-01 17:49:07.022 bj = b[j]
2025-07-01 17:49:07.022 cruncher.set_seq2(bj)
2025-07-01 17:49:07.023 for i in range(alo, ahi):
2025-07-01 17:49:07.023 ai = a[i]
2025-07-01 17:49:07.023 if ai == bj:
2025-07-01 17:49:07.023 if eqi is None:
2025-07-01 17:49:07.023 eqi, eqj = i, j
2025-07-01 17:49:07.023 continue
2025-07-01 17:49:07.023 cruncher.set_seq1(ai)
2025-07-01 17:49:07.023 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.023 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.023 # compares by a factor of 3.
2025-07-01 17:49:07.023 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.023 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.023 # of the computation is cached by cruncher
2025-07-01 17:49:07.023 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.023 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.024 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.024 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.024 if best_ratio < cutoff:
2025-07-01 17:49:07.024 # no non-identical "pretty close" pair
2025-07-01 17:49:07.024 if eqi is None:
2025-07-01 17:49:07.024 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.024 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.024 return
2025-07-01 17:49:07.024 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.024 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.024 else:
2025-07-01 17:49:07.024 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.024 eqi = None
2025-07-01 17:49:07.024
2025-07-01 17:49:07.024 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.024 # identical
2025-07-01 17:49:07.024
2025-07-01 17:49:07.025 # pump out diffs from before the synch point
2025-07-01 17:49:07.025 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.025
2025-07-01 17:49:07.025 # do intraline marking on the synch pair
2025-07-01 17:49:07.025 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.025 if eqi is None:
2025-07-01 17:49:07.025 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.025 atags = btags = ""
2025-07-01 17:49:07.025 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.025 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.025 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.025 if tag == 'replace':
2025-07-01 17:49:07.025 atags += '^' * la
2025-07-01 17:49:07.025 btags += '^' * lb
2025-07-01 17:49:07.025 elif tag == 'delete':
2025-07-01 17:49:07.025 atags += '-' * la
2025-07-01 17:49:07.026 elif tag == 'insert':
2025-07-01 17:49:07.026 btags += '+' * lb
2025-07-01 17:49:07.026 elif tag == 'equal':
2025-07-01 17:49:07.026 atags += ' ' * la
2025-07-01 17:49:07.026 btags += ' ' * lb
2025-07-01 17:49:07.026 else:
2025-07-01 17:49:07.026 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.026 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.026 else:
2025-07-01 17:49:07.026 # the synch pair is identical
2025-07-01 17:49:07.026 yield ' ' + aelt
2025-07-01 17:49:07.026
2025-07-01 17:49:07.026 # pump out diffs from after the synch point
2025-07-01 17:49:07.026 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.026
2025-07-01 17:49:07.026 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.026 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.027
2025-07-01 17:49:07.027 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.027 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.027 alo = 215, ahi = 1101
2025-07-01 17:49:07.027 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.027 blo = 215, bhi = 1101
2025-07-01 17:49:07.027
2025-07-01 17:49:07.027 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.027 g = []
2025-07-01 17:49:07.027 if alo < ahi:
2025-07-01 17:49:07.027 if blo < bhi:
2025-07-01 17:49:07.027 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.027 else:
2025-07-01 17:49:07.027 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.027 elif blo < bhi:
2025-07-01 17:49:07.027 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.027
2025-07-01 17:49:07.028 > yield from g
2025-07-01 17:49:07.028
2025-07-01 17:49:07.028 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.028 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.028
2025-07-01 17:49:07.028 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.028 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.028 alo = 215, ahi = 1101
2025-07-01 17:49:07.028 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.028 blo = 215, bhi = 1101
2025-07-01 17:49:07.028
2025-07-01 17:49:07.028 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.028 r"""
2025-07-01 17:49:07.028 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.028 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.028 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.029 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.029
2025-07-01 17:49:07.029 Example:
2025-07-01 17:49:07.029
2025-07-01 17:49:07.029 >>> d = Differ()
2025-07-01 17:49:07.029 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.029 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.029 >>> print(''.join(results), end="")
2025-07-01 17:49:07.029 - abcDefghiJkl
2025-07-01 17:49:07.029 + abcdefGhijkl
2025-07-01 17:49:07.029 """
2025-07-01 17:49:07.029
2025-07-01 17:49:07.029 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.029 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.029 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.030 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.030 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.030
2025-07-01 17:49:07.030 # search for the pair that matches best without being identical
2025-07-01 17:49:07.030 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.030 # on junk -- unless we have to)
2025-07-01 17:49:07.030 for j in range(blo, bhi):
2025-07-01 17:49:07.030 bj = b[j]
2025-07-01 17:49:07.030 cruncher.set_seq2(bj)
2025-07-01 17:49:07.030 for i in range(alo, ahi):
2025-07-01 17:49:07.030 ai = a[i]
2025-07-01 17:49:07.030 if ai == bj:
2025-07-01 17:49:07.030 if eqi is None:
2025-07-01 17:49:07.030 eqi, eqj = i, j
2025-07-01 17:49:07.030 continue
2025-07-01 17:49:07.030 cruncher.set_seq1(ai)
2025-07-01 17:49:07.030 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.030 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.034 # compares by a factor of 3.
2025-07-01 17:49:07.034 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.034 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.034 # of the computation is cached by cruncher
2025-07-01 17:49:07.034 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.034 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.034 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.034 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.034 if best_ratio < cutoff:
2025-07-01 17:49:07.034 # no non-identical "pretty close" pair
2025-07-01 17:49:07.034 if eqi is None:
2025-07-01 17:49:07.035 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.035 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.035 return
2025-07-01 17:49:07.035 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.035 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.035 else:
2025-07-01 17:49:07.035 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.035 eqi = None
2025-07-01 17:49:07.035
2025-07-01 17:49:07.035 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.035 # identical
2025-07-01 17:49:07.035
2025-07-01 17:49:07.035 # pump out diffs from before the synch point
2025-07-01 17:49:07.035 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.035
2025-07-01 17:49:07.035 # do intraline marking on the synch pair
2025-07-01 17:49:07.035 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.036 if eqi is None:
2025-07-01 17:49:07.036 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.036 atags = btags = ""
2025-07-01 17:49:07.036 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.036 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.036 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.036 if tag == 'replace':
2025-07-01 17:49:07.036 atags += '^' * la
2025-07-01 17:49:07.036 btags += '^' * lb
2025-07-01 17:49:07.036 elif tag == 'delete':
2025-07-01 17:49:07.036 atags += '-' * la
2025-07-01 17:49:07.036 elif tag == 'insert':
2025-07-01 17:49:07.036 btags += '+' * lb
2025-07-01 17:49:07.036 elif tag == 'equal':
2025-07-01 17:49:07.036 atags += ' ' * la
2025-07-01 17:49:07.036 btags += ' ' * lb
2025-07-01 17:49:07.037 else:
2025-07-01 17:49:07.037 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.037 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.037 else:
2025-07-01 17:49:07.037 # the synch pair is identical
2025-07-01 17:49:07.037 yield ' ' + aelt
2025-07-01 17:49:07.037
2025-07-01 17:49:07.037 # pump out diffs from after the synch point
2025-07-01 17:49:07.037 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.037
2025-07-01 17:49:07.037 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.037 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.037
2025-07-01 17:49:07.037 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.037 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.038 alo = 216, ahi = 1101
2025-07-01 17:49:07.038 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.038 blo = 216, bhi = 1101
2025-07-01 17:49:07.038
2025-07-01 17:49:07.038 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.038 g = []
2025-07-01 17:49:07.038 if alo < ahi:
2025-07-01 17:49:07.038 if blo < bhi:
2025-07-01 17:49:07.038 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.038 else:
2025-07-01 17:49:07.038 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.038 elif blo < bhi:
2025-07-01 17:49:07.038 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.038
2025-07-01 17:49:07.038 > yield from g
2025-07-01 17:49:07.038
2025-07-01 17:49:07.038 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.039 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.039
2025-07-01 17:49:07.039 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.039 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.039 alo = 216, ahi = 1101
2025-07-01 17:49:07.039 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.039 blo = 216, bhi = 1101
2025-07-01 17:49:07.039
2025-07-01 17:49:07.039 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.039 r"""
2025-07-01 17:49:07.039 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.039 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.039 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.039 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.039
2025-07-01 17:49:07.039 Example:
2025-07-01 17:49:07.040
2025-07-01 17:49:07.040 >>> d = Differ()
2025-07-01 17:49:07.040 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.040 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.040 >>> print(''.join(results), end="")
2025-07-01 17:49:07.040 - abcDefghiJkl
2025-07-01 17:49:07.040 + abcdefGhijkl
2025-07-01 17:49:07.040 """
2025-07-01 17:49:07.040
2025-07-01 17:49:07.040 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.040 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.040 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.040 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.040 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.040
2025-07-01 17:49:07.041 # search for the pair that matches best without being identical
2025-07-01 17:49:07.041 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.041 # on junk -- unless we have to)
2025-07-01 17:49:07.041 for j in range(blo, bhi):
2025-07-01 17:49:07.041 bj = b[j]
2025-07-01 17:49:07.041 cruncher.set_seq2(bj)
2025-07-01 17:49:07.041 for i in range(alo, ahi):
2025-07-01 17:49:07.041 ai = a[i]
2025-07-01 17:49:07.041 if ai == bj:
2025-07-01 17:49:07.041 if eqi is None:
2025-07-01 17:49:07.041 eqi, eqj = i, j
2025-07-01 17:49:07.041 continue
2025-07-01 17:49:07.041 cruncher.set_seq1(ai)
2025-07-01 17:49:07.041 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.041 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.041 # compares by a factor of 3.
2025-07-01 17:49:07.041 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.041 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.042 # of the computation is cached by cruncher
2025-07-01 17:49:07.042 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.042 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.042 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.042 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.042 if best_ratio < cutoff:
2025-07-01 17:49:07.042 # no non-identical "pretty close" pair
2025-07-01 17:49:07.042 if eqi is None:
2025-07-01 17:49:07.042 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.042 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.042 return
2025-07-01 17:49:07.042 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.042 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.042 else:
2025-07-01 17:49:07.042 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.042 eqi = None
2025-07-01 17:49:07.043
2025-07-01 17:49:07.043 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.043 # identical
2025-07-01 17:49:07.043
2025-07-01 17:49:07.043 # pump out diffs from before the synch point
2025-07-01 17:49:07.043 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.043
2025-07-01 17:49:07.043 # do intraline marking on the synch pair
2025-07-01 17:49:07.043 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.043 if eqi is None:
2025-07-01 17:49:07.043 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.043 atags = btags = ""
2025-07-01 17:49:07.043 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.043 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.043 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.043 if tag == 'replace':
2025-07-01 17:49:07.043 atags += '^' * la
2025-07-01 17:49:07.043 btags += '^' * lb
2025-07-01 17:49:07.044 elif tag == 'delete':
2025-07-01 17:49:07.044 atags += '-' * la
2025-07-01 17:49:07.044 elif tag == 'insert':
2025-07-01 17:49:07.044 btags += '+' * lb
2025-07-01 17:49:07.044 elif tag == 'equal':
2025-07-01 17:49:07.044 atags += ' ' * la
2025-07-01 17:49:07.044 btags += ' ' * lb
2025-07-01 17:49:07.044 else:
2025-07-01 17:49:07.044 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.044 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.044 else:
2025-07-01 17:49:07.044 # the synch pair is identical
2025-07-01 17:49:07.044 yield ' ' + aelt
2025-07-01 17:49:07.044
2025-07-01 17:49:07.044 # pump out diffs from after the synch point
2025-07-01 17:49:07.044 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.044
2025-07-01 17:49:07.045 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.045 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.045
2025-07-01 17:49:07.045 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.045 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.045 alo = 217, ahi = 1101
2025-07-01 17:49:07.045 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.045 blo = 217, bhi = 1101
2025-07-01 17:49:07.045
2025-07-01 17:49:07.045 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.045 g = []
2025-07-01 17:49:07.045 if alo < ahi:
2025-07-01 17:49:07.045 if blo < bhi:
2025-07-01 17:49:07.045 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.046 else:
2025-07-01 17:49:07.046 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.046 elif blo < bhi:
2025-07-01 17:49:07.046 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.046
2025-07-01 17:49:07.046 > yield from g
2025-07-01 17:49:07.046
2025-07-01 17:49:07.046 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.046 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.046
2025-07-01 17:49:07.046 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.046 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.046 alo = 217, ahi = 1101
2025-07-01 17:49:07.046 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.046 blo = 217, bhi = 1101
2025-07-01 17:49:07.046
2025-07-01 17:49:07.046 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.051 r"""
2025-07-01 17:49:07.051 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.051 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.052 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.052 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.052
2025-07-01 17:49:07.052 Example:
2025-07-01 17:49:07.052
2025-07-01 17:49:07.052 >>> d = Differ()
2025-07-01 17:49:07.052 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.052 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.052 >>> print(''.join(results), end="")
2025-07-01 17:49:07.052 - abcDefghiJkl
2025-07-01 17:49:07.052 + abcdefGhijkl
2025-07-01 17:49:07.052 """
2025-07-01 17:49:07.052
2025-07-01 17:49:07.052 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.053 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.053 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.053 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.053 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.053
2025-07-01 17:49:07.053 # search for the pair that matches best without being identical
2025-07-01 17:49:07.053 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.053 # on junk -- unless we have to)
2025-07-01 17:49:07.053 for j in range(blo, bhi):
2025-07-01 17:49:07.053 bj = b[j]
2025-07-01 17:49:07.053 cruncher.set_seq2(bj)
2025-07-01 17:49:07.053 for i in range(alo, ahi):
2025-07-01 17:49:07.053 ai = a[i]
2025-07-01 17:49:07.053 if ai == bj:
2025-07-01 17:49:07.053 if eqi is None:
2025-07-01 17:49:07.054 eqi, eqj = i, j
2025-07-01 17:49:07.054 continue
2025-07-01 17:49:07.054 cruncher.set_seq1(ai)
2025-07-01 17:49:07.054 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.054 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.054 # compares by a factor of 3.
2025-07-01 17:49:07.054 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.054 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.054 # of the computation is cached by cruncher
2025-07-01 17:49:07.054 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.054 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.054 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.054 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.054 if best_ratio < cutoff:
2025-07-01 17:49:07.054 # no non-identical "pretty close" pair
2025-07-01 17:49:07.054 if eqi is None:
2025-07-01 17:49:07.054 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.055 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.055 return
2025-07-01 17:49:07.055 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.055 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.055 else:
2025-07-01 17:49:07.055 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.055 eqi = None
2025-07-01 17:49:07.055
2025-07-01 17:49:07.055 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.055 # identical
2025-07-01 17:49:07.055
2025-07-01 17:49:07.055 # pump out diffs from before the synch point
2025-07-01 17:49:07.055 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.055
2025-07-01 17:49:07.055 # do intraline marking on the synch pair
2025-07-01 17:49:07.055 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.055 if eqi is None:
2025-07-01 17:49:07.056 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.056 atags = btags = ""
2025-07-01 17:49:07.056 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.056 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.056 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.056 if tag == 'replace':
2025-07-01 17:49:07.056 atags += '^' * la
2025-07-01 17:49:07.056 btags += '^' * lb
2025-07-01 17:49:07.056 elif tag == 'delete':
2025-07-01 17:49:07.056 atags += '-' * la
2025-07-01 17:49:07.056 elif tag == 'insert':
2025-07-01 17:49:07.056 btags += '+' * lb
2025-07-01 17:49:07.056 elif tag == 'equal':
2025-07-01 17:49:07.056 atags += ' ' * la
2025-07-01 17:49:07.056 btags += ' ' * lb
2025-07-01 17:49:07.056 else:
2025-07-01 17:49:07.056 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.057 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.057 else:
2025-07-01 17:49:07.057 # the synch pair is identical
2025-07-01 17:49:07.057 yield ' ' + aelt
2025-07-01 17:49:07.057
2025-07-01 17:49:07.057 # pump out diffs from after the synch point
2025-07-01 17:49:07.057 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.057
2025-07-01 17:49:07.057 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.057 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.057
2025-07-01 17:49:07.057 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.057 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.057 alo = 218, ahi = 1101
2025-07-01 17:49:07.057 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.057 blo = 218, bhi = 1101
2025-07-01 17:49:07.058
2025-07-01 17:49:07.058 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.058 g = []
2025-07-01 17:49:07.058 if alo < ahi:
2025-07-01 17:49:07.058 if blo < bhi:
2025-07-01 17:49:07.058 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.058 else:
2025-07-01 17:49:07.058 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.058 elif blo < bhi:
2025-07-01 17:49:07.058 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.058
2025-07-01 17:49:07.058 > yield from g
2025-07-01 17:49:07.058
2025-07-01 17:49:07.058 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.058 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.058
2025-07-01 17:49:07.058 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.059 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.059 alo = 218, ahi = 1101
2025-07-01 17:49:07.059 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.059 blo = 218, bhi = 1101
2025-07-01 17:49:07.059
2025-07-01 17:49:07.059 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.059 r"""
2025-07-01 17:49:07.059 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.059 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.059 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.059 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.059
2025-07-01 17:49:07.059 Example:
2025-07-01 17:49:07.059
2025-07-01 17:49:07.059 >>> d = Differ()
2025-07-01 17:49:07.059 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.059 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.060 >>> print(''.join(results), end="")
2025-07-01 17:49:07.060 - abcDefghiJkl
2025-07-01 17:49:07.060 + abcdefGhijkl
2025-07-01 17:49:07.060 """
2025-07-01 17:49:07.060
2025-07-01 17:49:07.060 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.060 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.060 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.060 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.060 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.060
2025-07-01 17:49:07.060 # search for the pair that matches best without being identical
2025-07-01 17:49:07.060 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.060 # on junk -- unless we have to)
2025-07-01 17:49:07.061 for j in range(blo, bhi):
2025-07-01 17:49:07.061 bj = b[j]
2025-07-01 17:49:07.061 cruncher.set_seq2(bj)
2025-07-01 17:49:07.061 for i in range(alo, ahi):
2025-07-01 17:49:07.061 ai = a[i]
2025-07-01 17:49:07.061 if ai == bj:
2025-07-01 17:49:07.061 if eqi is None:
2025-07-01 17:49:07.061 eqi, eqj = i, j
2025-07-01 17:49:07.061 continue
2025-07-01 17:49:07.061 cruncher.set_seq1(ai)
2025-07-01 17:49:07.061 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.061 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.061 # compares by a factor of 3.
2025-07-01 17:49:07.061 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.061 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.061 # of the computation is cached by cruncher
2025-07-01 17:49:07.062 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.062 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.062 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.062 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.062 if best_ratio < cutoff:
2025-07-01 17:49:07.062 # no non-identical "pretty close" pair
2025-07-01 17:49:07.062 if eqi is None:
2025-07-01 17:49:07.062 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.062 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.062 return
2025-07-01 17:49:07.062 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.062 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.062 else:
2025-07-01 17:49:07.062 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.062 eqi = None
2025-07-01 17:49:07.062
2025-07-01 17:49:07.063 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.066 # identical
2025-07-01 17:49:07.066
2025-07-01 17:49:07.066 # pump out diffs from before the synch point
2025-07-01 17:49:07.066 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.066
2025-07-01 17:49:07.066 # do intraline marking on the synch pair
2025-07-01 17:49:07.066 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.066 if eqi is None:
2025-07-01 17:49:07.066 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.066 atags = btags = ""
2025-07-01 17:49:07.066 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.066 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.067 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.067 if tag == 'replace':
2025-07-01 17:49:07.067 atags += '^' * la
2025-07-01 17:49:07.067 btags += '^' * lb
2025-07-01 17:49:07.067 elif tag == 'delete':
2025-07-01 17:49:07.067 atags += '-' * la
2025-07-01 17:49:07.067 elif tag == 'insert':
2025-07-01 17:49:07.067 btags += '+' * lb
2025-07-01 17:49:07.067 elif tag == 'equal':
2025-07-01 17:49:07.067 atags += ' ' * la
2025-07-01 17:49:07.067 btags += ' ' * lb
2025-07-01 17:49:07.067 else:
2025-07-01 17:49:07.067 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.067 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.067 else:
2025-07-01 17:49:07.067 # the synch pair is identical
2025-07-01 17:49:07.068 yield ' ' + aelt
2025-07-01 17:49:07.068
2025-07-01 17:49:07.068 # pump out diffs from after the synch point
2025-07-01 17:49:07.068 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.068
2025-07-01 17:49:07.068 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.068 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.068
2025-07-01 17:49:07.068 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.068 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.068 alo = 219, ahi = 1101
2025-07-01 17:49:07.068 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.068 blo = 219, bhi = 1101
2025-07-01 17:49:07.068
2025-07-01 17:49:07.068 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.068 g = []
2025-07-01 17:49:07.069 if alo < ahi:
2025-07-01 17:49:07.069 if blo < bhi:
2025-07-01 17:49:07.069 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.069 else:
2025-07-01 17:49:07.069 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.069 elif blo < bhi:
2025-07-01 17:49:07.069 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.069
2025-07-01 17:49:07.069 > yield from g
2025-07-01 17:49:07.069
2025-07-01 17:49:07.069 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.069 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.069
2025-07-01 17:49:07.069 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.069 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.069 alo = 219, ahi = 1101
2025-07-01 17:49:07.069 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.070 blo = 219, bhi = 1101
2025-07-01 17:49:07.070
2025-07-01 17:49:07.070 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.070 r"""
2025-07-01 17:49:07.070 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.070 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.070 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.070 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.070
2025-07-01 17:49:07.070 Example:
2025-07-01 17:49:07.070
2025-07-01 17:49:07.070 >>> d = Differ()
2025-07-01 17:49:07.070 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.070 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.070 >>> print(''.join(results), end="")
2025-07-01 17:49:07.070 - abcDefghiJkl
2025-07-01 17:49:07.071 + abcdefGhijkl
2025-07-01 17:49:07.071 """
2025-07-01 17:49:07.071
2025-07-01 17:49:07.071 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.071 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.071 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.071 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.071 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.071
2025-07-01 17:49:07.071 # search for the pair that matches best without being identical
2025-07-01 17:49:07.071 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.071 # on junk -- unless we have to)
2025-07-01 17:49:07.071 for j in range(blo, bhi):
2025-07-01 17:49:07.071 bj = b[j]
2025-07-01 17:49:07.071 cruncher.set_seq2(bj)
2025-07-01 17:49:07.072 for i in range(alo, ahi):
2025-07-01 17:49:07.072 ai = a[i]
2025-07-01 17:49:07.072 if ai == bj:
2025-07-01 17:49:07.072 if eqi is None:
2025-07-01 17:49:07.072 eqi, eqj = i, j
2025-07-01 17:49:07.072 continue
2025-07-01 17:49:07.072 cruncher.set_seq1(ai)
2025-07-01 17:49:07.072 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.072 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.072 # compares by a factor of 3.
2025-07-01 17:49:07.072 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.072 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.072 # of the computation is cached by cruncher
2025-07-01 17:49:07.072 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.072 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.072 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.072 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.073 if best_ratio < cutoff:
2025-07-01 17:49:07.073 # no non-identical "pretty close" pair
2025-07-01 17:49:07.073 if eqi is None:
2025-07-01 17:49:07.073 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.073 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.073 return
2025-07-01 17:49:07.073 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.073 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.073 else:
2025-07-01 17:49:07.073 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.073 eqi = None
2025-07-01 17:49:07.073
2025-07-01 17:49:07.073 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.073 # identical
2025-07-01 17:49:07.073
2025-07-01 17:49:07.073 # pump out diffs from before the synch point
2025-07-01 17:49:07.073 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.074
2025-07-01 17:49:07.074 # do intraline marking on the synch pair
2025-07-01 17:49:07.074 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.074 if eqi is None:
2025-07-01 17:49:07.074 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.074 atags = btags = ""
2025-07-01 17:49:07.074 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.074 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.074 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.074 if tag == 'replace':
2025-07-01 17:49:07.074 atags += '^' * la
2025-07-01 17:49:07.074 btags += '^' * lb
2025-07-01 17:49:07.074 elif tag == 'delete':
2025-07-01 17:49:07.074 atags += '-' * la
2025-07-01 17:49:07.074 elif tag == 'insert':
2025-07-01 17:49:07.074 btags += '+' * lb
2025-07-01 17:49:07.075 elif tag == 'equal':
2025-07-01 17:49:07.075 atags += ' ' * la
2025-07-01 17:49:07.075 btags += ' ' * lb
2025-07-01 17:49:07.075 else:
2025-07-01 17:49:07.075 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.075 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.075 else:
2025-07-01 17:49:07.075 # the synch pair is identical
2025-07-01 17:49:07.075 yield ' ' + aelt
2025-07-01 17:49:07.075
2025-07-01 17:49:07.075 # pump out diffs from after the synch point
2025-07-01 17:49:07.075 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.075
2025-07-01 17:49:07.075 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.075 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.075
2025-07-01 17:49:07.075 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.075 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.076 alo = 220, ahi = 1101
2025-07-01 17:49:07.076 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.076 blo = 220, bhi = 1101
2025-07-01 17:49:07.076
2025-07-01 17:49:07.076 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.076 g = []
2025-07-01 17:49:07.076 if alo < ahi:
2025-07-01 17:49:07.076 if blo < bhi:
2025-07-01 17:49:07.076 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.076 else:
2025-07-01 17:49:07.076 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.076 elif blo < bhi:
2025-07-01 17:49:07.076 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.076
2025-07-01 17:49:07.076 > yield from g
2025-07-01 17:49:07.077
2025-07-01 17:49:07.077 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.077 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.077
2025-07-01 17:49:07.077 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.077 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.077 alo = 220, ahi = 1101
2025-07-01 17:49:07.077 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.077 blo = 220, bhi = 1101
2025-07-01 17:49:07.077
2025-07-01 17:49:07.077 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.077 r"""
2025-07-01 17:49:07.077 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.077 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.077 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.077 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.078
2025-07-01 17:49:07.078 Example:
2025-07-01 17:49:07.078
2025-07-01 17:49:07.078 >>> d = Differ()
2025-07-01 17:49:07.078 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.078 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.078 >>> print(''.join(results), end="")
2025-07-01 17:49:07.078 - abcDefghiJkl
2025-07-01 17:49:07.078 + abcdefGhijkl
2025-07-01 17:49:07.078 """
2025-07-01 17:49:07.078
2025-07-01 17:49:07.078 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.078 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.078 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.078 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.079 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.083
2025-07-01 17:49:07.083 # search for the pair that matches best without being identical
2025-07-01 17:49:07.083 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.084 # on junk -- unless we have to)
2025-07-01 17:49:07.084 for j in range(blo, bhi):
2025-07-01 17:49:07.084 bj = b[j]
2025-07-01 17:49:07.084 cruncher.set_seq2(bj)
2025-07-01 17:49:07.084 for i in range(alo, ahi):
2025-07-01 17:49:07.084 ai = a[i]
2025-07-01 17:49:07.084 if ai == bj:
2025-07-01 17:49:07.084 if eqi is None:
2025-07-01 17:49:07.084 eqi, eqj = i, j
2025-07-01 17:49:07.084 continue
2025-07-01 17:49:07.084 cruncher.set_seq1(ai)
2025-07-01 17:49:07.084 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.084 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.084 # compares by a factor of 3.
2025-07-01 17:49:07.084 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.084 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.085 # of the computation is cached by cruncher
2025-07-01 17:49:07.085 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.085 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.085 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.085 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.085 if best_ratio < cutoff:
2025-07-01 17:49:07.085 # no non-identical "pretty close" pair
2025-07-01 17:49:07.085 if eqi is None:
2025-07-01 17:49:07.085 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.085 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.085 return
2025-07-01 17:49:07.085 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.085 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.085 else:
2025-07-01 17:49:07.085 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.085 eqi = None
2025-07-01 17:49:07.085
2025-07-01 17:49:07.086 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.086 # identical
2025-07-01 17:49:07.086
2025-07-01 17:49:07.086 # pump out diffs from before the synch point
2025-07-01 17:49:07.086 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.086
2025-07-01 17:49:07.086 # do intraline marking on the synch pair
2025-07-01 17:49:07.086 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.086 if eqi is None:
2025-07-01 17:49:07.086 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.086 atags = btags = ""
2025-07-01 17:49:07.086 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.086 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.086 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.086 if tag == 'replace':
2025-07-01 17:49:07.086 atags += '^' * la
2025-07-01 17:49:07.086 btags += '^' * lb
2025-07-01 17:49:07.087 elif tag == 'delete':
2025-07-01 17:49:07.087 atags += '-' * la
2025-07-01 17:49:07.087 elif tag == 'insert':
2025-07-01 17:49:07.087 btags += '+' * lb
2025-07-01 17:49:07.087 elif tag == 'equal':
2025-07-01 17:49:07.087 atags += ' ' * la
2025-07-01 17:49:07.087 btags += ' ' * lb
2025-07-01 17:49:07.087 else:
2025-07-01 17:49:07.087 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.087 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.087 else:
2025-07-01 17:49:07.087 # the synch pair is identical
2025-07-01 17:49:07.087 yield ' ' + aelt
2025-07-01 17:49:07.087
2025-07-01 17:49:07.087 # pump out diffs from after the synch point
2025-07-01 17:49:07.087 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.087
2025-07-01 17:49:07.088 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.088 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.088
2025-07-01 17:49:07.088 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.088 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.088 alo = 221, ahi = 1101
2025-07-01 17:49:07.088 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.088 blo = 221, bhi = 1101
2025-07-01 17:49:07.088
2025-07-01 17:49:07.088 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.088 g = []
2025-07-01 17:49:07.088 if alo < ahi:
2025-07-01 17:49:07.088 if blo < bhi:
2025-07-01 17:49:07.088 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.088 else:
2025-07-01 17:49:07.088 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.089 elif blo < bhi:
2025-07-01 17:49:07.089 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.089
2025-07-01 17:49:07.089 > yield from g
2025-07-01 17:49:07.089
2025-07-01 17:49:07.089 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.089 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.089
2025-07-01 17:49:07.089 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.089 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.089 alo = 221, ahi = 1101
2025-07-01 17:49:07.089 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.089 blo = 221, bhi = 1101
2025-07-01 17:49:07.089
2025-07-01 17:49:07.089 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.089 r"""
2025-07-01 17:49:07.089 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.090 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.090 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.090 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.090
2025-07-01 17:49:07.090 Example:
2025-07-01 17:49:07.090
2025-07-01 17:49:07.090 >>> d = Differ()
2025-07-01 17:49:07.090 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.090 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.090 >>> print(''.join(results), end="")
2025-07-01 17:49:07.090 - abcDefghiJkl
2025-07-01 17:49:07.090 + abcdefGhijkl
2025-07-01 17:49:07.090 """
2025-07-01 17:49:07.090
2025-07-01 17:49:07.090 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.091 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.091 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.091 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.091 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.091
2025-07-01 17:49:07.091 # search for the pair that matches best without being identical
2025-07-01 17:49:07.091 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.091 # on junk -- unless we have to)
2025-07-01 17:49:07.091 for j in range(blo, bhi):
2025-07-01 17:49:07.091 bj = b[j]
2025-07-01 17:49:07.091 cruncher.set_seq2(bj)
2025-07-01 17:49:07.091 for i in range(alo, ahi):
2025-07-01 17:49:07.091 ai = a[i]
2025-07-01 17:49:07.091 if ai == bj:
2025-07-01 17:49:07.091 if eqi is None:
2025-07-01 17:49:07.091 eqi, eqj = i, j
2025-07-01 17:49:07.092 continue
2025-07-01 17:49:07.092 cruncher.set_seq1(ai)
2025-07-01 17:49:07.092 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.092 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.092 # compares by a factor of 3.
2025-07-01 17:49:07.092 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.092 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.092 # of the computation is cached by cruncher
2025-07-01 17:49:07.092 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.092 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.092 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.092 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.092 if best_ratio < cutoff:
2025-07-01 17:49:07.092 # no non-identical "pretty close" pair
2025-07-01 17:49:07.092 if eqi is None:
2025-07-01 17:49:07.093 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.093 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.093 return
2025-07-01 17:49:07.093 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.093 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.093 else:
2025-07-01 17:49:07.093 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.093 eqi = None
2025-07-01 17:49:07.093
2025-07-01 17:49:07.093 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.093 # identical
2025-07-01 17:49:07.093
2025-07-01 17:49:07.093 # pump out diffs from before the synch point
2025-07-01 17:49:07.093 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.093
2025-07-01 17:49:07.093 # do intraline marking on the synch pair
2025-07-01 17:49:07.093 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.093 if eqi is None:
2025-07-01 17:49:07.094 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.094 atags = btags = ""
2025-07-01 17:49:07.094 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.094 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.094 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.094 if tag == 'replace':
2025-07-01 17:49:07.094 atags += '^' * la
2025-07-01 17:49:07.094 btags += '^' * lb
2025-07-01 17:49:07.094 elif tag == 'delete':
2025-07-01 17:49:07.094 atags += '-' * la
2025-07-01 17:49:07.094 elif tag == 'insert':
2025-07-01 17:49:07.094 btags += '+' * lb
2025-07-01 17:49:07.094 elif tag == 'equal':
2025-07-01 17:49:07.094 atags += ' ' * la
2025-07-01 17:49:07.094 btags += ' ' * lb
2025-07-01 17:49:07.094 else:
2025-07-01 17:49:07.094 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.095 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.098 else:
2025-07-01 17:49:07.098 # the synch pair is identical
2025-07-01 17:49:07.098 yield ' ' + aelt
2025-07-01 17:49:07.098
2025-07-01 17:49:07.098 # pump out diffs from after the synch point
2025-07-01 17:49:07.098 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.098
2025-07-01 17:49:07.098 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.098 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.098
2025-07-01 17:49:07.098 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.098 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.099 alo = 224, ahi = 1101
2025-07-01 17:49:07.099 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.099 blo = 224, bhi = 1101
2025-07-01 17:49:07.099
2025-07-01 17:49:07.099 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.099 g = []
2025-07-01 17:49:07.099 if alo < ahi:
2025-07-01 17:49:07.099 if blo < bhi:
2025-07-01 17:49:07.099 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.099 else:
2025-07-01 17:49:07.099 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.099 elif blo < bhi:
2025-07-01 17:49:07.099 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.099
2025-07-01 17:49:07.099 > yield from g
2025-07-01 17:49:07.099
2025-07-01 17:49:07.099 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.099 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.099
2025-07-01 17:49:07.099 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.099 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.100 alo = 224, ahi = 1101
2025-07-01 17:49:07.100 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.100 blo = 224, bhi = 1101
2025-07-01 17:49:07.100
2025-07-01 17:49:07.100 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.100 r"""
2025-07-01 17:49:07.100 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.100 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.100 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.100 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.100
2025-07-01 17:49:07.100 Example:
2025-07-01 17:49:07.100
2025-07-01 17:49:07.100 >>> d = Differ()
2025-07-01 17:49:07.100 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.100 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.100 >>> print(''.join(results), end="")
2025-07-01 17:49:07.100 - abcDefghiJkl
2025-07-01 17:49:07.100 + abcdefGhijkl
2025-07-01 17:49:07.101 """
2025-07-01 17:49:07.101
2025-07-01 17:49:07.101 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.101 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.101 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.101 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.101 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.101
2025-07-01 17:49:07.101 # search for the pair that matches best without being identical
2025-07-01 17:49:07.101 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.101 # on junk -- unless we have to)
2025-07-01 17:49:07.101 for j in range(blo, bhi):
2025-07-01 17:49:07.101 bj = b[j]
2025-07-01 17:49:07.101 cruncher.set_seq2(bj)
2025-07-01 17:49:07.101 for i in range(alo, ahi):
2025-07-01 17:49:07.101 ai = a[i]
2025-07-01 17:49:07.101 if ai == bj:
2025-07-01 17:49:07.101 if eqi is None:
2025-07-01 17:49:07.101 eqi, eqj = i, j
2025-07-01 17:49:07.102 continue
2025-07-01 17:49:07.102 cruncher.set_seq1(ai)
2025-07-01 17:49:07.102 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.102 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.102 # compares by a factor of 3.
2025-07-01 17:49:07.102 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.102 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.102 # of the computation is cached by cruncher
2025-07-01 17:49:07.102 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.102 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.102 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.102 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.102 if best_ratio < cutoff:
2025-07-01 17:49:07.102 # no non-identical "pretty close" pair
2025-07-01 17:49:07.102 if eqi is None:
2025-07-01 17:49:07.102 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.102 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.102 return
2025-07-01 17:49:07.102 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.102 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.102 else:
2025-07-01 17:49:07.103 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.103 eqi = None
2025-07-01 17:49:07.103
2025-07-01 17:49:07.103 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.103 # identical
2025-07-01 17:49:07.103
2025-07-01 17:49:07.103 # pump out diffs from before the synch point
2025-07-01 17:49:07.103 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.103
2025-07-01 17:49:07.103 # do intraline marking on the synch pair
2025-07-01 17:49:07.103 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.103 if eqi is None:
2025-07-01 17:49:07.103 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.103 atags = btags = ""
2025-07-01 17:49:07.103 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.103 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.103 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.103 if tag == 'replace':
2025-07-01 17:49:07.103 atags += '^' * la
2025-07-01 17:49:07.103 btags += '^' * lb
2025-07-01 17:49:07.104 elif tag == 'delete':
2025-07-01 17:49:07.104 atags += '-' * la
2025-07-01 17:49:07.104 elif tag == 'insert':
2025-07-01 17:49:07.104 btags += '+' * lb
2025-07-01 17:49:07.104 elif tag == 'equal':
2025-07-01 17:49:07.104 atags += ' ' * la
2025-07-01 17:49:07.104 btags += ' ' * lb
2025-07-01 17:49:07.104 else:
2025-07-01 17:49:07.104 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.104 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.104 else:
2025-07-01 17:49:07.104 # the synch pair is identical
2025-07-01 17:49:07.104 yield ' ' + aelt
2025-07-01 17:49:07.104
2025-07-01 17:49:07.104 # pump out diffs from after the synch point
2025-07-01 17:49:07.104 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.104
2025-07-01 17:49:07.104 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.104 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.104
2025-07-01 17:49:07.105 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.105 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.105 alo = 225, ahi = 1101
2025-07-01 17:49:07.105 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.105 blo = 225, bhi = 1101
2025-07-01 17:49:07.105
2025-07-01 17:49:07.105 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.105 g = []
2025-07-01 17:49:07.105 if alo < ahi:
2025-07-01 17:49:07.105 if blo < bhi:
2025-07-01 17:49:07.105 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.105 else:
2025-07-01 17:49:07.105 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.105 elif blo < bhi:
2025-07-01 17:49:07.105 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.105
2025-07-01 17:49:07.105 > yield from g
2025-07-01 17:49:07.105
2025-07-01 17:49:07.105 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.105 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.105
2025-07-01 17:49:07.106 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.106 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.106 alo = 225, ahi = 1101
2025-07-01 17:49:07.106 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.106 blo = 225, bhi = 1101
2025-07-01 17:49:07.106
2025-07-01 17:49:07.106 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.106 r"""
2025-07-01 17:49:07.106 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.106 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.106 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.106 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.106
2025-07-01 17:49:07.106 Example:
2025-07-01 17:49:07.106
2025-07-01 17:49:07.106 >>> d = Differ()
2025-07-01 17:49:07.106 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.106 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.106 >>> print(''.join(results), end="")
2025-07-01 17:49:07.106 - abcDefghiJkl
2025-07-01 17:49:07.107 + abcdefGhijkl
2025-07-01 17:49:07.107 """
2025-07-01 17:49:07.107
2025-07-01 17:49:07.107 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.107 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.107 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.107 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.107 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.107
2025-07-01 17:49:07.107 # search for the pair that matches best without being identical
2025-07-01 17:49:07.107 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.107 # on junk -- unless we have to)
2025-07-01 17:49:07.107 for j in range(blo, bhi):
2025-07-01 17:49:07.107 bj = b[j]
2025-07-01 17:49:07.107 cruncher.set_seq2(bj)
2025-07-01 17:49:07.107 for i in range(alo, ahi):
2025-07-01 17:49:07.107 ai = a[i]
2025-07-01 17:49:07.107 if ai == bj:
2025-07-01 17:49:07.107 if eqi is None:
2025-07-01 17:49:07.107 eqi, eqj = i, j
2025-07-01 17:49:07.107 continue
2025-07-01 17:49:07.108 cruncher.set_seq1(ai)
2025-07-01 17:49:07.108 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.108 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.108 # compares by a factor of 3.
2025-07-01 17:49:07.108 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.108 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.108 # of the computation is cached by cruncher
2025-07-01 17:49:07.108 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.108 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.108 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.108 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.108 if best_ratio < cutoff:
2025-07-01 17:49:07.108 # no non-identical "pretty close" pair
2025-07-01 17:49:07.108 if eqi is None:
2025-07-01 17:49:07.108 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.108 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.108 return
2025-07-01 17:49:07.108 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.108 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.108 else:
2025-07-01 17:49:07.109 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.109 eqi = None
2025-07-01 17:49:07.109
2025-07-01 17:49:07.109 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.109 # identical
2025-07-01 17:49:07.109
2025-07-01 17:49:07.109 # pump out diffs from before the synch point
2025-07-01 17:49:07.109 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.109
2025-07-01 17:49:07.109 # do intraline marking on the synch pair
2025-07-01 17:49:07.109 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.109 if eqi is None:
2025-07-01 17:49:07.109 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.109 atags = btags = ""
2025-07-01 17:49:07.109 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.109 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.109 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.109 if tag == 'replace':
2025-07-01 17:49:07.109 atags += '^' * la
2025-07-01 17:49:07.109 btags += '^' * lb
2025-07-01 17:49:07.109 elif tag == 'delete':
2025-07-01 17:49:07.110 atags += '-' * la
2025-07-01 17:49:07.115 elif tag == 'insert':
2025-07-01 17:49:07.115 btags += '+' * lb
2025-07-01 17:49:07.115 elif tag == 'equal':
2025-07-01 17:49:07.115 atags += ' ' * la
2025-07-01 17:49:07.115 btags += ' ' * lb
2025-07-01 17:49:07.115 else:
2025-07-01 17:49:07.115 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.115 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.115 else:
2025-07-01 17:49:07.115 # the synch pair is identical
2025-07-01 17:49:07.115 yield ' ' + aelt
2025-07-01 17:49:07.115
2025-07-01 17:49:07.115 # pump out diffs from after the synch point
2025-07-01 17:49:07.115 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.115
2025-07-01 17:49:07.115 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.116 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.116
2025-07-01 17:49:07.116 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.116 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.116 alo = 226, ahi = 1101
2025-07-01 17:49:07.116 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.116 blo = 226, bhi = 1101
2025-07-01 17:49:07.116
2025-07-01 17:49:07.116 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.116 g = []
2025-07-01 17:49:07.116 if alo < ahi:
2025-07-01 17:49:07.116 if blo < bhi:
2025-07-01 17:49:07.116 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.116 else:
2025-07-01 17:49:07.116 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.116 elif blo < bhi:
2025-07-01 17:49:07.116 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.116
2025-07-01 17:49:07.116 > yield from g
2025-07-01 17:49:07.117
2025-07-01 17:49:07.117 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.117 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.117
2025-07-01 17:49:07.117 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.117 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.117 alo = 226, ahi = 1101
2025-07-01 17:49:07.117 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.117 blo = 226, bhi = 1101
2025-07-01 17:49:07.117
2025-07-01 17:49:07.117 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.117 r"""
2025-07-01 17:49:07.117 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.117 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.117 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.117 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.117
2025-07-01 17:49:07.117 Example:
2025-07-01 17:49:07.117
2025-07-01 17:49:07.118 >>> d = Differ()
2025-07-01 17:49:07.118 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.118 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.118 >>> print(''.join(results), end="")
2025-07-01 17:49:07.118 - abcDefghiJkl
2025-07-01 17:49:07.118 + abcdefGhijkl
2025-07-01 17:49:07.118 """
2025-07-01 17:49:07.118
2025-07-01 17:49:07.118 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.118 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.118 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.118 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.118 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.118
2025-07-01 17:49:07.118 # search for the pair that matches best without being identical
2025-07-01 17:49:07.118 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.118 # on junk -- unless we have to)
2025-07-01 17:49:07.118 for j in range(blo, bhi):
2025-07-01 17:49:07.119 bj = b[j]
2025-07-01 17:49:07.119 cruncher.set_seq2(bj)
2025-07-01 17:49:07.119 for i in range(alo, ahi):
2025-07-01 17:49:07.119 ai = a[i]
2025-07-01 17:49:07.119 if ai == bj:
2025-07-01 17:49:07.119 if eqi is None:
2025-07-01 17:49:07.119 eqi, eqj = i, j
2025-07-01 17:49:07.119 continue
2025-07-01 17:49:07.119 cruncher.set_seq1(ai)
2025-07-01 17:49:07.119 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.119 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.119 # compares by a factor of 3.
2025-07-01 17:49:07.119 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.119 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.119 # of the computation is cached by cruncher
2025-07-01 17:49:07.119 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.119 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.119 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.119 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.119 if best_ratio < cutoff:
2025-07-01 17:49:07.119 # no non-identical "pretty close" pair
2025-07-01 17:49:07.120 if eqi is None:
2025-07-01 17:49:07.120 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.120 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.120 return
2025-07-01 17:49:07.120 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.120 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.120 else:
2025-07-01 17:49:07.120 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.120 eqi = None
2025-07-01 17:49:07.120
2025-07-01 17:49:07.120 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.120 # identical
2025-07-01 17:49:07.120
2025-07-01 17:49:07.120 # pump out diffs from before the synch point
2025-07-01 17:49:07.120 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.120
2025-07-01 17:49:07.120 # do intraline marking on the synch pair
2025-07-01 17:49:07.120 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.120 if eqi is None:
2025-07-01 17:49:07.120 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.120 atags = btags = ""
2025-07-01 17:49:07.121 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.121 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.121 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.121 if tag == 'replace':
2025-07-01 17:49:07.121 atags += '^' * la
2025-07-01 17:49:07.121 btags += '^' * lb
2025-07-01 17:49:07.121 elif tag == 'delete':
2025-07-01 17:49:07.121 atags += '-' * la
2025-07-01 17:49:07.121 elif tag == 'insert':
2025-07-01 17:49:07.121 btags += '+' * lb
2025-07-01 17:49:07.121 elif tag == 'equal':
2025-07-01 17:49:07.121 atags += ' ' * la
2025-07-01 17:49:07.121 btags += ' ' * lb
2025-07-01 17:49:07.121 else:
2025-07-01 17:49:07.121 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.121 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.121 else:
2025-07-01 17:49:07.121 # the synch pair is identical
2025-07-01 17:49:07.121 yield ' ' + aelt
2025-07-01 17:49:07.121
2025-07-01 17:49:07.121 # pump out diffs from after the synch point
2025-07-01 17:49:07.122 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.122
2025-07-01 17:49:07.122 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.122 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.122
2025-07-01 17:49:07.122 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.122 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.122 alo = 227, ahi = 1101
2025-07-01 17:49:07.122 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.122 blo = 227, bhi = 1101
2025-07-01 17:49:07.122
2025-07-01 17:49:07.122 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.122 g = []
2025-07-01 17:49:07.122 if alo < ahi:
2025-07-01 17:49:07.122 if blo < bhi:
2025-07-01 17:49:07.122 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.122 else:
2025-07-01 17:49:07.122 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.122 elif blo < bhi:
2025-07-01 17:49:07.122 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.122
2025-07-01 17:49:07.123 > yield from g
2025-07-01 17:49:07.123
2025-07-01 17:49:07.123 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.123 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.123
2025-07-01 17:49:07.123 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.123 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.123 alo = 227, ahi = 1101
2025-07-01 17:49:07.123 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.123 blo = 227, bhi = 1101
2025-07-01 17:49:07.123
2025-07-01 17:49:07.123 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.123 r"""
2025-07-01 17:49:07.123 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.123 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.123 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.123 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.123
2025-07-01 17:49:07.123 Example:
2025-07-01 17:49:07.123
2025-07-01 17:49:07.123 >>> d = Differ()
2025-07-01 17:49:07.124 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.124 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.124 >>> print(''.join(results), end="")
2025-07-01 17:49:07.124 - abcDefghiJkl
2025-07-01 17:49:07.124 + abcdefGhijkl
2025-07-01 17:49:07.124 """
2025-07-01 17:49:07.124
2025-07-01 17:49:07.124 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.124 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.124 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.124 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.124 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.124
2025-07-01 17:49:07.124 # search for the pair that matches best without being identical
2025-07-01 17:49:07.124 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.124 # on junk -- unless we have to)
2025-07-01 17:49:07.124 for j in range(blo, bhi):
2025-07-01 17:49:07.124 bj = b[j]
2025-07-01 17:49:07.124 cruncher.set_seq2(bj)
2025-07-01 17:49:07.124 for i in range(alo, ahi):
2025-07-01 17:49:07.125 ai = a[i]
2025-07-01 17:49:07.125 if ai == bj:
2025-07-01 17:49:07.125 if eqi is None:
2025-07-01 17:49:07.125 eqi, eqj = i, j
2025-07-01 17:49:07.125 continue
2025-07-01 17:49:07.125 cruncher.set_seq1(ai)
2025-07-01 17:49:07.125 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.125 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.125 # compares by a factor of 3.
2025-07-01 17:49:07.125 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.125 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.125 # of the computation is cached by cruncher
2025-07-01 17:49:07.125 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.125 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.125 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.125 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.125 if best_ratio < cutoff:
2025-07-01 17:49:07.125 # no non-identical "pretty close" pair
2025-07-01 17:49:07.125 if eqi is None:
2025-07-01 17:49:07.126 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.129 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.129 return
2025-07-01 17:49:07.129 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.129 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.129 else:
2025-07-01 17:49:07.129 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.129 eqi = None
2025-07-01 17:49:07.129
2025-07-01 17:49:07.129 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.129 # identical
2025-07-01 17:49:07.129
2025-07-01 17:49:07.129 # pump out diffs from before the synch point
2025-07-01 17:49:07.129 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.129
2025-07-01 17:49:07.129 # do intraline marking on the synch pair
2025-07-01 17:49:07.129 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.129 if eqi is None:
2025-07-01 17:49:07.129 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.129 atags = btags = ""
2025-07-01 17:49:07.129 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.129 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.130 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.130 if tag == 'replace':
2025-07-01 17:49:07.130 atags += '^' * la
2025-07-01 17:49:07.130 btags += '^' * lb
2025-07-01 17:49:07.130 elif tag == 'delete':
2025-07-01 17:49:07.130 atags += '-' * la
2025-07-01 17:49:07.130 elif tag == 'insert':
2025-07-01 17:49:07.130 btags += '+' * lb
2025-07-01 17:49:07.130 elif tag == 'equal':
2025-07-01 17:49:07.130 atags += ' ' * la
2025-07-01 17:49:07.130 btags += ' ' * lb
2025-07-01 17:49:07.130 else:
2025-07-01 17:49:07.130 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.130 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.130 else:
2025-07-01 17:49:07.130 # the synch pair is identical
2025-07-01 17:49:07.130 yield ' ' + aelt
2025-07-01 17:49:07.130
2025-07-01 17:49:07.130 # pump out diffs from after the synch point
2025-07-01 17:49:07.130 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.130
2025-07-01 17:49:07.131 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.131 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.131
2025-07-01 17:49:07.131 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.131 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.131 alo = 228, ahi = 1101
2025-07-01 17:49:07.131 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.131 blo = 228, bhi = 1101
2025-07-01 17:49:07.131
2025-07-01 17:49:07.131 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.131 g = []
2025-07-01 17:49:07.131 if alo < ahi:
2025-07-01 17:49:07.131 if blo < bhi:
2025-07-01 17:49:07.131 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.131 else:
2025-07-01 17:49:07.131 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.131 elif blo < bhi:
2025-07-01 17:49:07.131 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.131
2025-07-01 17:49:07.131 > yield from g
2025-07-01 17:49:07.131
2025-07-01 17:49:07.132 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.132 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.132
2025-07-01 17:49:07.132 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.132 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.132 alo = 228, ahi = 1101
2025-07-01 17:49:07.132 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.132 blo = 228, bhi = 1101
2025-07-01 17:49:07.132
2025-07-01 17:49:07.132 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.132 r"""
2025-07-01 17:49:07.132 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.132 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.132 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.132 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.132
2025-07-01 17:49:07.132 Example:
2025-07-01 17:49:07.132
2025-07-01 17:49:07.132 >>> d = Differ()
2025-07-01 17:49:07.133 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.133 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.133 >>> print(''.join(results), end="")
2025-07-01 17:49:07.133 - abcDefghiJkl
2025-07-01 17:49:07.133 + abcdefGhijkl
2025-07-01 17:49:07.133 """
2025-07-01 17:49:07.133
2025-07-01 17:49:07.133 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.133 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.133 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.133 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.133 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.133
2025-07-01 17:49:07.133 # search for the pair that matches best without being identical
2025-07-01 17:49:07.133 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.133 # on junk -- unless we have to)
2025-07-01 17:49:07.133 for j in range(blo, bhi):
2025-07-01 17:49:07.133 bj = b[j]
2025-07-01 17:49:07.134 cruncher.set_seq2(bj)
2025-07-01 17:49:07.134 for i in range(alo, ahi):
2025-07-01 17:49:07.134 ai = a[i]
2025-07-01 17:49:07.134 if ai == bj:
2025-07-01 17:49:07.134 if eqi is None:
2025-07-01 17:49:07.134 eqi, eqj = i, j
2025-07-01 17:49:07.134 continue
2025-07-01 17:49:07.134 cruncher.set_seq1(ai)
2025-07-01 17:49:07.134 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.134 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.134 # compares by a factor of 3.
2025-07-01 17:49:07.134 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.134 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.134 # of the computation is cached by cruncher
2025-07-01 17:49:07.134 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.134 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.134 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.134 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.134 if best_ratio < cutoff:
2025-07-01 17:49:07.134 # no non-identical "pretty close" pair
2025-07-01 17:49:07.134 if eqi is None:
2025-07-01 17:49:07.135 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.135 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.135 return
2025-07-01 17:49:07.135 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.135 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.135 else:
2025-07-01 17:49:07.135 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.135 eqi = None
2025-07-01 17:49:07.135
2025-07-01 17:49:07.135 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.135 # identical
2025-07-01 17:49:07.135
2025-07-01 17:49:07.135 # pump out diffs from before the synch point
2025-07-01 17:49:07.135 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.135
2025-07-01 17:49:07.135 # do intraline marking on the synch pair
2025-07-01 17:49:07.135 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.135 if eqi is None:
2025-07-01 17:49:07.135 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.135 atags = btags = ""
2025-07-01 17:49:07.135 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.136 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.136 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.136 if tag == 'replace':
2025-07-01 17:49:07.136 atags += '^' * la
2025-07-01 17:49:07.136 btags += '^' * lb
2025-07-01 17:49:07.136 elif tag == 'delete':
2025-07-01 17:49:07.136 atags += '-' * la
2025-07-01 17:49:07.136 elif tag == 'insert':
2025-07-01 17:49:07.136 btags += '+' * lb
2025-07-01 17:49:07.136 elif tag == 'equal':
2025-07-01 17:49:07.136 atags += ' ' * la
2025-07-01 17:49:07.136 btags += ' ' * lb
2025-07-01 17:49:07.136 else:
2025-07-01 17:49:07.136 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.136 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.136 else:
2025-07-01 17:49:07.136 # the synch pair is identical
2025-07-01 17:49:07.136 yield ' ' + aelt
2025-07-01 17:49:07.136
2025-07-01 17:49:07.136 # pump out diffs from after the synch point
2025-07-01 17:49:07.137 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.137
2025-07-01 17:49:07.137 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.137 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.137
2025-07-01 17:49:07.137 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.137 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.137 alo = 229, ahi = 1101
2025-07-01 17:49:07.137 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.137 blo = 229, bhi = 1101
2025-07-01 17:49:07.137
2025-07-01 17:49:07.137 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.137 g = []
2025-07-01 17:49:07.137 if alo < ahi:
2025-07-01 17:49:07.137 if blo < bhi:
2025-07-01 17:49:07.137 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.137 else:
2025-07-01 17:49:07.137 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.137 elif blo < bhi:
2025-07-01 17:49:07.137 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.138
2025-07-01 17:49:07.138 > yield from g
2025-07-01 17:49:07.138
2025-07-01 17:49:07.138 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.138 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.138
2025-07-01 17:49:07.138 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.138 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.138 alo = 229, ahi = 1101
2025-07-01 17:49:07.138 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.138 blo = 229, bhi = 1101
2025-07-01 17:49:07.138
2025-07-01 17:49:07.138 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.138 r"""
2025-07-01 17:49:07.138 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.138 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.138 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.138 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.138
2025-07-01 17:49:07.138 Example:
2025-07-01 17:49:07.139
2025-07-01 17:49:07.139 >>> d = Differ()
2025-07-01 17:49:07.139 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.139 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.139 >>> print(''.join(results), end="")
2025-07-01 17:49:07.139 - abcDefghiJkl
2025-07-01 17:49:07.139 + abcdefGhijkl
2025-07-01 17:49:07.139 """
2025-07-01 17:49:07.139
2025-07-01 17:49:07.139 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.139 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.139 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.139 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.139 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.139
2025-07-01 17:49:07.139 # search for the pair that matches best without being identical
2025-07-01 17:49:07.139 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.139 # on junk -- unless we have to)
2025-07-01 17:49:07.139 for j in range(blo, bhi):
2025-07-01 17:49:07.140 bj = b[j]
2025-07-01 17:49:07.140 cruncher.set_seq2(bj)
2025-07-01 17:49:07.140 for i in range(alo, ahi):
2025-07-01 17:49:07.140 ai = a[i]
2025-07-01 17:49:07.140 if ai == bj:
2025-07-01 17:49:07.140 if eqi is None:
2025-07-01 17:49:07.140 eqi, eqj = i, j
2025-07-01 17:49:07.140 continue
2025-07-01 17:49:07.140 cruncher.set_seq1(ai)
2025-07-01 17:49:07.140 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.140 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.140 # compares by a factor of 3.
2025-07-01 17:49:07.140 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.140 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.140 # of the computation is cached by cruncher
2025-07-01 17:49:07.140 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.140 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.140 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.140 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.140 if best_ratio < cutoff:
2025-07-01 17:49:07.141 # no non-identical "pretty close" pair
2025-07-01 17:49:07.146 if eqi is None:
2025-07-01 17:49:07.146 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.146 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.146 return
2025-07-01 17:49:07.146 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.146 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.146 else:
2025-07-01 17:49:07.146 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.146 eqi = None
2025-07-01 17:49:07.146
2025-07-01 17:49:07.146 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.146 # identical
2025-07-01 17:49:07.146
2025-07-01 17:49:07.146 # pump out diffs from before the synch point
2025-07-01 17:49:07.146 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.147
2025-07-01 17:49:07.147 # do intraline marking on the synch pair
2025-07-01 17:49:07.147 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.147 if eqi is None:
2025-07-01 17:49:07.147 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.147 atags = btags = ""
2025-07-01 17:49:07.147 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.147 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.147 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.147 if tag == 'replace':
2025-07-01 17:49:07.147 atags += '^' * la
2025-07-01 17:49:07.147 btags += '^' * lb
2025-07-01 17:49:07.147 elif tag == 'delete':
2025-07-01 17:49:07.147 atags += '-' * la
2025-07-01 17:49:07.147 elif tag == 'insert':
2025-07-01 17:49:07.147 btags += '+' * lb
2025-07-01 17:49:07.147 elif tag == 'equal':
2025-07-01 17:49:07.147 atags += ' ' * la
2025-07-01 17:49:07.147 btags += ' ' * lb
2025-07-01 17:49:07.147 else:
2025-07-01 17:49:07.148 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.148 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.148 else:
2025-07-01 17:49:07.148 # the synch pair is identical
2025-07-01 17:49:07.148 yield ' ' + aelt
2025-07-01 17:49:07.148
2025-07-01 17:49:07.148 # pump out diffs from after the synch point
2025-07-01 17:49:07.148 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.148
2025-07-01 17:49:07.148 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.148 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.148
2025-07-01 17:49:07.148 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.148 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.148 alo = 230, ahi = 1101
2025-07-01 17:49:07.148 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.148 blo = 230, bhi = 1101
2025-07-01 17:49:07.148
2025-07-01 17:49:07.148 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.148 g = []
2025-07-01 17:49:07.148 if alo < ahi:
2025-07-01 17:49:07.149 if blo < bhi:
2025-07-01 17:49:07.149 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.149 else:
2025-07-01 17:49:07.149 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.149 elif blo < bhi:
2025-07-01 17:49:07.149 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.149
2025-07-01 17:49:07.149 > yield from g
2025-07-01 17:49:07.149
2025-07-01 17:49:07.149 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.149 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.149
2025-07-01 17:49:07.149 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.149 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.149 alo = 230, ahi = 1101
2025-07-01 17:49:07.149 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.149 blo = 230, bhi = 1101
2025-07-01 17:49:07.149
2025-07-01 17:49:07.149 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.149 r"""
2025-07-01 17:49:07.150 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.150 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.150 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.150 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.150
2025-07-01 17:49:07.150 Example:
2025-07-01 17:49:07.150
2025-07-01 17:49:07.150 >>> d = Differ()
2025-07-01 17:49:07.150 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.150 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.150 >>> print(''.join(results), end="")
2025-07-01 17:49:07.150 - abcDefghiJkl
2025-07-01 17:49:07.150 + abcdefGhijkl
2025-07-01 17:49:07.150 """
2025-07-01 17:49:07.150
2025-07-01 17:49:07.150 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.150 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.150 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.151 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.151 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.151
2025-07-01 17:49:07.151 # search for the pair that matches best without being identical
2025-07-01 17:49:07.151 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.152 # on junk -- unless we have to)
2025-07-01 17:49:07.152 for j in range(blo, bhi):
2025-07-01 17:49:07.152 bj = b[j]
2025-07-01 17:49:07.152 cruncher.set_seq2(bj)
2025-07-01 17:49:07.152 for i in range(alo, ahi):
2025-07-01 17:49:07.152 ai = a[i]
2025-07-01 17:49:07.152 if ai == bj:
2025-07-01 17:49:07.152 if eqi is None:
2025-07-01 17:49:07.152 eqi, eqj = i, j
2025-07-01 17:49:07.152 continue
2025-07-01 17:49:07.152 cruncher.set_seq1(ai)
2025-07-01 17:49:07.152 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.152 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.152 # compares by a factor of 3.
2025-07-01 17:49:07.152 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.152 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.152 # of the computation is cached by cruncher
2025-07-01 17:49:07.152 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.152 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.152 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.153 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.153 if best_ratio < cutoff:
2025-07-01 17:49:07.153 # no non-identical "pretty close" pair
2025-07-01 17:49:07.153 if eqi is None:
2025-07-01 17:49:07.153 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.153 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.153 return
2025-07-01 17:49:07.153 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.153 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.153 else:
2025-07-01 17:49:07.153 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.153 eqi = None
2025-07-01 17:49:07.153
2025-07-01 17:49:07.153 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.153 # identical
2025-07-01 17:49:07.153
2025-07-01 17:49:07.153 # pump out diffs from before the synch point
2025-07-01 17:49:07.153 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.153
2025-07-01 17:49:07.153 # do intraline marking on the synch pair
2025-07-01 17:49:07.153 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.154 if eqi is None:
2025-07-01 17:49:07.154 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.154 atags = btags = ""
2025-07-01 17:49:07.154 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.154 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.154 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.154 if tag == 'replace':
2025-07-01 17:49:07.154 atags += '^' * la
2025-07-01 17:49:07.154 btags += '^' * lb
2025-07-01 17:49:07.154 elif tag == 'delete':
2025-07-01 17:49:07.154 atags += '-' * la
2025-07-01 17:49:07.154 elif tag == 'insert':
2025-07-01 17:49:07.154 btags += '+' * lb
2025-07-01 17:49:07.154 elif tag == 'equal':
2025-07-01 17:49:07.154 atags += ' ' * la
2025-07-01 17:49:07.154 btags += ' ' * lb
2025-07-01 17:49:07.154 else:
2025-07-01 17:49:07.154 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.154 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.154 else:
2025-07-01 17:49:07.154 # the synch pair is identical
2025-07-01 17:49:07.154 yield ' ' + aelt
2025-07-01 17:49:07.155
2025-07-01 17:49:07.155 # pump out diffs from after the synch point
2025-07-01 17:49:07.155 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.155
2025-07-01 17:49:07.155 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.155 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.155
2025-07-01 17:49:07.155 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.155 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.155 alo = 231, ahi = 1101
2025-07-01 17:49:07.155 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.155 blo = 231, bhi = 1101
2025-07-01 17:49:07.155
2025-07-01 17:49:07.155 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.155 g = []
2025-07-01 17:49:07.155 if alo < ahi:
2025-07-01 17:49:07.155 if blo < bhi:
2025-07-01 17:49:07.155 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.155 else:
2025-07-01 17:49:07.155 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.155 elif blo < bhi:
2025-07-01 17:49:07.156 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.156
2025-07-01 17:49:07.156 > yield from g
2025-07-01 17:49:07.156
2025-07-01 17:49:07.156 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.156 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.156
2025-07-01 17:49:07.156 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.156 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.156 alo = 231, ahi = 1101
2025-07-01 17:49:07.156 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.156 blo = 231, bhi = 1101
2025-07-01 17:49:07.156
2025-07-01 17:49:07.156 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.156 r"""
2025-07-01 17:49:07.156 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.156 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.156 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.156 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.156
2025-07-01 17:49:07.156 Example:
2025-07-01 17:49:07.157
2025-07-01 17:49:07.160 >>> d = Differ()
2025-07-01 17:49:07.160 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.160 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.160 >>> print(''.join(results), end="")
2025-07-01 17:49:07.160 - abcDefghiJkl
2025-07-01 17:49:07.160 + abcdefGhijkl
2025-07-01 17:49:07.160 """
2025-07-01 17:49:07.160
2025-07-01 17:49:07.160 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.160 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.160 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.160 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.160 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.160
2025-07-01 17:49:07.160 # search for the pair that matches best without being identical
2025-07-01 17:49:07.160 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.160 # on junk -- unless we have to)
2025-07-01 17:49:07.161 for j in range(blo, bhi):
2025-07-01 17:49:07.161 bj = b[j]
2025-07-01 17:49:07.161 cruncher.set_seq2(bj)
2025-07-01 17:49:07.161 for i in range(alo, ahi):
2025-07-01 17:49:07.161 ai = a[i]
2025-07-01 17:49:07.161 if ai == bj:
2025-07-01 17:49:07.161 if eqi is None:
2025-07-01 17:49:07.161 eqi, eqj = i, j
2025-07-01 17:49:07.161 continue
2025-07-01 17:49:07.161 cruncher.set_seq1(ai)
2025-07-01 17:49:07.161 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.161 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.161 # compares by a factor of 3.
2025-07-01 17:49:07.161 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.161 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.161 # of the computation is cached by cruncher
2025-07-01 17:49:07.161 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.161 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.161 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.161 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.162 if best_ratio < cutoff:
2025-07-01 17:49:07.162 # no non-identical "pretty close" pair
2025-07-01 17:49:07.162 if eqi is None:
2025-07-01 17:49:07.162 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.162 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.162 return
2025-07-01 17:49:07.162 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.162 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.162 else:
2025-07-01 17:49:07.162 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.162 eqi = None
2025-07-01 17:49:07.162
2025-07-01 17:49:07.162 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.162 # identical
2025-07-01 17:49:07.162
2025-07-01 17:49:07.162 # pump out diffs from before the synch point
2025-07-01 17:49:07.162 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.162
2025-07-01 17:49:07.162 # do intraline marking on the synch pair
2025-07-01 17:49:07.162 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.163 if eqi is None:
2025-07-01 17:49:07.163 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.163 atags = btags = ""
2025-07-01 17:49:07.163 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.163 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.163 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.163 if tag == 'replace':
2025-07-01 17:49:07.163 atags += '^' * la
2025-07-01 17:49:07.163 btags += '^' * lb
2025-07-01 17:49:07.163 elif tag == 'delete':
2025-07-01 17:49:07.163 atags += '-' * la
2025-07-01 17:49:07.163 elif tag == 'insert':
2025-07-01 17:49:07.163 btags += '+' * lb
2025-07-01 17:49:07.163 elif tag == 'equal':
2025-07-01 17:49:07.163 atags += ' ' * la
2025-07-01 17:49:07.163 btags += ' ' * lb
2025-07-01 17:49:07.163 else:
2025-07-01 17:49:07.163 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.163 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.163 else:
2025-07-01 17:49:07.164 # the synch pair is identical
2025-07-01 17:49:07.164 yield ' ' + aelt
2025-07-01 17:49:07.164
2025-07-01 17:49:07.164 # pump out diffs from after the synch point
2025-07-01 17:49:07.164 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.164
2025-07-01 17:49:07.164 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.164 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.164
2025-07-01 17:49:07.164 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.164 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.164 alo = 232, ahi = 1101
2025-07-01 17:49:07.164 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.164 blo = 232, bhi = 1101
2025-07-01 17:49:07.164
2025-07-01 17:49:07.164 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.164 g = []
2025-07-01 17:49:07.164 if alo < ahi:
2025-07-01 17:49:07.164 if blo < bhi:
2025-07-01 17:49:07.164 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.165 else:
2025-07-01 17:49:07.165 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.165 elif blo < bhi:
2025-07-01 17:49:07.165 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.165
2025-07-01 17:49:07.165 > yield from g
2025-07-01 17:49:07.165
2025-07-01 17:49:07.165 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.165 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.165
2025-07-01 17:49:07.165 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.165 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.165 alo = 232, ahi = 1101
2025-07-01 17:49:07.165 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.165 blo = 232, bhi = 1101
2025-07-01 17:49:07.165
2025-07-01 17:49:07.165 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.165 r"""
2025-07-01 17:49:07.165 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.165 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.165 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.165 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.166
2025-07-01 17:49:07.166 Example:
2025-07-01 17:49:07.166
2025-07-01 17:49:07.166 >>> d = Differ()
2025-07-01 17:49:07.166 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.166 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.166 >>> print(''.join(results), end="")
2025-07-01 17:49:07.166 - abcDefghiJkl
2025-07-01 17:49:07.166 + abcdefGhijkl
2025-07-01 17:49:07.166 """
2025-07-01 17:49:07.166
2025-07-01 17:49:07.166 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.166 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.166 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.166 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.166 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.166
2025-07-01 17:49:07.166 # search for the pair that matches best without being identical
2025-07-01 17:49:07.166 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.167 # on junk -- unless we have to)
2025-07-01 17:49:07.167 for j in range(blo, bhi):
2025-07-01 17:49:07.167 bj = b[j]
2025-07-01 17:49:07.167 cruncher.set_seq2(bj)
2025-07-01 17:49:07.167 for i in range(alo, ahi):
2025-07-01 17:49:07.167 ai = a[i]
2025-07-01 17:49:07.167 if ai == bj:
2025-07-01 17:49:07.167 if eqi is None:
2025-07-01 17:49:07.167 eqi, eqj = i, j
2025-07-01 17:49:07.167 continue
2025-07-01 17:49:07.167 cruncher.set_seq1(ai)
2025-07-01 17:49:07.167 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.167 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.167 # compares by a factor of 3.
2025-07-01 17:49:07.167 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.167 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.167 # of the computation is cached by cruncher
2025-07-01 17:49:07.167 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.167 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.167 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.167 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.168 if best_ratio < cutoff:
2025-07-01 17:49:07.168 # no non-identical "pretty close" pair
2025-07-01 17:49:07.168 if eqi is None:
2025-07-01 17:49:07.168 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.168 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.168 return
2025-07-01 17:49:07.168 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.168 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.168 else:
2025-07-01 17:49:07.168 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.168 eqi = None
2025-07-01 17:49:07.168
2025-07-01 17:49:07.168 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.168 # identical
2025-07-01 17:49:07.168
2025-07-01 17:49:07.168 # pump out diffs from before the synch point
2025-07-01 17:49:07.168 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.168
2025-07-01 17:49:07.168 # do intraline marking on the synch pair
2025-07-01 17:49:07.168 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.168 if eqi is None:
2025-07-01 17:49:07.168 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.169 atags = btags = ""
2025-07-01 17:49:07.169 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.169 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.169 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.169 if tag == 'replace':
2025-07-01 17:49:07.169 atags += '^' * la
2025-07-01 17:49:07.169 btags += '^' * lb
2025-07-01 17:49:07.169 elif tag == 'delete':
2025-07-01 17:49:07.169 atags += '-' * la
2025-07-01 17:49:07.169 elif tag == 'insert':
2025-07-01 17:49:07.169 btags += '+' * lb
2025-07-01 17:49:07.169 elif tag == 'equal':
2025-07-01 17:49:07.169 atags += ' ' * la
2025-07-01 17:49:07.169 btags += ' ' * lb
2025-07-01 17:49:07.169 else:
2025-07-01 17:49:07.169 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.169 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.169 else:
2025-07-01 17:49:07.169 # the synch pair is identical
2025-07-01 17:49:07.169 yield ' ' + aelt
2025-07-01 17:49:07.169
2025-07-01 17:49:07.170 # pump out diffs from after the synch point
2025-07-01 17:49:07.170 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.170
2025-07-01 17:49:07.170 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.170 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.170
2025-07-01 17:49:07.170 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.170 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.170 alo = 233, ahi = 1101
2025-07-01 17:49:07.170 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.170 blo = 233, bhi = 1101
2025-07-01 17:49:07.170
2025-07-01 17:49:07.170 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.170 g = []
2025-07-01 17:49:07.170 if alo < ahi:
2025-07-01 17:49:07.170 if blo < bhi:
2025-07-01 17:49:07.170 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.170 else:
2025-07-01 17:49:07.170 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.170 elif blo < bhi:
2025-07-01 17:49:07.171 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.171
2025-07-01 17:49:07.171 > yield from g
2025-07-01 17:49:07.171
2025-07-01 17:49:07.171 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.171 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.171
2025-07-01 17:49:07.171 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.171 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.171 alo = 233, ahi = 1101
2025-07-01 17:49:07.171 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.171 blo = 233, bhi = 1101
2025-07-01 17:49:07.171
2025-07-01 17:49:07.171 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.171 r"""
2025-07-01 17:49:07.171 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.171 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.171 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.171 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.171
2025-07-01 17:49:07.172 Example:
2025-07-01 17:49:07.172
2025-07-01 17:49:07.172 >>> d = Differ()
2025-07-01 17:49:07.172 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.172 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.172 >>> print(''.join(results), end="")
2025-07-01 17:49:07.172 - abcDefghiJkl
2025-07-01 17:49:07.172 + abcdefGhijkl
2025-07-01 17:49:07.172 """
2025-07-01 17:49:07.172
2025-07-01 17:49:07.172 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.172 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.172 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.172 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.172 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.172
2025-07-01 17:49:07.172 # search for the pair that matches best without being identical
2025-07-01 17:49:07.172 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.173 # on junk -- unless we have to)
2025-07-01 17:49:07.178 for j in range(blo, bhi):
2025-07-01 17:49:07.178 bj = b[j]
2025-07-01 17:49:07.178 cruncher.set_seq2(bj)
2025-07-01 17:49:07.178 for i in range(alo, ahi):
2025-07-01 17:49:07.178 ai = a[i]
2025-07-01 17:49:07.178 if ai == bj:
2025-07-01 17:49:07.178 if eqi is None:
2025-07-01 17:49:07.178 eqi, eqj = i, j
2025-07-01 17:49:07.178 continue
2025-07-01 17:49:07.178 cruncher.set_seq1(ai)
2025-07-01 17:49:07.178 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.178 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.178 # compares by a factor of 3.
2025-07-01 17:49:07.178 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.178 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.178 # of the computation is cached by cruncher
2025-07-01 17:49:07.178 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.178 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.178 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.179 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.179 if best_ratio < cutoff:
2025-07-01 17:49:07.179 # no non-identical "pretty close" pair
2025-07-01 17:49:07.179 if eqi is None:
2025-07-01 17:49:07.179 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.179 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.179 return
2025-07-01 17:49:07.179 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.179 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.179 else:
2025-07-01 17:49:07.179 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.179 eqi = None
2025-07-01 17:49:07.179
2025-07-01 17:49:07.179 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.179 # identical
2025-07-01 17:49:07.179
2025-07-01 17:49:07.179 # pump out diffs from before the synch point
2025-07-01 17:49:07.179 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.179
2025-07-01 17:49:07.179 # do intraline marking on the synch pair
2025-07-01 17:49:07.180 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.180 if eqi is None:
2025-07-01 17:49:07.180 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.180 atags = btags = ""
2025-07-01 17:49:07.180 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.180 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.180 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.180 if tag == 'replace':
2025-07-01 17:49:07.180 atags += '^' * la
2025-07-01 17:49:07.180 btags += '^' * lb
2025-07-01 17:49:07.180 elif tag == 'delete':
2025-07-01 17:49:07.180 atags += '-' * la
2025-07-01 17:49:07.180 elif tag == 'insert':
2025-07-01 17:49:07.180 btags += '+' * lb
2025-07-01 17:49:07.180 elif tag == 'equal':
2025-07-01 17:49:07.180 atags += ' ' * la
2025-07-01 17:49:07.180 btags += ' ' * lb
2025-07-01 17:49:07.180 else:
2025-07-01 17:49:07.180 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.180 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.181 else:
2025-07-01 17:49:07.181 # the synch pair is identical
2025-07-01 17:49:07.181 yield ' ' + aelt
2025-07-01 17:49:07.181
2025-07-01 17:49:07.181 # pump out diffs from after the synch point
2025-07-01 17:49:07.181 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.181
2025-07-01 17:49:07.181 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.181 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.181
2025-07-01 17:49:07.181 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.181 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.181 alo = 234, ahi = 1101
2025-07-01 17:49:07.181 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.181 blo = 234, bhi = 1101
2025-07-01 17:49:07.181
2025-07-01 17:49:07.181 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.181 g = []
2025-07-01 17:49:07.181 if alo < ahi:
2025-07-01 17:49:07.181 if blo < bhi:
2025-07-01 17:49:07.181 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.182 else:
2025-07-01 17:49:07.182 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.182 elif blo < bhi:
2025-07-01 17:49:07.182 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.182
2025-07-01 17:49:07.182 > yield from g
2025-07-01 17:49:07.182
2025-07-01 17:49:07.182 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.182 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.182
2025-07-01 17:49:07.182 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.182 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.182 alo = 234, ahi = 1101
2025-07-01 17:49:07.182 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.182 blo = 234, bhi = 1101
2025-07-01 17:49:07.182
2025-07-01 17:49:07.182 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.182 r"""
2025-07-01 17:49:07.182 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.182 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.183 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.183 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.183
2025-07-01 17:49:07.183 Example:
2025-07-01 17:49:07.183
2025-07-01 17:49:07.183 >>> d = Differ()
2025-07-01 17:49:07.183 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.183 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.183 >>> print(''.join(results), end="")
2025-07-01 17:49:07.183 - abcDefghiJkl
2025-07-01 17:49:07.183 + abcdefGhijkl
2025-07-01 17:49:07.183 """
2025-07-01 17:49:07.183
2025-07-01 17:49:07.183 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.183 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.183 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.183 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.183 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.183
2025-07-01 17:49:07.183 # search for the pair that matches best without being identical
2025-07-01 17:49:07.184 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.184 # on junk -- unless we have to)
2025-07-01 17:49:07.184 for j in range(blo, bhi):
2025-07-01 17:49:07.184 bj = b[j]
2025-07-01 17:49:07.184 cruncher.set_seq2(bj)
2025-07-01 17:49:07.184 for i in range(alo, ahi):
2025-07-01 17:49:07.184 ai = a[i]
2025-07-01 17:49:07.184 if ai == bj:
2025-07-01 17:49:07.184 if eqi is None:
2025-07-01 17:49:07.184 eqi, eqj = i, j
2025-07-01 17:49:07.184 continue
2025-07-01 17:49:07.184 cruncher.set_seq1(ai)
2025-07-01 17:49:07.184 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.184 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.184 # compares by a factor of 3.
2025-07-01 17:49:07.184 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.184 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.184 # of the computation is cached by cruncher
2025-07-01 17:49:07.184 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.185 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.185 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.185 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.185 if best_ratio < cutoff:
2025-07-01 17:49:07.185 # no non-identical "pretty close" pair
2025-07-01 17:49:07.185 if eqi is None:
2025-07-01 17:49:07.185 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.185 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.185 return
2025-07-01 17:49:07.185 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.185 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.185 else:
2025-07-01 17:49:07.185 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.185 eqi = None
2025-07-01 17:49:07.185
2025-07-01 17:49:07.185 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.185 # identical
2025-07-01 17:49:07.185
2025-07-01 17:49:07.185 # pump out diffs from before the synch point
2025-07-01 17:49:07.186 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.186
2025-07-01 17:49:07.186 # do intraline marking on the synch pair
2025-07-01 17:49:07.186 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.186 if eqi is None:
2025-07-01 17:49:07.186 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.186 atags = btags = ""
2025-07-01 17:49:07.186 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.186 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.186 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.186 if tag == 'replace':
2025-07-01 17:49:07.186 atags += '^' * la
2025-07-01 17:49:07.186 btags += '^' * lb
2025-07-01 17:49:07.186 elif tag == 'delete':
2025-07-01 17:49:07.186 atags += '-' * la
2025-07-01 17:49:07.186 elif tag == 'insert':
2025-07-01 17:49:07.186 btags += '+' * lb
2025-07-01 17:49:07.186 elif tag == 'equal':
2025-07-01 17:49:07.186 atags += ' ' * la
2025-07-01 17:49:07.186 btags += ' ' * lb
2025-07-01 17:49:07.186 else:
2025-07-01 17:49:07.187 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.187 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.187 else:
2025-07-01 17:49:07.187 # the synch pair is identical
2025-07-01 17:49:07.187 yield ' ' + aelt
2025-07-01 17:49:07.187
2025-07-01 17:49:07.187 # pump out diffs from after the synch point
2025-07-01 17:49:07.187 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.187
2025-07-01 17:49:07.187 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.187 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.187
2025-07-01 17:49:07.187 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.187 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.187 alo = 235, ahi = 1101
2025-07-01 17:49:07.187 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.187 blo = 235, bhi = 1101
2025-07-01 17:49:07.187
2025-07-01 17:49:07.187 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.187 g = []
2025-07-01 17:49:07.187 if alo < ahi:
2025-07-01 17:49:07.188 if blo < bhi:
2025-07-01 17:49:07.188 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.190 else:
2025-07-01 17:49:07.191 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.191 elif blo < bhi:
2025-07-01 17:49:07.191 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.191
2025-07-01 17:49:07.191 > yield from g
2025-07-01 17:49:07.191
2025-07-01 17:49:07.191 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.191 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.191
2025-07-01 17:49:07.191 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.191 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.191 alo = 235, ahi = 1101
2025-07-01 17:49:07.191 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.191 blo = 235, bhi = 1101
2025-07-01 17:49:07.191
2025-07-01 17:49:07.191 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.191 r"""
2025-07-01 17:49:07.191 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.191 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.192 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.192 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.192
2025-07-01 17:49:07.192 Example:
2025-07-01 17:49:07.192
2025-07-01 17:49:07.192 >>> d = Differ()
2025-07-01 17:49:07.192 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.192 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.192 >>> print(''.join(results), end="")
2025-07-01 17:49:07.192 - abcDefghiJkl
2025-07-01 17:49:07.192 + abcdefGhijkl
2025-07-01 17:49:07.192 """
2025-07-01 17:49:07.192
2025-07-01 17:49:07.192 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.192 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.192 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.192 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.192 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.192
2025-07-01 17:49:07.192 # search for the pair that matches best without being identical
2025-07-01 17:49:07.193 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.193 # on junk -- unless we have to)
2025-07-01 17:49:07.193 for j in range(blo, bhi):
2025-07-01 17:49:07.193 bj = b[j]
2025-07-01 17:49:07.193 cruncher.set_seq2(bj)
2025-07-01 17:49:07.193 for i in range(alo, ahi):
2025-07-01 17:49:07.193 ai = a[i]
2025-07-01 17:49:07.193 if ai == bj:
2025-07-01 17:49:07.193 if eqi is None:
2025-07-01 17:49:07.193 eqi, eqj = i, j
2025-07-01 17:49:07.193 continue
2025-07-01 17:49:07.193 cruncher.set_seq1(ai)
2025-07-01 17:49:07.193 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.193 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.193 # compares by a factor of 3.
2025-07-01 17:49:07.193 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.193 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.193 # of the computation is cached by cruncher
2025-07-01 17:49:07.193 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.193 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.194 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.194 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.194 if best_ratio < cutoff:
2025-07-01 17:49:07.194 # no non-identical "pretty close" pair
2025-07-01 17:49:07.194 if eqi is None:
2025-07-01 17:49:07.194 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.194 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.194 return
2025-07-01 17:49:07.194 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.194 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.194 else:
2025-07-01 17:49:07.194 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.194 eqi = None
2025-07-01 17:49:07.194
2025-07-01 17:49:07.194 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.194 # identical
2025-07-01 17:49:07.194
2025-07-01 17:49:07.194 # pump out diffs from before the synch point
2025-07-01 17:49:07.194 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.195
2025-07-01 17:49:07.195 # do intraline marking on the synch pair
2025-07-01 17:49:07.195 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.195 if eqi is None:
2025-07-01 17:49:07.195 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.195 atags = btags = ""
2025-07-01 17:49:07.195 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.195 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.195 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.195 if tag == 'replace':
2025-07-01 17:49:07.195 atags += '^' * la
2025-07-01 17:49:07.195 btags += '^' * lb
2025-07-01 17:49:07.195 elif tag == 'delete':
2025-07-01 17:49:07.195 atags += '-' * la
2025-07-01 17:49:07.195 elif tag == 'insert':
2025-07-01 17:49:07.195 btags += '+' * lb
2025-07-01 17:49:07.195 elif tag == 'equal':
2025-07-01 17:49:07.195 atags += ' ' * la
2025-07-01 17:49:07.195 btags += ' ' * lb
2025-07-01 17:49:07.196 else:
2025-07-01 17:49:07.196 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.196 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.196 else:
2025-07-01 17:49:07.196 # the synch pair is identical
2025-07-01 17:49:07.196 yield ' ' + aelt
2025-07-01 17:49:07.196
2025-07-01 17:49:07.196 # pump out diffs from after the synch point
2025-07-01 17:49:07.196 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.196
2025-07-01 17:49:07.196 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.196 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.196
2025-07-01 17:49:07.196 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.196 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.196 alo = 236, ahi = 1101
2025-07-01 17:49:07.196 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.196 blo = 236, bhi = 1101
2025-07-01 17:49:07.196
2025-07-01 17:49:07.196 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.197 g = []
2025-07-01 17:49:07.197 if alo < ahi:
2025-07-01 17:49:07.197 if blo < bhi:
2025-07-01 17:49:07.197 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.197 else:
2025-07-01 17:49:07.197 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.197 elif blo < bhi:
2025-07-01 17:49:07.197 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.197
2025-07-01 17:49:07.197 > yield from g
2025-07-01 17:49:07.197
2025-07-01 17:49:07.197 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.197 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.197
2025-07-01 17:49:07.197 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.197 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.197 alo = 236, ahi = 1101
2025-07-01 17:49:07.197 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.198 blo = 236, bhi = 1101
2025-07-01 17:49:07.198
2025-07-01 17:49:07.198 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.198 r"""
2025-07-01 17:49:07.198 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.198 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.198 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.198 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.198
2025-07-01 17:49:07.198 Example:
2025-07-01 17:49:07.198
2025-07-01 17:49:07.198 >>> d = Differ()
2025-07-01 17:49:07.198 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.198 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.198 >>> print(''.join(results), end="")
2025-07-01 17:49:07.198 - abcDefghiJkl
2025-07-01 17:49:07.198 + abcdefGhijkl
2025-07-01 17:49:07.198 """
2025-07-01 17:49:07.198
2025-07-01 17:49:07.199 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.199 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.199 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.199 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.199 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.199
2025-07-01 17:49:07.199 # search for the pair that matches best without being identical
2025-07-01 17:49:07.199 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.199 # on junk -- unless we have to)
2025-07-01 17:49:07.199 for j in range(blo, bhi):
2025-07-01 17:49:07.199 bj = b[j]
2025-07-01 17:49:07.199 cruncher.set_seq2(bj)
2025-07-01 17:49:07.199 for i in range(alo, ahi):
2025-07-01 17:49:07.199 ai = a[i]
2025-07-01 17:49:07.199 if ai == bj:
2025-07-01 17:49:07.199 if eqi is None:
2025-07-01 17:49:07.199 eqi, eqj = i, j
2025-07-01 17:49:07.199 continue
2025-07-01 17:49:07.199 cruncher.set_seq1(ai)
2025-07-01 17:49:07.199 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.199 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.199 # compares by a factor of 3.
2025-07-01 17:49:07.200 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.200 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.200 # of the computation is cached by cruncher
2025-07-01 17:49:07.200 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.200 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.200 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.200 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.200 if best_ratio < cutoff:
2025-07-01 17:49:07.200 # no non-identical "pretty close" pair
2025-07-01 17:49:07.200 if eqi is None:
2025-07-01 17:49:07.200 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.200 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.200 return
2025-07-01 17:49:07.200 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.200 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.200 else:
2025-07-01 17:49:07.200 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.200 eqi = None
2025-07-01 17:49:07.200
2025-07-01 17:49:07.200 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.200 # identical
2025-07-01 17:49:07.201
2025-07-01 17:49:07.201 # pump out diffs from before the synch point
2025-07-01 17:49:07.201 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.201
2025-07-01 17:49:07.201 # do intraline marking on the synch pair
2025-07-01 17:49:07.201 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.201 if eqi is None:
2025-07-01 17:49:07.201 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.201 atags = btags = ""
2025-07-01 17:49:07.201 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.201 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.201 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.201 if tag == 'replace':
2025-07-01 17:49:07.201 atags += '^' * la
2025-07-01 17:49:07.201 btags += '^' * lb
2025-07-01 17:49:07.201 elif tag == 'delete':
2025-07-01 17:49:07.201 atags += '-' * la
2025-07-01 17:49:07.201 elif tag == 'insert':
2025-07-01 17:49:07.201 btags += '+' * lb
2025-07-01 17:49:07.201 elif tag == 'equal':
2025-07-01 17:49:07.201 atags += ' ' * la
2025-07-01 17:49:07.202 btags += ' ' * lb
2025-07-01 17:49:07.202 else:
2025-07-01 17:49:07.202 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.202 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.202 else:
2025-07-01 17:49:07.202 # the synch pair is identical
2025-07-01 17:49:07.202 yield ' ' + aelt
2025-07-01 17:49:07.202
2025-07-01 17:49:07.202 # pump out diffs from after the synch point
2025-07-01 17:49:07.202 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.202
2025-07-01 17:49:07.202 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.202 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.202
2025-07-01 17:49:07.202 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.202 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.202 alo = 237, ahi = 1101
2025-07-01 17:49:07.202 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.202 blo = 237, bhi = 1101
2025-07-01 17:49:07.202
2025-07-01 17:49:07.202 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.203 g = []
2025-07-01 17:49:07.203 if alo < ahi:
2025-07-01 17:49:07.203 if blo < bhi:
2025-07-01 17:49:07.203 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.203 else:
2025-07-01 17:49:07.203 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.203 elif blo < bhi:
2025-07-01 17:49:07.203 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.203
2025-07-01 17:49:07.203 > yield from g
2025-07-01 17:49:07.203
2025-07-01 17:49:07.203 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.203 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.203
2025-07-01 17:49:07.203 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.203 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.203 alo = 237, ahi = 1101
2025-07-01 17:49:07.203 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.203 blo = 237, bhi = 1101
2025-07-01 17:49:07.203
2025-07-01 17:49:07.203 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.204 r"""
2025-07-01 17:49:07.209 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.209 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.209 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.209 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.209
2025-07-01 17:49:07.209 Example:
2025-07-01 17:49:07.209
2025-07-01 17:49:07.209 >>> d = Differ()
2025-07-01 17:49:07.209 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.209 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.209 >>> print(''.join(results), end="")
2025-07-01 17:49:07.209 - abcDefghiJkl
2025-07-01 17:49:07.209 + abcdefGhijkl
2025-07-01 17:49:07.209 """
2025-07-01 17:49:07.209
2025-07-01 17:49:07.209 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.209 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.209 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.210 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.210 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.210
2025-07-01 17:49:07.210 # search for the pair that matches best without being identical
2025-07-01 17:49:07.210 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.210 # on junk -- unless we have to)
2025-07-01 17:49:07.210 for j in range(blo, bhi):
2025-07-01 17:49:07.210 bj = b[j]
2025-07-01 17:49:07.210 cruncher.set_seq2(bj)
2025-07-01 17:49:07.210 for i in range(alo, ahi):
2025-07-01 17:49:07.210 ai = a[i]
2025-07-01 17:49:07.210 if ai == bj:
2025-07-01 17:49:07.210 if eqi is None:
2025-07-01 17:49:07.210 eqi, eqj = i, j
2025-07-01 17:49:07.210 continue
2025-07-01 17:49:07.210 cruncher.set_seq1(ai)
2025-07-01 17:49:07.210 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.210 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.210 # compares by a factor of 3.
2025-07-01 17:49:07.210 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.210 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.211 # of the computation is cached by cruncher
2025-07-01 17:49:07.211 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.211 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.211 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.211 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.211 if best_ratio < cutoff:
2025-07-01 17:49:07.211 # no non-identical "pretty close" pair
2025-07-01 17:49:07.211 if eqi is None:
2025-07-01 17:49:07.211 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.211 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.211 return
2025-07-01 17:49:07.211 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.211 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.211 else:
2025-07-01 17:49:07.211 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.211 eqi = None
2025-07-01 17:49:07.211
2025-07-01 17:49:07.211 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.211 # identical
2025-07-01 17:49:07.211
2025-07-01 17:49:07.211 # pump out diffs from before the synch point
2025-07-01 17:49:07.212 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.212
2025-07-01 17:49:07.212 # do intraline marking on the synch pair
2025-07-01 17:49:07.212 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.212 if eqi is None:
2025-07-01 17:49:07.212 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.212 atags = btags = ""
2025-07-01 17:49:07.212 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.212 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.212 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.212 if tag == 'replace':
2025-07-01 17:49:07.212 atags += '^' * la
2025-07-01 17:49:07.212 btags += '^' * lb
2025-07-01 17:49:07.212 elif tag == 'delete':
2025-07-01 17:49:07.212 atags += '-' * la
2025-07-01 17:49:07.212 elif tag == 'insert':
2025-07-01 17:49:07.212 btags += '+' * lb
2025-07-01 17:49:07.212 elif tag == 'equal':
2025-07-01 17:49:07.212 atags += ' ' * la
2025-07-01 17:49:07.212 btags += ' ' * lb
2025-07-01 17:49:07.212 else:
2025-07-01 17:49:07.213 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.213 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.213 else:
2025-07-01 17:49:07.213 # the synch pair is identical
2025-07-01 17:49:07.213 yield ' ' + aelt
2025-07-01 17:49:07.213
2025-07-01 17:49:07.213 # pump out diffs from after the synch point
2025-07-01 17:49:07.213 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.213
2025-07-01 17:49:07.213 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.213 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.213
2025-07-01 17:49:07.213 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.213 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.213 alo = 238, ahi = 1101
2025-07-01 17:49:07.213 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.213 blo = 238, bhi = 1101
2025-07-01 17:49:07.213
2025-07-01 17:49:07.213 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.213 g = []
2025-07-01 17:49:07.213 if alo < ahi:
2025-07-01 17:49:07.214 if blo < bhi:
2025-07-01 17:49:07.214 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.214 else:
2025-07-01 17:49:07.214 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.214 elif blo < bhi:
2025-07-01 17:49:07.214 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.214
2025-07-01 17:49:07.214 > yield from g
2025-07-01 17:49:07.214
2025-07-01 17:49:07.214 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.214 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.214
2025-07-01 17:49:07.214 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.214 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.214 alo = 238, ahi = 1101
2025-07-01 17:49:07.214 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.214 blo = 238, bhi = 1101
2025-07-01 17:49:07.214
2025-07-01 17:49:07.214 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.214 r"""
2025-07-01 17:49:07.214 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.214 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.215 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.215 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.215
2025-07-01 17:49:07.215 Example:
2025-07-01 17:49:07.215
2025-07-01 17:49:07.215 >>> d = Differ()
2025-07-01 17:49:07.215 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.215 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.215 >>> print(''.join(results), end="")
2025-07-01 17:49:07.215 - abcDefghiJkl
2025-07-01 17:49:07.215 + abcdefGhijkl
2025-07-01 17:49:07.215 """
2025-07-01 17:49:07.215
2025-07-01 17:49:07.215 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.215 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.215 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.215 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.215 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.215
2025-07-01 17:49:07.216 # search for the pair that matches best without being identical
2025-07-01 17:49:07.216 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.216 # on junk -- unless we have to)
2025-07-01 17:49:07.216 for j in range(blo, bhi):
2025-07-01 17:49:07.216 bj = b[j]
2025-07-01 17:49:07.216 cruncher.set_seq2(bj)
2025-07-01 17:49:07.216 for i in range(alo, ahi):
2025-07-01 17:49:07.216 ai = a[i]
2025-07-01 17:49:07.216 if ai == bj:
2025-07-01 17:49:07.216 if eqi is None:
2025-07-01 17:49:07.216 eqi, eqj = i, j
2025-07-01 17:49:07.216 continue
2025-07-01 17:49:07.216 cruncher.set_seq1(ai)
2025-07-01 17:49:07.216 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.216 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.216 # compares by a factor of 3.
2025-07-01 17:49:07.216 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.216 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.216 # of the computation is cached by cruncher
2025-07-01 17:49:07.217 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.217 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.217 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.217 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.217 if best_ratio < cutoff:
2025-07-01 17:49:07.217 # no non-identical "pretty close" pair
2025-07-01 17:49:07.217 if eqi is None:
2025-07-01 17:49:07.217 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.217 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.217 return
2025-07-01 17:49:07.217 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.217 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.217 else:
2025-07-01 17:49:07.217 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.217 eqi = None
2025-07-01 17:49:07.217
2025-07-01 17:49:07.217 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.217 # identical
2025-07-01 17:49:07.217
2025-07-01 17:49:07.217 # pump out diffs from before the synch point
2025-07-01 17:49:07.218 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.218
2025-07-01 17:49:07.218 # do intraline marking on the synch pair
2025-07-01 17:49:07.218 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.218 if eqi is None:
2025-07-01 17:49:07.218 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.218 atags = btags = ""
2025-07-01 17:49:07.218 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.218 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.218 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.218 if tag == 'replace':
2025-07-01 17:49:07.218 atags += '^' * la
2025-07-01 17:49:07.218 btags += '^' * lb
2025-07-01 17:49:07.218 elif tag == 'delete':
2025-07-01 17:49:07.218 atags += '-' * la
2025-07-01 17:49:07.218 elif tag == 'insert':
2025-07-01 17:49:07.218 btags += '+' * lb
2025-07-01 17:49:07.218 elif tag == 'equal':
2025-07-01 17:49:07.218 atags += ' ' * la
2025-07-01 17:49:07.218 btags += ' ' * lb
2025-07-01 17:49:07.219 else:
2025-07-01 17:49:07.222 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.222 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.222 else:
2025-07-01 17:49:07.222 # the synch pair is identical
2025-07-01 17:49:07.222 yield ' ' + aelt
2025-07-01 17:49:07.222
2025-07-01 17:49:07.222 # pump out diffs from after the synch point
2025-07-01 17:49:07.222 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.222
2025-07-01 17:49:07.222 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.222 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.222
2025-07-01 17:49:07.222 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.222 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.222 alo = 239, ahi = 1101
2025-07-01 17:49:07.222 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.223 blo = 239, bhi = 1101
2025-07-01 17:49:07.223
2025-07-01 17:49:07.223 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.223 g = []
2025-07-01 17:49:07.223 if alo < ahi:
2025-07-01 17:49:07.223 if blo < bhi:
2025-07-01 17:49:07.223 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.223 else:
2025-07-01 17:49:07.223 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.223 elif blo < bhi:
2025-07-01 17:49:07.223 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.223
2025-07-01 17:49:07.223 > yield from g
2025-07-01 17:49:07.223
2025-07-01 17:49:07.223 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.223 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.223
2025-07-01 17:49:07.223 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.223 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.223 alo = 239, ahi = 1101
2025-07-01 17:49:07.224 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.224 blo = 239, bhi = 1101
2025-07-01 17:49:07.224
2025-07-01 17:49:07.224 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.224 r"""
2025-07-01 17:49:07.224 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.224 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.224 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.224 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.224
2025-07-01 17:49:07.224 Example:
2025-07-01 17:49:07.224
2025-07-01 17:49:07.224 >>> d = Differ()
2025-07-01 17:49:07.224 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.224 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.224 >>> print(''.join(results), end="")
2025-07-01 17:49:07.224 - abcDefghiJkl
2025-07-01 17:49:07.224 + abcdefGhijkl
2025-07-01 17:49:07.224 """
2025-07-01 17:49:07.225
2025-07-01 17:49:07.225 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.225 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.225 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.225 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.225 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.225
2025-07-01 17:49:07.225 # search for the pair that matches best without being identical
2025-07-01 17:49:07.225 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.225 # on junk -- unless we have to)
2025-07-01 17:49:07.225 for j in range(blo, bhi):
2025-07-01 17:49:07.225 bj = b[j]
2025-07-01 17:49:07.225 cruncher.set_seq2(bj)
2025-07-01 17:49:07.225 for i in range(alo, ahi):
2025-07-01 17:49:07.225 ai = a[i]
2025-07-01 17:49:07.225 if ai == bj:
2025-07-01 17:49:07.225 if eqi is None:
2025-07-01 17:49:07.225 eqi, eqj = i, j
2025-07-01 17:49:07.225 continue
2025-07-01 17:49:07.225 cruncher.set_seq1(ai)
2025-07-01 17:49:07.226 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.226 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.226 # compares by a factor of 3.
2025-07-01 17:49:07.226 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.226 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.226 # of the computation is cached by cruncher
2025-07-01 17:49:07.226 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.226 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.226 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.226 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.226 if best_ratio < cutoff:
2025-07-01 17:49:07.226 # no non-identical "pretty close" pair
2025-07-01 17:49:07.226 if eqi is None:
2025-07-01 17:49:07.226 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.226 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.226 return
2025-07-01 17:49:07.226 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.226 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.226 else:
2025-07-01 17:49:07.226 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.226 eqi = None
2025-07-01 17:49:07.227
2025-07-01 17:49:07.227 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.227 # identical
2025-07-01 17:49:07.227
2025-07-01 17:49:07.227 # pump out diffs from before the synch point
2025-07-01 17:49:07.227 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.227
2025-07-01 17:49:07.227 # do intraline marking on the synch pair
2025-07-01 17:49:07.227 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.227 if eqi is None:
2025-07-01 17:49:07.227 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.227 atags = btags = ""
2025-07-01 17:49:07.227 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.227 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.227 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.227 if tag == 'replace':
2025-07-01 17:49:07.227 atags += '^' * la
2025-07-01 17:49:07.227 btags += '^' * lb
2025-07-01 17:49:07.227 elif tag == 'delete':
2025-07-01 17:49:07.227 atags += '-' * la
2025-07-01 17:49:07.228 elif tag == 'insert':
2025-07-01 17:49:07.228 btags += '+' * lb
2025-07-01 17:49:07.228 elif tag == 'equal':
2025-07-01 17:49:07.228 atags += ' ' * la
2025-07-01 17:49:07.228 btags += ' ' * lb
2025-07-01 17:49:07.228 else:
2025-07-01 17:49:07.228 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.228 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.228 else:
2025-07-01 17:49:07.228 # the synch pair is identical
2025-07-01 17:49:07.228 yield ' ' + aelt
2025-07-01 17:49:07.228
2025-07-01 17:49:07.228 # pump out diffs from after the synch point
2025-07-01 17:49:07.228 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.228
2025-07-01 17:49:07.228 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.228 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.228
2025-07-01 17:49:07.228 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.228 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.228 alo = 240, ahi = 1101
2025-07-01 17:49:07.229 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.229 blo = 240, bhi = 1101
2025-07-01 17:49:07.229
2025-07-01 17:49:07.229 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.229 g = []
2025-07-01 17:49:07.229 if alo < ahi:
2025-07-01 17:49:07.229 if blo < bhi:
2025-07-01 17:49:07.229 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.229 else:
2025-07-01 17:49:07.229 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.229 elif blo < bhi:
2025-07-01 17:49:07.229 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.229
2025-07-01 17:49:07.229 > yield from g
2025-07-01 17:49:07.229
2025-07-01 17:49:07.229 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.229 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.229
2025-07-01 17:49:07.229 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.230 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.230 alo = 240, ahi = 1101
2025-07-01 17:49:07.230 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.230 blo = 240, bhi = 1101
2025-07-01 17:49:07.230
2025-07-01 17:49:07.230 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.230 r"""
2025-07-01 17:49:07.230 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.230 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.230 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.230 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.230
2025-07-01 17:49:07.230 Example:
2025-07-01 17:49:07.230
2025-07-01 17:49:07.230 >>> d = Differ()
2025-07-01 17:49:07.230 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.230 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.230 >>> print(''.join(results), end="")
2025-07-01 17:49:07.230 - abcDefghiJkl
2025-07-01 17:49:07.231 + abcdefGhijkl
2025-07-01 17:49:07.231 """
2025-07-01 17:49:07.231
2025-07-01 17:49:07.231 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.231 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.231 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.231 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.231 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.231
2025-07-01 17:49:07.231 # search for the pair that matches best without being identical
2025-07-01 17:49:07.231 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.231 # on junk -- unless we have to)
2025-07-01 17:49:07.231 for j in range(blo, bhi):
2025-07-01 17:49:07.231 bj = b[j]
2025-07-01 17:49:07.231 cruncher.set_seq2(bj)
2025-07-01 17:49:07.231 for i in range(alo, ahi):
2025-07-01 17:49:07.231 ai = a[i]
2025-07-01 17:49:07.231 if ai == bj:
2025-07-01 17:49:07.231 if eqi is None:
2025-07-01 17:49:07.231 eqi, eqj = i, j
2025-07-01 17:49:07.232 continue
2025-07-01 17:49:07.232 cruncher.set_seq1(ai)
2025-07-01 17:49:07.232 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.232 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.232 # compares by a factor of 3.
2025-07-01 17:49:07.232 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.232 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.232 # of the computation is cached by cruncher
2025-07-01 17:49:07.232 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.232 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.232 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.232 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.232 if best_ratio < cutoff:
2025-07-01 17:49:07.232 # no non-identical "pretty close" pair
2025-07-01 17:49:07.232 if eqi is None:
2025-07-01 17:49:07.232 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.232 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.232 return
2025-07-01 17:49:07.232 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.232 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.233 else:
2025-07-01 17:49:07.233 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.233 eqi = None
2025-07-01 17:49:07.233
2025-07-01 17:49:07.233 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.233 # identical
2025-07-01 17:49:07.233
2025-07-01 17:49:07.233 # pump out diffs from before the synch point
2025-07-01 17:49:07.233 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.233
2025-07-01 17:49:07.233 # do intraline marking on the synch pair
2025-07-01 17:49:07.233 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.233 if eqi is None:
2025-07-01 17:49:07.233 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.233 atags = btags = ""
2025-07-01 17:49:07.233 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.233 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.233 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.233 if tag == 'replace':
2025-07-01 17:49:07.233 atags += '^' * la
2025-07-01 17:49:07.233 btags += '^' * lb
2025-07-01 17:49:07.234 elif tag == 'delete':
2025-07-01 17:49:07.234 atags += '-' * la
2025-07-01 17:49:07.234 elif tag == 'insert':
2025-07-01 17:49:07.234 btags += '+' * lb
2025-07-01 17:49:07.234 elif tag == 'equal':
2025-07-01 17:49:07.234 atags += ' ' * la
2025-07-01 17:49:07.234 btags += ' ' * lb
2025-07-01 17:49:07.234 else:
2025-07-01 17:49:07.234 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.234 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.234 else:
2025-07-01 17:49:07.234 # the synch pair is identical
2025-07-01 17:49:07.234 yield ' ' + aelt
2025-07-01 17:49:07.234
2025-07-01 17:49:07.234 # pump out diffs from after the synch point
2025-07-01 17:49:07.234 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.234
2025-07-01 17:49:07.234 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.234 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.234
2025-07-01 17:49:07.234 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.235 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.240 alo = 241, ahi = 1101
2025-07-01 17:49:07.240 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.240 blo = 241, bhi = 1101
2025-07-01 17:49:07.240
2025-07-01 17:49:07.240 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.240 g = []
2025-07-01 17:49:07.240 if alo < ahi:
2025-07-01 17:49:07.240 if blo < bhi:
2025-07-01 17:49:07.240 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.240 else:
2025-07-01 17:49:07.240 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.240 elif blo < bhi:
2025-07-01 17:49:07.240 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.240
2025-07-01 17:49:07.240 > yield from g
2025-07-01 17:49:07.240
2025-07-01 17:49:07.240 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.240 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.240
2025-07-01 17:49:07.241 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.241 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.241 alo = 241, ahi = 1101
2025-07-01 17:49:07.241 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.241 blo = 241, bhi = 1101
2025-07-01 17:49:07.241
2025-07-01 17:49:07.241 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.241 r"""
2025-07-01 17:49:07.241 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.241 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.241 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.241 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.241
2025-07-01 17:49:07.241 Example:
2025-07-01 17:49:07.241
2025-07-01 17:49:07.241 >>> d = Differ()
2025-07-01 17:49:07.241 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.241 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.241 >>> print(''.join(results), end="")
2025-07-01 17:49:07.241 - abcDefghiJkl
2025-07-01 17:49:07.242 + abcdefGhijkl
2025-07-01 17:49:07.242 """
2025-07-01 17:49:07.242
2025-07-01 17:49:07.242 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.242 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.242 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.242 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.242 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.242
2025-07-01 17:49:07.242 # search for the pair that matches best without being identical
2025-07-01 17:49:07.242 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.242 # on junk -- unless we have to)
2025-07-01 17:49:07.242 for j in range(blo, bhi):
2025-07-01 17:49:07.242 bj = b[j]
2025-07-01 17:49:07.242 cruncher.set_seq2(bj)
2025-07-01 17:49:07.242 for i in range(alo, ahi):
2025-07-01 17:49:07.242 ai = a[i]
2025-07-01 17:49:07.243 if ai == bj:
2025-07-01 17:49:07.243 if eqi is None:
2025-07-01 17:49:07.243 eqi, eqj = i, j
2025-07-01 17:49:07.243 continue
2025-07-01 17:49:07.243 cruncher.set_seq1(ai)
2025-07-01 17:49:07.243 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.243 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.243 # compares by a factor of 3.
2025-07-01 17:49:07.243 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.243 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.243 # of the computation is cached by cruncher
2025-07-01 17:49:07.243 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.243 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.243 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.243 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.243 if best_ratio < cutoff:
2025-07-01 17:49:07.243 # no non-identical "pretty close" pair
2025-07-01 17:49:07.243 if eqi is None:
2025-07-01 17:49:07.243 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.243 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.244 return
2025-07-01 17:49:07.244 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.244 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.244 else:
2025-07-01 17:49:07.244 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.244 eqi = None
2025-07-01 17:49:07.244
2025-07-01 17:49:07.244 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.244 # identical
2025-07-01 17:49:07.244
2025-07-01 17:49:07.244 # pump out diffs from before the synch point
2025-07-01 17:49:07.244 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.244
2025-07-01 17:49:07.244 # do intraline marking on the synch pair
2025-07-01 17:49:07.244 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.244 if eqi is None:
2025-07-01 17:49:07.244 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.244 atags = btags = ""
2025-07-01 17:49:07.244 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.244 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.244 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.245 if tag == 'replace':
2025-07-01 17:49:07.245 atags += '^' * la
2025-07-01 17:49:07.245 btags += '^' * lb
2025-07-01 17:49:07.245 elif tag == 'delete':
2025-07-01 17:49:07.245 atags += '-' * la
2025-07-01 17:49:07.245 elif tag == 'insert':
2025-07-01 17:49:07.245 btags += '+' * lb
2025-07-01 17:49:07.245 elif tag == 'equal':
2025-07-01 17:49:07.245 atags += ' ' * la
2025-07-01 17:49:07.245 btags += ' ' * lb
2025-07-01 17:49:07.245 else:
2025-07-01 17:49:07.245 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.245 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.245 else:
2025-07-01 17:49:07.245 # the synch pair is identical
2025-07-01 17:49:07.245 yield ' ' + aelt
2025-07-01 17:49:07.245
2025-07-01 17:49:07.245 # pump out diffs from after the synch point
2025-07-01 17:49:07.245 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.245
2025-07-01 17:49:07.245 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.246 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.246
2025-07-01 17:49:07.246 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.246 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.246 alo = 242, ahi = 1101
2025-07-01 17:49:07.246 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.246 blo = 242, bhi = 1101
2025-07-01 17:49:07.246
2025-07-01 17:49:07.246 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.246 g = []
2025-07-01 17:49:07.246 if alo < ahi:
2025-07-01 17:49:07.246 if blo < bhi:
2025-07-01 17:49:07.246 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.246 else:
2025-07-01 17:49:07.246 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.246 elif blo < bhi:
2025-07-01 17:49:07.246 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.246
2025-07-01 17:49:07.246 > yield from g
2025-07-01 17:49:07.246
2025-07-01 17:49:07.246 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.247 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.247
2025-07-01 17:49:07.247 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.247 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.247 alo = 242, ahi = 1101
2025-07-01 17:49:07.247 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.247 blo = 242, bhi = 1101
2025-07-01 17:49:07.247
2025-07-01 17:49:07.247 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.247 r"""
2025-07-01 17:49:07.247 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.247 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.247 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.247 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.247
2025-07-01 17:49:07.247 Example:
2025-07-01 17:49:07.247
2025-07-01 17:49:07.247 >>> d = Differ()
2025-07-01 17:49:07.247 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.247 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.248 >>> print(''.join(results), end="")
2025-07-01 17:49:07.248 - abcDefghiJkl
2025-07-01 17:49:07.248 + abcdefGhijkl
2025-07-01 17:49:07.248 """
2025-07-01 17:49:07.248
2025-07-01 17:49:07.248 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.248 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.248 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.248 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.248 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.248
2025-07-01 17:49:07.248 # search for the pair that matches best without being identical
2025-07-01 17:49:07.248 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.248 # on junk -- unless we have to)
2025-07-01 17:49:07.248 for j in range(blo, bhi):
2025-07-01 17:49:07.248 bj = b[j]
2025-07-01 17:49:07.248 cruncher.set_seq2(bj)
2025-07-01 17:49:07.248 for i in range(alo, ahi):
2025-07-01 17:49:07.248 ai = a[i]
2025-07-01 17:49:07.249 if ai == bj:
2025-07-01 17:49:07.249 if eqi is None:
2025-07-01 17:49:07.249 eqi, eqj = i, j
2025-07-01 17:49:07.249 continue
2025-07-01 17:49:07.249 cruncher.set_seq1(ai)
2025-07-01 17:49:07.249 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.249 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.249 # compares by a factor of 3.
2025-07-01 17:49:07.249 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.249 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.249 # of the computation is cached by cruncher
2025-07-01 17:49:07.249 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.249 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.249 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.249 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.249 if best_ratio < cutoff:
2025-07-01 17:49:07.249 # no non-identical "pretty close" pair
2025-07-01 17:49:07.249 if eqi is None:
2025-07-01 17:49:07.249 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.249 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.249 return
2025-07-01 17:49:07.250 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.250 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.253 else:
2025-07-01 17:49:07.253 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.253 eqi = None
2025-07-01 17:49:07.253
2025-07-01 17:49:07.253 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.253 # identical
2025-07-01 17:49:07.253
2025-07-01 17:49:07.253 # pump out diffs from before the synch point
2025-07-01 17:49:07.253 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.253
2025-07-01 17:49:07.253 # do intraline marking on the synch pair
2025-07-01 17:49:07.253 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.253 if eqi is None:
2025-07-01 17:49:07.253 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.253 atags = btags = ""
2025-07-01 17:49:07.253 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.253 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.253 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.254 if tag == 'replace':
2025-07-01 17:49:07.254 atags += '^' * la
2025-07-01 17:49:07.254 btags += '^' * lb
2025-07-01 17:49:07.254 elif tag == 'delete':
2025-07-01 17:49:07.254 atags += '-' * la
2025-07-01 17:49:07.254 elif tag == 'insert':
2025-07-01 17:49:07.254 btags += '+' * lb
2025-07-01 17:49:07.254 elif tag == 'equal':
2025-07-01 17:49:07.254 atags += ' ' * la
2025-07-01 17:49:07.254 btags += ' ' * lb
2025-07-01 17:49:07.254 else:
2025-07-01 17:49:07.254 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.254 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.254 else:
2025-07-01 17:49:07.254 # the synch pair is identical
2025-07-01 17:49:07.254 yield ' ' + aelt
2025-07-01 17:49:07.254
2025-07-01 17:49:07.254 # pump out diffs from after the synch point
2025-07-01 17:49:07.254 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.254
2025-07-01 17:49:07.255 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.255 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.255
2025-07-01 17:49:07.255 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.255 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.255 alo = 243, ahi = 1101
2025-07-01 17:49:07.255 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.255 blo = 243, bhi = 1101
2025-07-01 17:49:07.255
2025-07-01 17:49:07.255 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.255 g = []
2025-07-01 17:49:07.255 if alo < ahi:
2025-07-01 17:49:07.255 if blo < bhi:
2025-07-01 17:49:07.255 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.255 else:
2025-07-01 17:49:07.255 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.255 elif blo < bhi:
2025-07-01 17:49:07.255 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.255
2025-07-01 17:49:07.255 > yield from g
2025-07-01 17:49:07.256
2025-07-01 17:49:07.256 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.256 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.256
2025-07-01 17:49:07.256 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.256 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.256 alo = 243, ahi = 1101
2025-07-01 17:49:07.256 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.256 blo = 243, bhi = 1101
2025-07-01 17:49:07.256
2025-07-01 17:49:07.256 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.256 r"""
2025-07-01 17:49:07.256 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.256 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.256 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.256 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.256
2025-07-01 17:49:07.256 Example:
2025-07-01 17:49:07.256
2025-07-01 17:49:07.256 >>> d = Differ()
2025-07-01 17:49:07.257 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.257 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.257 >>> print(''.join(results), end="")
2025-07-01 17:49:07.257 - abcDefghiJkl
2025-07-01 17:49:07.257 + abcdefGhijkl
2025-07-01 17:49:07.257 """
2025-07-01 17:49:07.257
2025-07-01 17:49:07.257 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.257 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.257 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.257 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.257 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.257
2025-07-01 17:49:07.257 # search for the pair that matches best without being identical
2025-07-01 17:49:07.257 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.257 # on junk -- unless we have to)
2025-07-01 17:49:07.257 for j in range(blo, bhi):
2025-07-01 17:49:07.257 bj = b[j]
2025-07-01 17:49:07.257 cruncher.set_seq2(bj)
2025-07-01 17:49:07.258 for i in range(alo, ahi):
2025-07-01 17:49:07.258 ai = a[i]
2025-07-01 17:49:07.258 if ai == bj:
2025-07-01 17:49:07.258 if eqi is None:
2025-07-01 17:49:07.258 eqi, eqj = i, j
2025-07-01 17:49:07.258 continue
2025-07-01 17:49:07.258 cruncher.set_seq1(ai)
2025-07-01 17:49:07.258 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.258 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.258 # compares by a factor of 3.
2025-07-01 17:49:07.258 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.258 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.258 # of the computation is cached by cruncher
2025-07-01 17:49:07.258 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.258 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.258 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.258 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.258 if best_ratio < cutoff:
2025-07-01 17:49:07.258 # no non-identical "pretty close" pair
2025-07-01 17:49:07.258 if eqi is None:
2025-07-01 17:49:07.258 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.259 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.259 return
2025-07-01 17:49:07.259 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.259 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.259 else:
2025-07-01 17:49:07.259 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.259 eqi = None
2025-07-01 17:49:07.259
2025-07-01 17:49:07.259 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.259 # identical
2025-07-01 17:49:07.259
2025-07-01 17:49:07.259 # pump out diffs from before the synch point
2025-07-01 17:49:07.259 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.259
2025-07-01 17:49:07.259 # do intraline marking on the synch pair
2025-07-01 17:49:07.259 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.259 if eqi is None:
2025-07-01 17:49:07.259 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.259 atags = btags = ""
2025-07-01 17:49:07.259 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.259 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.259 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.260 if tag == 'replace':
2025-07-01 17:49:07.260 atags += '^' * la
2025-07-01 17:49:07.260 btags += '^' * lb
2025-07-01 17:49:07.260 elif tag == 'delete':
2025-07-01 17:49:07.260 atags += '-' * la
2025-07-01 17:49:07.260 elif tag == 'insert':
2025-07-01 17:49:07.260 btags += '+' * lb
2025-07-01 17:49:07.260 elif tag == 'equal':
2025-07-01 17:49:07.260 atags += ' ' * la
2025-07-01 17:49:07.260 btags += ' ' * lb
2025-07-01 17:49:07.260 else:
2025-07-01 17:49:07.260 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.260 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.260 else:
2025-07-01 17:49:07.260 # the synch pair is identical
2025-07-01 17:49:07.260 yield ' ' + aelt
2025-07-01 17:49:07.260
2025-07-01 17:49:07.260 # pump out diffs from after the synch point
2025-07-01 17:49:07.260 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.260
2025-07-01 17:49:07.260 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.261 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.261
2025-07-01 17:49:07.261 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.261 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.261 alo = 246, ahi = 1101
2025-07-01 17:49:07.261 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.261 blo = 246, bhi = 1101
2025-07-01 17:49:07.261
2025-07-01 17:49:07.261 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.261 g = []
2025-07-01 17:49:07.261 if alo < ahi:
2025-07-01 17:49:07.261 if blo < bhi:
2025-07-01 17:49:07.261 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.261 else:
2025-07-01 17:49:07.261 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.261 elif blo < bhi:
2025-07-01 17:49:07.261 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.261
2025-07-01 17:49:07.261 > yield from g
2025-07-01 17:49:07.261
2025-07-01 17:49:07.261 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.261 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.262
2025-07-01 17:49:07.262 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.262 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.262 alo = 246, ahi = 1101
2025-07-01 17:49:07.262 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.262 blo = 246, bhi = 1101
2025-07-01 17:49:07.262
2025-07-01 17:49:07.262 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.262 r"""
2025-07-01 17:49:07.262 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.262 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.262 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.262 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.262
2025-07-01 17:49:07.262 Example:
2025-07-01 17:49:07.262
2025-07-01 17:49:07.262 >>> d = Differ()
2025-07-01 17:49:07.262 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.263 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.263 >>> print(''.join(results), end="")
2025-07-01 17:49:07.263 - abcDefghiJkl
2025-07-01 17:49:07.263 + abcdefGhijkl
2025-07-01 17:49:07.263 """
2025-07-01 17:49:07.263
2025-07-01 17:49:07.263 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.263 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.263 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.263 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.263 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.263
2025-07-01 17:49:07.263 # search for the pair that matches best without being identical
2025-07-01 17:49:07.263 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.263 # on junk -- unless we have to)
2025-07-01 17:49:07.263 for j in range(blo, bhi):
2025-07-01 17:49:07.263 bj = b[j]
2025-07-01 17:49:07.263 cruncher.set_seq2(bj)
2025-07-01 17:49:07.263 for i in range(alo, ahi):
2025-07-01 17:49:07.264 ai = a[i]
2025-07-01 17:49:07.264 if ai == bj:
2025-07-01 17:49:07.264 if eqi is None:
2025-07-01 17:49:07.264 eqi, eqj = i, j
2025-07-01 17:49:07.264 continue
2025-07-01 17:49:07.264 cruncher.set_seq1(ai)
2025-07-01 17:49:07.264 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.264 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.264 # compares by a factor of 3.
2025-07-01 17:49:07.264 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.264 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.264 # of the computation is cached by cruncher
2025-07-01 17:49:07.264 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.264 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.264 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.264 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.264 if best_ratio < cutoff:
2025-07-01 17:49:07.265 # no non-identical "pretty close" pair
2025-07-01 17:49:07.265 if eqi is None:
2025-07-01 17:49:07.265 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.265 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.265 return
2025-07-01 17:49:07.265 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.265 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.265 else:
2025-07-01 17:49:07.265 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.265 eqi = None
2025-07-01 17:49:07.265
2025-07-01 17:49:07.265 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.265 # identical
2025-07-01 17:49:07.265
2025-07-01 17:49:07.265 # pump out diffs from before the synch point
2025-07-01 17:49:07.265 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.265
2025-07-01 17:49:07.270 # do intraline marking on the synch pair
2025-07-01 17:49:07.270 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.271 if eqi is None:
2025-07-01 17:49:07.271 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.271 atags = btags = ""
2025-07-01 17:49:07.271 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.271 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.271 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.271 if tag == 'replace':
2025-07-01 17:49:07.271 atags += '^' * la
2025-07-01 17:49:07.271 btags += '^' * lb
2025-07-01 17:49:07.271 elif tag == 'delete':
2025-07-01 17:49:07.271 atags += '-' * la
2025-07-01 17:49:07.271 elif tag == 'insert':
2025-07-01 17:49:07.271 btags += '+' * lb
2025-07-01 17:49:07.271 elif tag == 'equal':
2025-07-01 17:49:07.271 atags += ' ' * la
2025-07-01 17:49:07.271 btags += ' ' * lb
2025-07-01 17:49:07.272 else:
2025-07-01 17:49:07.272 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.272 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.272 else:
2025-07-01 17:49:07.272 # the synch pair is identical
2025-07-01 17:49:07.272 yield ' ' + aelt
2025-07-01 17:49:07.272
2025-07-01 17:49:07.272 # pump out diffs from after the synch point
2025-07-01 17:49:07.272 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.272
2025-07-01 17:49:07.272 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.272 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.272
2025-07-01 17:49:07.272 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.272 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.272 alo = 247, ahi = 1101
2025-07-01 17:49:07.273 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.273 blo = 247, bhi = 1101
2025-07-01 17:49:07.273
2025-07-01 17:49:07.273 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.273 g = []
2025-07-01 17:49:07.273 if alo < ahi:
2025-07-01 17:49:07.273 if blo < bhi:
2025-07-01 17:49:07.273 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.273 else:
2025-07-01 17:49:07.273 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.273 elif blo < bhi:
2025-07-01 17:49:07.273 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.273
2025-07-01 17:49:07.273 > yield from g
2025-07-01 17:49:07.273
2025-07-01 17:49:07.273 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.273 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.274
2025-07-01 17:49:07.274 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.274 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.274 alo = 247, ahi = 1101
2025-07-01 17:49:07.274 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.274 blo = 247, bhi = 1101
2025-07-01 17:49:07.274
2025-07-01 17:49:07.274 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.274 r"""
2025-07-01 17:49:07.274 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.274 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.274 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.274 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.274
2025-07-01 17:49:07.274 Example:
2025-07-01 17:49:07.274
2025-07-01 17:49:07.274 >>> d = Differ()
2025-07-01 17:49:07.275 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.275 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.275 >>> print(''.join(results), end="")
2025-07-01 17:49:07.275 - abcDefghiJkl
2025-07-01 17:49:07.275 + abcdefGhijkl
2025-07-01 17:49:07.275 """
2025-07-01 17:49:07.275
2025-07-01 17:49:07.275 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.275 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.275 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.275 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.275 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.275
2025-07-01 17:49:07.275 # search for the pair that matches best without being identical
2025-07-01 17:49:07.275 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.276 # on junk -- unless we have to)
2025-07-01 17:49:07.276 for j in range(blo, bhi):
2025-07-01 17:49:07.276 bj = b[j]
2025-07-01 17:49:07.276 cruncher.set_seq2(bj)
2025-07-01 17:49:07.276 for i in range(alo, ahi):
2025-07-01 17:49:07.276 ai = a[i]
2025-07-01 17:49:07.276 if ai == bj:
2025-07-01 17:49:07.276 if eqi is None:
2025-07-01 17:49:07.276 eqi, eqj = i, j
2025-07-01 17:49:07.276 continue
2025-07-01 17:49:07.276 cruncher.set_seq1(ai)
2025-07-01 17:49:07.276 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.276 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.276 # compares by a factor of 3.
2025-07-01 17:49:07.276 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.276 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.276 # of the computation is cached by cruncher
2025-07-01 17:49:07.277 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.277 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.277 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.277 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.277 if best_ratio < cutoff:
2025-07-01 17:49:07.277 # no non-identical "pretty close" pair
2025-07-01 17:49:07.277 if eqi is None:
2025-07-01 17:49:07.277 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.277 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.277 return
2025-07-01 17:49:07.277 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.277 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.277 else:
2025-07-01 17:49:07.277 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.277 eqi = None
2025-07-01 17:49:07.277
2025-07-01 17:49:07.278 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.278 # identical
2025-07-01 17:49:07.278
2025-07-01 17:49:07.278 # pump out diffs from before the synch point
2025-07-01 17:49:07.278 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.278
2025-07-01 17:49:07.278 # do intraline marking on the synch pair
2025-07-01 17:49:07.278 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.278 if eqi is None:
2025-07-01 17:49:07.278 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.278 atags = btags = ""
2025-07-01 17:49:07.278 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.278 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.278 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.278 if tag == 'replace':
2025-07-01 17:49:07.278 atags += '^' * la
2025-07-01 17:49:07.278 btags += '^' * lb
2025-07-01 17:49:07.279 elif tag == 'delete':
2025-07-01 17:49:07.279 atags += '-' * la
2025-07-01 17:49:07.279 elif tag == 'insert':
2025-07-01 17:49:07.279 btags += '+' * lb
2025-07-01 17:49:07.279 elif tag == 'equal':
2025-07-01 17:49:07.279 atags += ' ' * la
2025-07-01 17:49:07.279 btags += ' ' * lb
2025-07-01 17:49:07.279 else:
2025-07-01 17:49:07.279 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.279 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.279 else:
2025-07-01 17:49:07.279 # the synch pair is identical
2025-07-01 17:49:07.279 yield ' ' + aelt
2025-07-01 17:49:07.279
2025-07-01 17:49:07.279 # pump out diffs from after the synch point
2025-07-01 17:49:07.279 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.279
2025-07-01 17:49:07.280 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.280 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.280
2025-07-01 17:49:07.280 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.280 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.280 alo = 248, ahi = 1101
2025-07-01 17:49:07.280 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.280 blo = 248, bhi = 1101
2025-07-01 17:49:07.280
2025-07-01 17:49:07.280 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.280 g = []
2025-07-01 17:49:07.280 if alo < ahi:
2025-07-01 17:49:07.280 if blo < bhi:
2025-07-01 17:49:07.280 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.280 else:
2025-07-01 17:49:07.280 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.281 elif blo < bhi:
2025-07-01 17:49:07.281 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.281
2025-07-01 17:49:07.281 > yield from g
2025-07-01 17:49:07.281
2025-07-01 17:49:07.281 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.281 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.281
2025-07-01 17:49:07.281 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.281 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.281 alo = 248, ahi = 1101
2025-07-01 17:49:07.281 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.281 blo = 248, bhi = 1101
2025-07-01 17:49:07.281
2025-07-01 17:49:07.281 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.281 r"""
2025-07-01 17:49:07.285 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.285 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.285 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.285 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.285
2025-07-01 17:49:07.285 Example:
2025-07-01 17:49:07.285
2025-07-01 17:49:07.285 >>> d = Differ()
2025-07-01 17:49:07.285 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.285 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.285 >>> print(''.join(results), end="")
2025-07-01 17:49:07.286 - abcDefghiJkl
2025-07-01 17:49:07.286 + abcdefGhijkl
2025-07-01 17:49:07.286 """
2025-07-01 17:49:07.286
2025-07-01 17:49:07.286 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.286 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.286 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.286 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.286 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.286
2025-07-01 17:49:07.286 # search for the pair that matches best without being identical
2025-07-01 17:49:07.286 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.286 # on junk -- unless we have to)
2025-07-01 17:49:07.286 for j in range(blo, bhi):
2025-07-01 17:49:07.286 bj = b[j]
2025-07-01 17:49:07.287 cruncher.set_seq2(bj)
2025-07-01 17:49:07.287 for i in range(alo, ahi):
2025-07-01 17:49:07.287 ai = a[i]
2025-07-01 17:49:07.287 if ai == bj:
2025-07-01 17:49:07.287 if eqi is None:
2025-07-01 17:49:07.287 eqi, eqj = i, j
2025-07-01 17:49:07.287 continue
2025-07-01 17:49:07.287 cruncher.set_seq1(ai)
2025-07-01 17:49:07.287 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.287 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.287 # compares by a factor of 3.
2025-07-01 17:49:07.287 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.287 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.287 # of the computation is cached by cruncher
2025-07-01 17:49:07.287 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.287 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.287 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.288 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.288 if best_ratio < cutoff:
2025-07-01 17:49:07.288 # no non-identical "pretty close" pair
2025-07-01 17:49:07.288 if eqi is None:
2025-07-01 17:49:07.288 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.288 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.288 return
2025-07-01 17:49:07.288 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.288 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.288 else:
2025-07-01 17:49:07.288 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.288 eqi = None
2025-07-01 17:49:07.288
2025-07-01 17:49:07.288 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.288 # identical
2025-07-01 17:49:07.288
2025-07-01 17:49:07.288 # pump out diffs from before the synch point
2025-07-01 17:49:07.289 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.289
2025-07-01 17:49:07.289 # do intraline marking on the synch pair
2025-07-01 17:49:07.289 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.289 if eqi is None:
2025-07-01 17:49:07.289 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.289 atags = btags = ""
2025-07-01 17:49:07.289 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.289 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.289 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.289 if tag == 'replace':
2025-07-01 17:49:07.289 atags += '^' * la
2025-07-01 17:49:07.289 btags += '^' * lb
2025-07-01 17:49:07.289 elif tag == 'delete':
2025-07-01 17:49:07.289 atags += '-' * la
2025-07-01 17:49:07.289 elif tag == 'insert':
2025-07-01 17:49:07.289 btags += '+' * lb
2025-07-01 17:49:07.290 elif tag == 'equal':
2025-07-01 17:49:07.290 atags += ' ' * la
2025-07-01 17:49:07.290 btags += ' ' * lb
2025-07-01 17:49:07.290 else:
2025-07-01 17:49:07.290 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.290 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.290 else:
2025-07-01 17:49:07.290 # the synch pair is identical
2025-07-01 17:49:07.290 yield ' ' + aelt
2025-07-01 17:49:07.290
2025-07-01 17:49:07.290 # pump out diffs from after the synch point
2025-07-01 17:49:07.290 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.290
2025-07-01 17:49:07.290 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.290 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.290
2025-07-01 17:49:07.291 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.291 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.291 alo = 249, ahi = 1101
2025-07-01 17:49:07.291 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.291 blo = 249, bhi = 1101
2025-07-01 17:49:07.291
2025-07-01 17:49:07.291 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.291 g = []
2025-07-01 17:49:07.291 if alo < ahi:
2025-07-01 17:49:07.291 if blo < bhi:
2025-07-01 17:49:07.291 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.291 else:
2025-07-01 17:49:07.291 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.291 elif blo < bhi:
2025-07-01 17:49:07.291 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.291
2025-07-01 17:49:07.292 > yield from g
2025-07-01 17:49:07.292
2025-07-01 17:49:07.292 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.292 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.292
2025-07-01 17:49:07.292 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.292 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.292 alo = 249, ahi = 1101
2025-07-01 17:49:07.292 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.292 blo = 249, bhi = 1101
2025-07-01 17:49:07.292
2025-07-01 17:49:07.292 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.292 r"""
2025-07-01 17:49:07.292 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.293 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.293 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.293 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.293
2025-07-01 17:49:07.293 Example:
2025-07-01 17:49:07.293
2025-07-01 17:49:07.293 >>> d = Differ()
2025-07-01 17:49:07.293 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.293 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.293 >>> print(''.join(results), end="")
2025-07-01 17:49:07.293 - abcDefghiJkl
2025-07-01 17:49:07.293 + abcdefGhijkl
2025-07-01 17:49:07.293 """
2025-07-01 17:49:07.293
2025-07-01 17:49:07.294 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.294 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.294 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.294 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.294 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.294
2025-07-01 17:49:07.294 # search for the pair that matches best without being identical
2025-07-01 17:49:07.294 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.294 # on junk -- unless we have to)
2025-07-01 17:49:07.294 for j in range(blo, bhi):
2025-07-01 17:49:07.294 bj = b[j]
2025-07-01 17:49:07.294 cruncher.set_seq2(bj)
2025-07-01 17:49:07.294 for i in range(alo, ahi):
2025-07-01 17:49:07.294 ai = a[i]
2025-07-01 17:49:07.294 if ai == bj:
2025-07-01 17:49:07.294 if eqi is None:
2025-07-01 17:49:07.295 eqi, eqj = i, j
2025-07-01 17:49:07.295 continue
2025-07-01 17:49:07.295 cruncher.set_seq1(ai)
2025-07-01 17:49:07.295 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.295 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.295 # compares by a factor of 3.
2025-07-01 17:49:07.295 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.295 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.295 # of the computation is cached by cruncher
2025-07-01 17:49:07.295 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.295 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.295 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.295 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.295 if best_ratio < cutoff:
2025-07-01 17:49:07.295 # no non-identical "pretty close" pair
2025-07-01 17:49:07.295 if eqi is None:
2025-07-01 17:49:07.295 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.296 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.296 return
2025-07-01 17:49:07.296 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.296 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.296 else:
2025-07-01 17:49:07.296 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.296 eqi = None
2025-07-01 17:49:07.296
2025-07-01 17:49:07.296 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.296 # identical
2025-07-01 17:49:07.296
2025-07-01 17:49:07.296 # pump out diffs from before the synch point
2025-07-01 17:49:07.296 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.296
2025-07-01 17:49:07.296 # do intraline marking on the synch pair
2025-07-01 17:49:07.296 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.296 if eqi is None:
2025-07-01 17:49:07.297 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.301 atags = btags = ""
2025-07-01 17:49:07.301 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.301 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.302 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.302 if tag == 'replace':
2025-07-01 17:49:07.302 atags += '^' * la
2025-07-01 17:49:07.302 btags += '^' * lb
2025-07-01 17:49:07.302 elif tag == 'delete':
2025-07-01 17:49:07.302 atags += '-' * la
2025-07-01 17:49:07.302 elif tag == 'insert':
2025-07-01 17:49:07.302 btags += '+' * lb
2025-07-01 17:49:07.302 elif tag == 'equal':
2025-07-01 17:49:07.302 atags += ' ' * la
2025-07-01 17:49:07.302 btags += ' ' * lb
2025-07-01 17:49:07.302 else:
2025-07-01 17:49:07.302 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.302 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.302 else:
2025-07-01 17:49:07.302 # the synch pair is identical
2025-07-01 17:49:07.303 yield ' ' + aelt
2025-07-01 17:49:07.303
2025-07-01 17:49:07.303 # pump out diffs from after the synch point
2025-07-01 17:49:07.303 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.303
2025-07-01 17:49:07.303 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.303 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.303
2025-07-01 17:49:07.303 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.303 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.303 alo = 250, ahi = 1101
2025-07-01 17:49:07.303 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.303 blo = 250, bhi = 1101
2025-07-01 17:49:07.303
2025-07-01 17:49:07.303 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.303 g = []
2025-07-01 17:49:07.304 if alo < ahi:
2025-07-01 17:49:07.304 if blo < bhi:
2025-07-01 17:49:07.304 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.304 else:
2025-07-01 17:49:07.304 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.304 elif blo < bhi:
2025-07-01 17:49:07.304 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.304
2025-07-01 17:49:07.304 > yield from g
2025-07-01 17:49:07.304
2025-07-01 17:49:07.304 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.304 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.304
2025-07-01 17:49:07.304 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.304 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.304 alo = 250, ahi = 1101
2025-07-01 17:49:07.304 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.305 blo = 250, bhi = 1101
2025-07-01 17:49:07.305
2025-07-01 17:49:07.305 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.305 r"""
2025-07-01 17:49:07.305 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.305 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.305 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.305 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.305
2025-07-01 17:49:07.305 Example:
2025-07-01 17:49:07.305
2025-07-01 17:49:07.305 >>> d = Differ()
2025-07-01 17:49:07.305 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.305 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.305 >>> print(''.join(results), end="")
2025-07-01 17:49:07.306 - abcDefghiJkl
2025-07-01 17:49:07.306 + abcdefGhijkl
2025-07-01 17:49:07.306 """
2025-07-01 17:49:07.306
2025-07-01 17:49:07.306 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.306 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.306 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.306 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.306 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.306
2025-07-01 17:49:07.306 # search for the pair that matches best without being identical
2025-07-01 17:49:07.306 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.306 # on junk -- unless we have to)
2025-07-01 17:49:07.306 for j in range(blo, bhi):
2025-07-01 17:49:07.306 bj = b[j]
2025-07-01 17:49:07.307 cruncher.set_seq2(bj)
2025-07-01 17:49:07.307 for i in range(alo, ahi):
2025-07-01 17:49:07.307 ai = a[i]
2025-07-01 17:49:07.307 if ai == bj:
2025-07-01 17:49:07.307 if eqi is None:
2025-07-01 17:49:07.307 eqi, eqj = i, j
2025-07-01 17:49:07.307 continue
2025-07-01 17:49:07.307 cruncher.set_seq1(ai)
2025-07-01 17:49:07.307 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.307 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.307 # compares by a factor of 3.
2025-07-01 17:49:07.307 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.307 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.307 # of the computation is cached by cruncher
2025-07-01 17:49:07.307 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.308 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.308 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.308 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.308 if best_ratio < cutoff:
2025-07-01 17:49:07.308 # no non-identical "pretty close" pair
2025-07-01 17:49:07.308 if eqi is None:
2025-07-01 17:49:07.308 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.308 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.308 return
2025-07-01 17:49:07.308 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.308 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.308 else:
2025-07-01 17:49:07.308 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.308 eqi = None
2025-07-01 17:49:07.308
2025-07-01 17:49:07.308 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.309 # identical
2025-07-01 17:49:07.309
2025-07-01 17:49:07.309 # pump out diffs from before the synch point
2025-07-01 17:49:07.309 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.309
2025-07-01 17:49:07.309 # do intraline marking on the synch pair
2025-07-01 17:49:07.309 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.309 if eqi is None:
2025-07-01 17:49:07.309 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.309 atags = btags = ""
2025-07-01 17:49:07.309 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.309 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.309 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.309 if tag == 'replace':
2025-07-01 17:49:07.309 atags += '^' * la
2025-07-01 17:49:07.309 btags += '^' * lb
2025-07-01 17:49:07.310 elif tag == 'delete':
2025-07-01 17:49:07.310 atags += '-' * la
2025-07-01 17:49:07.310 elif tag == 'insert':
2025-07-01 17:49:07.310 btags += '+' * lb
2025-07-01 17:49:07.310 elif tag == 'equal':
2025-07-01 17:49:07.310 atags += ' ' * la
2025-07-01 17:49:07.310 btags += ' ' * lb
2025-07-01 17:49:07.310 else:
2025-07-01 17:49:07.310 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.310 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.310 else:
2025-07-01 17:49:07.310 # the synch pair is identical
2025-07-01 17:49:07.310 yield ' ' + aelt
2025-07-01 17:49:07.310
2025-07-01 17:49:07.310 # pump out diffs from after the synch point
2025-07-01 17:49:07.310 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.310
2025-07-01 17:49:07.310 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.311 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.311
2025-07-01 17:49:07.311 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.311 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.311 alo = 251, ahi = 1101
2025-07-01 17:49:07.311 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.311 blo = 251, bhi = 1101
2025-07-01 17:49:07.311
2025-07-01 17:49:07.311 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.311 g = []
2025-07-01 17:49:07.311 if alo < ahi:
2025-07-01 17:49:07.311 if blo < bhi:
2025-07-01 17:49:07.311 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.311 else:
2025-07-01 17:49:07.311 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.312 elif blo < bhi:
2025-07-01 17:49:07.312 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.312
2025-07-01 17:49:07.312 > yield from g
2025-07-01 17:49:07.312
2025-07-01 17:49:07.312 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.312 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.312
2025-07-01 17:49:07.312 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.312 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.312 alo = 251, ahi = 1101
2025-07-01 17:49:07.312 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.312 blo = 251, bhi = 1101
2025-07-01 17:49:07.312
2025-07-01 17:49:07.312 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.312 r"""
2025-07-01 17:49:07.312 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.313 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.316 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.316 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.316
2025-07-01 17:49:07.316 Example:
2025-07-01 17:49:07.316
2025-07-01 17:49:07.316 >>> d = Differ()
2025-07-01 17:49:07.316 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.316 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.316 >>> print(''.join(results), end="")
2025-07-01 17:49:07.316 - abcDefghiJkl
2025-07-01 17:49:07.316 + abcdefGhijkl
2025-07-01 17:49:07.317 """
2025-07-01 17:49:07.317
2025-07-01 17:49:07.317 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.317 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.317 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.317 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.317 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.317
2025-07-01 17:49:07.317 # search for the pair that matches best without being identical
2025-07-01 17:49:07.317 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.317 # on junk -- unless we have to)
2025-07-01 17:49:07.317 for j in range(blo, bhi):
2025-07-01 17:49:07.317 bj = b[j]
2025-07-01 17:49:07.317 cruncher.set_seq2(bj)
2025-07-01 17:49:07.317 for i in range(alo, ahi):
2025-07-01 17:49:07.318 ai = a[i]
2025-07-01 17:49:07.318 if ai == bj:
2025-07-01 17:49:07.318 if eqi is None:
2025-07-01 17:49:07.318 eqi, eqj = i, j
2025-07-01 17:49:07.318 continue
2025-07-01 17:49:07.318 cruncher.set_seq1(ai)
2025-07-01 17:49:07.318 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.318 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.318 # compares by a factor of 3.
2025-07-01 17:49:07.318 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.318 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.318 # of the computation is cached by cruncher
2025-07-01 17:49:07.318 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.318 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.318 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.318 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.319 if best_ratio < cutoff:
2025-07-01 17:49:07.319 # no non-identical "pretty close" pair
2025-07-01 17:49:07.319 if eqi is None:
2025-07-01 17:49:07.319 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.319 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.319 return
2025-07-01 17:49:07.319 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.319 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.319 else:
2025-07-01 17:49:07.319 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.319 eqi = None
2025-07-01 17:49:07.319
2025-07-01 17:49:07.319 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.319 # identical
2025-07-01 17:49:07.319
2025-07-01 17:49:07.320 # pump out diffs from before the synch point
2025-07-01 17:49:07.320 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.320
2025-07-01 17:49:07.320 # do intraline marking on the synch pair
2025-07-01 17:49:07.320 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.320 if eqi is None:
2025-07-01 17:49:07.320 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.320 atags = btags = ""
2025-07-01 17:49:07.320 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.320 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.320 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.320 if tag == 'replace':
2025-07-01 17:49:07.320 atags += '^' * la
2025-07-01 17:49:07.320 btags += '^' * lb
2025-07-01 17:49:07.320 elif tag == 'delete':
2025-07-01 17:49:07.320 atags += '-' * la
2025-07-01 17:49:07.320 elif tag == 'insert':
2025-07-01 17:49:07.321 btags += '+' * lb
2025-07-01 17:49:07.321 elif tag == 'equal':
2025-07-01 17:49:07.321 atags += ' ' * la
2025-07-01 17:49:07.321 btags += ' ' * lb
2025-07-01 17:49:07.321 else:
2025-07-01 17:49:07.321 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.321 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.321 else:
2025-07-01 17:49:07.321 # the synch pair is identical
2025-07-01 17:49:07.321 yield ' ' + aelt
2025-07-01 17:49:07.321
2025-07-01 17:49:07.321 # pump out diffs from after the synch point
2025-07-01 17:49:07.321 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.321
2025-07-01 17:49:07.321 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.322 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.322
2025-07-01 17:49:07.322 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.322 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.322 alo = 252, ahi = 1101
2025-07-01 17:49:07.322 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.322 blo = 252, bhi = 1101
2025-07-01 17:49:07.322
2025-07-01 17:49:07.322 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.322 g = []
2025-07-01 17:49:07.322 if alo < ahi:
2025-07-01 17:49:07.322 if blo < bhi:
2025-07-01 17:49:07.322 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.322 else:
2025-07-01 17:49:07.322 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.322 elif blo < bhi:
2025-07-01 17:49:07.322 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.323
2025-07-01 17:49:07.323 > yield from g
2025-07-01 17:49:07.323
2025-07-01 17:49:07.323 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.323 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.323
2025-07-01 17:49:07.323 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.323 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.323 alo = 252, ahi = 1101
2025-07-01 17:49:07.323 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.323 blo = 252, bhi = 1101
2025-07-01 17:49:07.323
2025-07-01 17:49:07.323 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.323 r"""
2025-07-01 17:49:07.323 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.323 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.323 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.324 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.324
2025-07-01 17:49:07.324 Example:
2025-07-01 17:49:07.324
2025-07-01 17:49:07.324 >>> d = Differ()
2025-07-01 17:49:07.324 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.324 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.324 >>> print(''.join(results), end="")
2025-07-01 17:49:07.324 - abcDefghiJkl
2025-07-01 17:49:07.324 + abcdefGhijkl
2025-07-01 17:49:07.324 """
2025-07-01 17:49:07.324
2025-07-01 17:49:07.324 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.324 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.324 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.325 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.325 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.325
2025-07-01 17:49:07.325 # search for the pair that matches best without being identical
2025-07-01 17:49:07.325 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.325 # on junk -- unless we have to)
2025-07-01 17:49:07.325 for j in range(blo, bhi):
2025-07-01 17:49:07.325 bj = b[j]
2025-07-01 17:49:07.325 cruncher.set_seq2(bj)
2025-07-01 17:49:07.325 for i in range(alo, ahi):
2025-07-01 17:49:07.325 ai = a[i]
2025-07-01 17:49:07.325 if ai == bj:
2025-07-01 17:49:07.325 if eqi is None:
2025-07-01 17:49:07.325 eqi, eqj = i, j
2025-07-01 17:49:07.325 continue
2025-07-01 17:49:07.325 cruncher.set_seq1(ai)
2025-07-01 17:49:07.326 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.326 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.326 # compares by a factor of 3.
2025-07-01 17:49:07.326 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.326 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.326 # of the computation is cached by cruncher
2025-07-01 17:49:07.326 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.326 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.326 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.326 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.326 if best_ratio < cutoff:
2025-07-01 17:49:07.326 # no non-identical "pretty close" pair
2025-07-01 17:49:07.326 if eqi is None:
2025-07-01 17:49:07.326 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.326 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.326 return
2025-07-01 17:49:07.327 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.327 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.327 else:
2025-07-01 17:49:07.327 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.327 eqi = None
2025-07-01 17:49:07.327
2025-07-01 17:49:07.327 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.327 # identical
2025-07-01 17:49:07.327
2025-07-01 17:49:07.327 # pump out diffs from before the synch point
2025-07-01 17:49:07.327 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.327
2025-07-01 17:49:07.327 # do intraline marking on the synch pair
2025-07-01 17:49:07.327 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.327 if eqi is None:
2025-07-01 17:49:07.327 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.327 atags = btags = ""
2025-07-01 17:49:07.333 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.333 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.333 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.333 if tag == 'replace':
2025-07-01 17:49:07.333 atags += '^' * la
2025-07-01 17:49:07.333 btags += '^' * lb
2025-07-01 17:49:07.333 elif tag == 'delete':
2025-07-01 17:49:07.333 atags += '-' * la
2025-07-01 17:49:07.333 elif tag == 'insert':
2025-07-01 17:49:07.333 btags += '+' * lb
2025-07-01 17:49:07.333 elif tag == 'equal':
2025-07-01 17:49:07.333 atags += ' ' * la
2025-07-01 17:49:07.333 btags += ' ' * lb
2025-07-01 17:49:07.334 else:
2025-07-01 17:49:07.334 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.334 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.334 else:
2025-07-01 17:49:07.334 # the synch pair is identical
2025-07-01 17:49:07.334 yield ' ' + aelt
2025-07-01 17:49:07.334
2025-07-01 17:49:07.334 # pump out diffs from after the synch point
2025-07-01 17:49:07.334 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.334
2025-07-01 17:49:07.334 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.334 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.334
2025-07-01 17:49:07.334 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.334 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.334 alo = 253, ahi = 1101
2025-07-01 17:49:07.334 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.335 blo = 253, bhi = 1101
2025-07-01 17:49:07.335
2025-07-01 17:49:07.335 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.335 g = []
2025-07-01 17:49:07.335 if alo < ahi:
2025-07-01 17:49:07.335 if blo < bhi:
2025-07-01 17:49:07.335 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.335 else:
2025-07-01 17:49:07.335 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.335 elif blo < bhi:
2025-07-01 17:49:07.335 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.335
2025-07-01 17:49:07.335 > yield from g
2025-07-01 17:49:07.335
2025-07-01 17:49:07.335 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.335 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.336
2025-07-01 17:49:07.336 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.336 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.336 alo = 253, ahi = 1101
2025-07-01 17:49:07.336 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.336 blo = 253, bhi = 1101
2025-07-01 17:49:07.336
2025-07-01 17:49:07.336 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.336 r"""
2025-07-01 17:49:07.336 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.336 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.336 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.336 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.336
2025-07-01 17:49:07.336 Example:
2025-07-01 17:49:07.336
2025-07-01 17:49:07.337 >>> d = Differ()
2025-07-01 17:49:07.337 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.337 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.337 >>> print(''.join(results), end="")
2025-07-01 17:49:07.337 - abcDefghiJkl
2025-07-01 17:49:07.337 + abcdefGhijkl
2025-07-01 17:49:07.337 """
2025-07-01 17:49:07.337
2025-07-01 17:49:07.337 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.337 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.337 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.337 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.337 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.337
2025-07-01 17:49:07.337 # search for the pair that matches best without being identical
2025-07-01 17:49:07.338 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.338 # on junk -- unless we have to)
2025-07-01 17:49:07.338 for j in range(blo, bhi):
2025-07-01 17:49:07.338 bj = b[j]
2025-07-01 17:49:07.338 cruncher.set_seq2(bj)
2025-07-01 17:49:07.338 for i in range(alo, ahi):
2025-07-01 17:49:07.338 ai = a[i]
2025-07-01 17:49:07.338 if ai == bj:
2025-07-01 17:49:07.338 if eqi is None:
2025-07-01 17:49:07.338 eqi, eqj = i, j
2025-07-01 17:49:07.338 continue
2025-07-01 17:49:07.338 cruncher.set_seq1(ai)
2025-07-01 17:49:07.338 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.338 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.338 # compares by a factor of 3.
2025-07-01 17:49:07.338 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.339 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.339 # of the computation is cached by cruncher
2025-07-01 17:49:07.339 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.339 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.339 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.339 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.339 if best_ratio < cutoff:
2025-07-01 17:49:07.339 # no non-identical "pretty close" pair
2025-07-01 17:49:07.339 if eqi is None:
2025-07-01 17:49:07.339 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.339 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.339 return
2025-07-01 17:49:07.339 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.339 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.339 else:
2025-07-01 17:49:07.339 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.339 eqi = None
2025-07-01 17:49:07.340
2025-07-01 17:49:07.340 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.340 # identical
2025-07-01 17:49:07.340
2025-07-01 17:49:07.340 # pump out diffs from before the synch point
2025-07-01 17:49:07.340 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.340
2025-07-01 17:49:07.340 # do intraline marking on the synch pair
2025-07-01 17:49:07.340 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.340 if eqi is None:
2025-07-01 17:49:07.340 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.340 atags = btags = ""
2025-07-01 17:49:07.340 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.340 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.340 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.341 if tag == 'replace':
2025-07-01 17:49:07.341 atags += '^' * la
2025-07-01 17:49:07.341 btags += '^' * lb
2025-07-01 17:49:07.341 elif tag == 'delete':
2025-07-01 17:49:07.341 atags += '-' * la
2025-07-01 17:49:07.341 elif tag == 'insert':
2025-07-01 17:49:07.341 btags += '+' * lb
2025-07-01 17:49:07.341 elif tag == 'equal':
2025-07-01 17:49:07.341 atags += ' ' * la
2025-07-01 17:49:07.341 btags += ' ' * lb
2025-07-01 17:49:07.341 else:
2025-07-01 17:49:07.341 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.341 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.341 else:
2025-07-01 17:49:07.341 # the synch pair is identical
2025-07-01 17:49:07.341 yield ' ' + aelt
2025-07-01 17:49:07.341
2025-07-01 17:49:07.342 # pump out diffs from after the synch point
2025-07-01 17:49:07.342 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.342
2025-07-01 17:49:07.342 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.342 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.342
2025-07-01 17:49:07.342 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.342 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.342 alo = 254, ahi = 1101
2025-07-01 17:49:07.342 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.342 blo = 254, bhi = 1101
2025-07-01 17:49:07.342
2025-07-01 17:49:07.342 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.342 g = []
2025-07-01 17:49:07.342 if alo < ahi:
2025-07-01 17:49:07.342 if blo < bhi:
2025-07-01 17:49:07.343 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.343 else:
2025-07-01 17:49:07.343 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.343 elif blo < bhi:
2025-07-01 17:49:07.343 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.343
2025-07-01 17:49:07.343 > yield from g
2025-07-01 17:49:07.343
2025-07-01 17:49:07.343 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.343 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.343
2025-07-01 17:49:07.343 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.343 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.343 alo = 254, ahi = 1101
2025-07-01 17:49:07.343 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.343 blo = 254, bhi = 1101
2025-07-01 17:49:07.343
2025-07-01 17:49:07.344 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.347 r"""
2025-07-01 17:49:07.347 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.347 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.347 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.347 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.347
2025-07-01 17:49:07.347 Example:
2025-07-01 17:49:07.347
2025-07-01 17:49:07.347 >>> d = Differ()
2025-07-01 17:49:07.347 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.347 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.348 >>> print(''.join(results), end="")
2025-07-01 17:49:07.348 - abcDefghiJkl
2025-07-01 17:49:07.348 + abcdefGhijkl
2025-07-01 17:49:07.348 """
2025-07-01 17:49:07.348
2025-07-01 17:49:07.348 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.348 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.348 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.348 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.348 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.348
2025-07-01 17:49:07.348 # search for the pair that matches best without being identical
2025-07-01 17:49:07.348 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.349 # on junk -- unless we have to)
2025-07-01 17:49:07.349 for j in range(blo, bhi):
2025-07-01 17:49:07.349 bj = b[j]
2025-07-01 17:49:07.349 cruncher.set_seq2(bj)
2025-07-01 17:49:07.349 for i in range(alo, ahi):
2025-07-01 17:49:07.349 ai = a[i]
2025-07-01 17:49:07.349 if ai == bj:
2025-07-01 17:49:07.349 if eqi is None:
2025-07-01 17:49:07.349 eqi, eqj = i, j
2025-07-01 17:49:07.349 continue
2025-07-01 17:49:07.349 cruncher.set_seq1(ai)
2025-07-01 17:49:07.349 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.349 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.349 # compares by a factor of 3.
2025-07-01 17:49:07.349 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.349 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.350 # of the computation is cached by cruncher
2025-07-01 17:49:07.350 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.350 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.350 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.350 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.350 if best_ratio < cutoff:
2025-07-01 17:49:07.350 # no non-identical "pretty close" pair
2025-07-01 17:49:07.350 if eqi is None:
2025-07-01 17:49:07.350 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.350 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.350 return
2025-07-01 17:49:07.350 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.350 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.350 else:
2025-07-01 17:49:07.351 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.351 eqi = None
2025-07-01 17:49:07.351
2025-07-01 17:49:07.351 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.351 # identical
2025-07-01 17:49:07.351
2025-07-01 17:49:07.351 # pump out diffs from before the synch point
2025-07-01 17:49:07.351 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.351
2025-07-01 17:49:07.351 # do intraline marking on the synch pair
2025-07-01 17:49:07.351 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.351 if eqi is None:
2025-07-01 17:49:07.351 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.351 atags = btags = ""
2025-07-01 17:49:07.351 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.351 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.351 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.352 if tag == 'replace':
2025-07-01 17:49:07.352 atags += '^' * la
2025-07-01 17:49:07.352 btags += '^' * lb
2025-07-01 17:49:07.352 elif tag == 'delete':
2025-07-01 17:49:07.352 atags += '-' * la
2025-07-01 17:49:07.352 elif tag == 'insert':
2025-07-01 17:49:07.352 btags += '+' * lb
2025-07-01 17:49:07.352 elif tag == 'equal':
2025-07-01 17:49:07.352 atags += ' ' * la
2025-07-01 17:49:07.352 btags += ' ' * lb
2025-07-01 17:49:07.352 else:
2025-07-01 17:49:07.352 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.352 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.352 else:
2025-07-01 17:49:07.352 # the synch pair is identical
2025-07-01 17:49:07.352 yield ' ' + aelt
2025-07-01 17:49:07.352
2025-07-01 17:49:07.353 # pump out diffs from after the synch point
2025-07-01 17:49:07.353 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.353
2025-07-01 17:49:07.353 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.353 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.353
2025-07-01 17:49:07.353 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.353 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.353 alo = 255, ahi = 1101
2025-07-01 17:49:07.353 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.353 blo = 255, bhi = 1101
2025-07-01 17:49:07.353
2025-07-01 17:49:07.353 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.353 g = []
2025-07-01 17:49:07.353 if alo < ahi:
2025-07-01 17:49:07.353 if blo < bhi:
2025-07-01 17:49:07.353 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.354 else:
2025-07-01 17:49:07.354 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.354 elif blo < bhi:
2025-07-01 17:49:07.354 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.354
2025-07-01 17:49:07.354 > yield from g
2025-07-01 17:49:07.354
2025-07-01 17:49:07.354 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.354 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.354
2025-07-01 17:49:07.354 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.354 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.354 alo = 255, ahi = 1101
2025-07-01 17:49:07.354 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.354 blo = 255, bhi = 1101
2025-07-01 17:49:07.354
2025-07-01 17:49:07.355 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.355 r"""
2025-07-01 17:49:07.355 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.355 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.355 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.355 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.355
2025-07-01 17:49:07.355 Example:
2025-07-01 17:49:07.355
2025-07-01 17:49:07.355 >>> d = Differ()
2025-07-01 17:49:07.355 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.355 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.355 >>> print(''.join(results), end="")
2025-07-01 17:49:07.355 - abcDefghiJkl
2025-07-01 17:49:07.355 + abcdefGhijkl
2025-07-01 17:49:07.356 """
2025-07-01 17:49:07.356
2025-07-01 17:49:07.356 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.356 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.356 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.356 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.356 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.356
2025-07-01 17:49:07.356 # search for the pair that matches best without being identical
2025-07-01 17:49:07.356 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.356 # on junk -- unless we have to)
2025-07-01 17:49:07.356 for j in range(blo, bhi):
2025-07-01 17:49:07.356 bj = b[j]
2025-07-01 17:49:07.356 cruncher.set_seq2(bj)
2025-07-01 17:49:07.356 for i in range(alo, ahi):
2025-07-01 17:49:07.356 ai = a[i]
2025-07-01 17:49:07.356 if ai == bj:
2025-07-01 17:49:07.357 if eqi is None:
2025-07-01 17:49:07.357 eqi, eqj = i, j
2025-07-01 17:49:07.357 continue
2025-07-01 17:49:07.357 cruncher.set_seq1(ai)
2025-07-01 17:49:07.357 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.357 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.357 # compares by a factor of 3.
2025-07-01 17:49:07.357 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.357 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.357 # of the computation is cached by cruncher
2025-07-01 17:49:07.357 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.357 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.357 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.357 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.357 if best_ratio < cutoff:
2025-07-01 17:49:07.357 # no non-identical "pretty close" pair
2025-07-01 17:49:07.357 if eqi is None:
2025-07-01 17:49:07.358 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.358 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.358 return
2025-07-01 17:49:07.358 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.358 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.358 else:
2025-07-01 17:49:07.358 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.358 eqi = None
2025-07-01 17:49:07.358
2025-07-01 17:49:07.358 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.358 # identical
2025-07-01 17:49:07.358
2025-07-01 17:49:07.358 # pump out diffs from before the synch point
2025-07-01 17:49:07.358 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.358
2025-07-01 17:49:07.358 # do intraline marking on the synch pair
2025-07-01 17:49:07.358 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.359 if eqi is None:
2025-07-01 17:49:07.359 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.359 atags = btags = ""
2025-07-01 17:49:07.359 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.359 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.359 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.359 if tag == 'replace':
2025-07-01 17:49:07.359 atags += '^' * la
2025-07-01 17:49:07.359 btags += '^' * lb
2025-07-01 17:49:07.359 elif tag == 'delete':
2025-07-01 17:49:07.359 atags += '-' * la
2025-07-01 17:49:07.359 elif tag == 'insert':
2025-07-01 17:49:07.359 btags += '+' * lb
2025-07-01 17:49:07.359 elif tag == 'equal':
2025-07-01 17:49:07.360 atags += ' ' * la
2025-07-01 17:49:07.365 btags += ' ' * lb
2025-07-01 17:49:07.365 else:
2025-07-01 17:49:07.365 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.365 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.365 else:
2025-07-01 17:49:07.365 # the synch pair is identical
2025-07-01 17:49:07.365 yield ' ' + aelt
2025-07-01 17:49:07.365
2025-07-01 17:49:07.365 # pump out diffs from after the synch point
2025-07-01 17:49:07.365 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.365
2025-07-01 17:49:07.365 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.365 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.365
2025-07-01 17:49:07.366 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.366 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.366 alo = 256, ahi = 1101
2025-07-01 17:49:07.366 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.366 blo = 256, bhi = 1101
2025-07-01 17:49:07.366
2025-07-01 17:49:07.366 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.366 g = []
2025-07-01 17:49:07.366 if alo < ahi:
2025-07-01 17:49:07.366 if blo < bhi:
2025-07-01 17:49:07.366 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.366 else:
2025-07-01 17:49:07.366 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.366 elif blo < bhi:
2025-07-01 17:49:07.367 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.367
2025-07-01 17:49:07.367 > yield from g
2025-07-01 17:49:07.367
2025-07-01 17:49:07.367 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.367 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.367
2025-07-01 17:49:07.367 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.367 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.367 alo = 256, ahi = 1101
2025-07-01 17:49:07.367 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.367 blo = 256, bhi = 1101
2025-07-01 17:49:07.367
2025-07-01 17:49:07.367 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.367 r"""
2025-07-01 17:49:07.367 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.368 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.368 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.368 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.368
2025-07-01 17:49:07.368 Example:
2025-07-01 17:49:07.368
2025-07-01 17:49:07.368 >>> d = Differ()
2025-07-01 17:49:07.368 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.368 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.368 >>> print(''.join(results), end="")
2025-07-01 17:49:07.368 - abcDefghiJkl
2025-07-01 17:49:07.368 + abcdefGhijkl
2025-07-01 17:49:07.368 """
2025-07-01 17:49:07.368
2025-07-01 17:49:07.368 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.369 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.369 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.369 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.369 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.369
2025-07-01 17:49:07.369 # search for the pair that matches best without being identical
2025-07-01 17:49:07.369 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.369 # on junk -- unless we have to)
2025-07-01 17:49:07.369 for j in range(blo, bhi):
2025-07-01 17:49:07.369 bj = b[j]
2025-07-01 17:49:07.369 cruncher.set_seq2(bj)
2025-07-01 17:49:07.369 for i in range(alo, ahi):
2025-07-01 17:49:07.369 ai = a[i]
2025-07-01 17:49:07.369 if ai == bj:
2025-07-01 17:49:07.369 if eqi is None:
2025-07-01 17:49:07.369 eqi, eqj = i, j
2025-07-01 17:49:07.370 continue
2025-07-01 17:49:07.370 cruncher.set_seq1(ai)
2025-07-01 17:49:07.370 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.370 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.370 # compares by a factor of 3.
2025-07-01 17:49:07.370 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.370 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.370 # of the computation is cached by cruncher
2025-07-01 17:49:07.370 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.370 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.370 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.370 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.370 if best_ratio < cutoff:
2025-07-01 17:49:07.370 # no non-identical "pretty close" pair
2025-07-01 17:49:07.370 if eqi is None:
2025-07-01 17:49:07.370 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.370 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.371 return
2025-07-01 17:49:07.371 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.371 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.371 else:
2025-07-01 17:49:07.371 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.371 eqi = None
2025-07-01 17:49:07.371
2025-07-01 17:49:07.371 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.371 # identical
2025-07-01 17:49:07.371
2025-07-01 17:49:07.371 # pump out diffs from before the synch point
2025-07-01 17:49:07.371 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.371
2025-07-01 17:49:07.371 # do intraline marking on the synch pair
2025-07-01 17:49:07.371 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.371 if eqi is None:
2025-07-01 17:49:07.372 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.372 atags = btags = ""
2025-07-01 17:49:07.372 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.372 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.372 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.372 if tag == 'replace':
2025-07-01 17:49:07.372 atags += '^' * la
2025-07-01 17:49:07.372 btags += '^' * lb
2025-07-01 17:49:07.372 elif tag == 'delete':
2025-07-01 17:49:07.372 atags += '-' * la
2025-07-01 17:49:07.372 elif tag == 'insert':
2025-07-01 17:49:07.372 btags += '+' * lb
2025-07-01 17:49:07.372 elif tag == 'equal':
2025-07-01 17:49:07.372 atags += ' ' * la
2025-07-01 17:49:07.372 btags += ' ' * lb
2025-07-01 17:49:07.372 else:
2025-07-01 17:49:07.372 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.373 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.373 else:
2025-07-01 17:49:07.373 # the synch pair is identical
2025-07-01 17:49:07.373 yield ' ' + aelt
2025-07-01 17:49:07.373
2025-07-01 17:49:07.373 # pump out diffs from after the synch point
2025-07-01 17:49:07.373 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.373
2025-07-01 17:49:07.373 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.373 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.373
2025-07-01 17:49:07.373 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.373 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.373 alo = 257, ahi = 1101
2025-07-01 17:49:07.373 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.373 blo = 257, bhi = 1101
2025-07-01 17:49:07.373
2025-07-01 17:49:07.374 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.374 g = []
2025-07-01 17:49:07.374 if alo < ahi:
2025-07-01 17:49:07.374 if blo < bhi:
2025-07-01 17:49:07.374 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.374 else:
2025-07-01 17:49:07.374 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.374 elif blo < bhi:
2025-07-01 17:49:07.374 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.374
2025-07-01 17:49:07.374 > yield from g
2025-07-01 17:49:07.374
2025-07-01 17:49:07.374 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.374 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.374
2025-07-01 17:49:07.374 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.374 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.375 alo = 257, ahi = 1101
2025-07-01 17:49:07.375 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.375 blo = 257, bhi = 1101
2025-07-01 17:49:07.375
2025-07-01 17:49:07.375 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.375 r"""
2025-07-01 17:49:07.375 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.375 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.375 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.375 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.375
2025-07-01 17:49:07.375 Example:
2025-07-01 17:49:07.375
2025-07-01 17:49:07.375 >>> d = Differ()
2025-07-01 17:49:07.375 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.376 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.379 >>> print(''.join(results), end="")
2025-07-01 17:49:07.379 - abcDefghiJkl
2025-07-01 17:49:07.379 + abcdefGhijkl
2025-07-01 17:49:07.379 """
2025-07-01 17:49:07.379
2025-07-01 17:49:07.379 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.379 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.380 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.380 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.380 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.380
2025-07-01 17:49:07.380 # search for the pair that matches best without being identical
2025-07-01 17:49:07.380 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.380 # on junk -- unless we have to)
2025-07-01 17:49:07.380 for j in range(blo, bhi):
2025-07-01 17:49:07.380 bj = b[j]
2025-07-01 17:49:07.380 cruncher.set_seq2(bj)
2025-07-01 17:49:07.380 for i in range(alo, ahi):
2025-07-01 17:49:07.380 ai = a[i]
2025-07-01 17:49:07.380 if ai == bj:
2025-07-01 17:49:07.380 if eqi is None:
2025-07-01 17:49:07.380 eqi, eqj = i, j
2025-07-01 17:49:07.380 continue
2025-07-01 17:49:07.381 cruncher.set_seq1(ai)
2025-07-01 17:49:07.381 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.381 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.381 # compares by a factor of 3.
2025-07-01 17:49:07.381 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.381 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.381 # of the computation is cached by cruncher
2025-07-01 17:49:07.381 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.381 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.381 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.381 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.381 if best_ratio < cutoff:
2025-07-01 17:49:07.381 # no non-identical "pretty close" pair
2025-07-01 17:49:07.381 if eqi is None:
2025-07-01 17:49:07.381 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.381 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.382 return
2025-07-01 17:49:07.382 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.382 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.382 else:
2025-07-01 17:49:07.382 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.382 eqi = None
2025-07-01 17:49:07.382
2025-07-01 17:49:07.382 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.382 # identical
2025-07-01 17:49:07.382
2025-07-01 17:49:07.382 # pump out diffs from before the synch point
2025-07-01 17:49:07.382 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.382
2025-07-01 17:49:07.382 # do intraline marking on the synch pair
2025-07-01 17:49:07.382 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.382 if eqi is None:
2025-07-01 17:49:07.382 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.383 atags = btags = ""
2025-07-01 17:49:07.383 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.383 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.383 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.383 if tag == 'replace':
2025-07-01 17:49:07.383 atags += '^' * la
2025-07-01 17:49:07.383 btags += '^' * lb
2025-07-01 17:49:07.383 elif tag == 'delete':
2025-07-01 17:49:07.383 atags += '-' * la
2025-07-01 17:49:07.383 elif tag == 'insert':
2025-07-01 17:49:07.383 btags += '+' * lb
2025-07-01 17:49:07.383 elif tag == 'equal':
2025-07-01 17:49:07.383 atags += ' ' * la
2025-07-01 17:49:07.383 btags += ' ' * lb
2025-07-01 17:49:07.383 else:
2025-07-01 17:49:07.383 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.384 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.384 else:
2025-07-01 17:49:07.384 # the synch pair is identical
2025-07-01 17:49:07.384 yield ' ' + aelt
2025-07-01 17:49:07.384
2025-07-01 17:49:07.384 # pump out diffs from after the synch point
2025-07-01 17:49:07.384 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.384
2025-07-01 17:49:07.384 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.384 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.384
2025-07-01 17:49:07.384 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.384 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.384 alo = 258, ahi = 1101
2025-07-01 17:49:07.384 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.384 blo = 258, bhi = 1101
2025-07-01 17:49:07.385
2025-07-01 17:49:07.385 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.385 g = []
2025-07-01 17:49:07.385 if alo < ahi:
2025-07-01 17:49:07.385 if blo < bhi:
2025-07-01 17:49:07.385 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.385 else:
2025-07-01 17:49:07.385 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.385 elif blo < bhi:
2025-07-01 17:49:07.385 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.385
2025-07-01 17:49:07.385 > yield from g
2025-07-01 17:49:07.385
2025-07-01 17:49:07.385 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.385 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.385
2025-07-01 17:49:07.385 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.386 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.386 alo = 258, ahi = 1101
2025-07-01 17:49:07.386 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.386 blo = 258, bhi = 1101
2025-07-01 17:49:07.386
2025-07-01 17:49:07.386 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.386 r"""
2025-07-01 17:49:07.386 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.386 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.386 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.386 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.386
2025-07-01 17:49:07.386 Example:
2025-07-01 17:49:07.386
2025-07-01 17:49:07.386 >>> d = Differ()
2025-07-01 17:49:07.387 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.387 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.387 >>> print(''.join(results), end="")
2025-07-01 17:49:07.387 - abcDefghiJkl
2025-07-01 17:49:07.387 + abcdefGhijkl
2025-07-01 17:49:07.387 """
2025-07-01 17:49:07.387
2025-07-01 17:49:07.387 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.387 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.387 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.387 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.387 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.387
2025-07-01 17:49:07.387 # search for the pair that matches best without being identical
2025-07-01 17:49:07.388 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.388 # on junk -- unless we have to)
2025-07-01 17:49:07.388 for j in range(blo, bhi):
2025-07-01 17:49:07.388 bj = b[j]
2025-07-01 17:49:07.388 cruncher.set_seq2(bj)
2025-07-01 17:49:07.388 for i in range(alo, ahi):
2025-07-01 17:49:07.388 ai = a[i]
2025-07-01 17:49:07.388 if ai == bj:
2025-07-01 17:49:07.388 if eqi is None:
2025-07-01 17:49:07.388 eqi, eqj = i, j
2025-07-01 17:49:07.388 continue
2025-07-01 17:49:07.388 cruncher.set_seq1(ai)
2025-07-01 17:49:07.388 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.388 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.388 # compares by a factor of 3.
2025-07-01 17:49:07.388 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.389 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.389 # of the computation is cached by cruncher
2025-07-01 17:49:07.389 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.389 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.389 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.389 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.389 if best_ratio < cutoff:
2025-07-01 17:49:07.389 # no non-identical "pretty close" pair
2025-07-01 17:49:07.389 if eqi is None:
2025-07-01 17:49:07.389 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.389 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.389 return
2025-07-01 17:49:07.389 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.389 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.389 else:
2025-07-01 17:49:07.390 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.390 eqi = None
2025-07-01 17:49:07.390
2025-07-01 17:49:07.390 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.390 # identical
2025-07-01 17:49:07.390
2025-07-01 17:49:07.390 # pump out diffs from before the synch point
2025-07-01 17:49:07.390 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.390
2025-07-01 17:49:07.390 # do intraline marking on the synch pair
2025-07-01 17:49:07.390 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.390 if eqi is None:
2025-07-01 17:49:07.390 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.390 atags = btags = ""
2025-07-01 17:49:07.390 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.390 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.390 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.395 if tag == 'replace':
2025-07-01 17:49:07.395 atags += '^' * la
2025-07-01 17:49:07.396 btags += '^' * lb
2025-07-01 17:49:07.396 elif tag == 'delete':
2025-07-01 17:49:07.396 atags += '-' * la
2025-07-01 17:49:07.396 elif tag == 'insert':
2025-07-01 17:49:07.396 btags += '+' * lb
2025-07-01 17:49:07.396 elif tag == 'equal':
2025-07-01 17:49:07.396 atags += ' ' * la
2025-07-01 17:49:07.396 btags += ' ' * lb
2025-07-01 17:49:07.396 else:
2025-07-01 17:49:07.396 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.396 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.396 else:
2025-07-01 17:49:07.396 # the synch pair is identical
2025-07-01 17:49:07.396 yield ' ' + aelt
2025-07-01 17:49:07.396
2025-07-01 17:49:07.396 # pump out diffs from after the synch point
2025-07-01 17:49:07.397 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.397
2025-07-01 17:49:07.397 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.397 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.397
2025-07-01 17:49:07.397 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.397 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.397 alo = 259, ahi = 1101
2025-07-01 17:49:07.397 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.397 blo = 259, bhi = 1101
2025-07-01 17:49:07.397
2025-07-01 17:49:07.397 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.397 g = []
2025-07-01 17:49:07.397 if alo < ahi:
2025-07-01 17:49:07.397 if blo < bhi:
2025-07-01 17:49:07.398 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.398 else:
2025-07-01 17:49:07.398 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.398 elif blo < bhi:
2025-07-01 17:49:07.398 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.398
2025-07-01 17:49:07.398 > yield from g
2025-07-01 17:49:07.398
2025-07-01 17:49:07.398 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.398 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.398
2025-07-01 17:49:07.398 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.398 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.398 alo = 259, ahi = 1101
2025-07-01 17:49:07.398 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.398 blo = 259, bhi = 1101
2025-07-01 17:49:07.399
2025-07-01 17:49:07.399 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.399 r"""
2025-07-01 17:49:07.399 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.399 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.399 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.399 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.399
2025-07-01 17:49:07.399 Example:
2025-07-01 17:49:07.399
2025-07-01 17:49:07.399 >>> d = Differ()
2025-07-01 17:49:07.399 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.399 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.399 >>> print(''.join(results), end="")
2025-07-01 17:49:07.399 - abcDefghiJkl
2025-07-01 17:49:07.399 + abcdefGhijkl
2025-07-01 17:49:07.400 """
2025-07-01 17:49:07.400
2025-07-01 17:49:07.400 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.400 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.400 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.400 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.400 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.400
2025-07-01 17:49:07.400 # search for the pair that matches best without being identical
2025-07-01 17:49:07.400 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.400 # on junk -- unless we have to)
2025-07-01 17:49:07.400 for j in range(blo, bhi):
2025-07-01 17:49:07.400 bj = b[j]
2025-07-01 17:49:07.400 cruncher.set_seq2(bj)
2025-07-01 17:49:07.400 for i in range(alo, ahi):
2025-07-01 17:49:07.401 ai = a[i]
2025-07-01 17:49:07.401 if ai == bj:
2025-07-01 17:49:07.401 if eqi is None:
2025-07-01 17:49:07.401 eqi, eqj = i, j
2025-07-01 17:49:07.401 continue
2025-07-01 17:49:07.401 cruncher.set_seq1(ai)
2025-07-01 17:49:07.401 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.401 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.401 # compares by a factor of 3.
2025-07-01 17:49:07.401 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.401 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.401 # of the computation is cached by cruncher
2025-07-01 17:49:07.401 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.401 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.401 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.401 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.401 if best_ratio < cutoff:
2025-07-01 17:49:07.402 # no non-identical "pretty close" pair
2025-07-01 17:49:07.402 if eqi is None:
2025-07-01 17:49:07.402 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.402 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.402 return
2025-07-01 17:49:07.402 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.402 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.402 else:
2025-07-01 17:49:07.402 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.402 eqi = None
2025-07-01 17:49:07.402
2025-07-01 17:49:07.402 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.402 # identical
2025-07-01 17:49:07.402
2025-07-01 17:49:07.402 # pump out diffs from before the synch point
2025-07-01 17:49:07.402 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.403
2025-07-01 17:49:07.403 # do intraline marking on the synch pair
2025-07-01 17:49:07.403 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.403 if eqi is None:
2025-07-01 17:49:07.403 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.403 atags = btags = ""
2025-07-01 17:49:07.403 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.403 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.403 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.403 if tag == 'replace':
2025-07-01 17:49:07.403 atags += '^' * la
2025-07-01 17:49:07.403 btags += '^' * lb
2025-07-01 17:49:07.403 elif tag == 'delete':
2025-07-01 17:49:07.403 atags += '-' * la
2025-07-01 17:49:07.403 elif tag == 'insert':
2025-07-01 17:49:07.403 btags += '+' * lb
2025-07-01 17:49:07.404 elif tag == 'equal':
2025-07-01 17:49:07.404 atags += ' ' * la
2025-07-01 17:49:07.404 btags += ' ' * lb
2025-07-01 17:49:07.404 else:
2025-07-01 17:49:07.404 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.404 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.404 else:
2025-07-01 17:49:07.404 # the synch pair is identical
2025-07-01 17:49:07.404 yield ' ' + aelt
2025-07-01 17:49:07.404
2025-07-01 17:49:07.404 # pump out diffs from after the synch point
2025-07-01 17:49:07.404 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.404
2025-07-01 17:49:07.404 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.404 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.404
2025-07-01 17:49:07.405 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.405 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.405 alo = 260, ahi = 1101
2025-07-01 17:49:07.405 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.405 blo = 260, bhi = 1101
2025-07-01 17:49:07.405
2025-07-01 17:49:07.405 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.405 g = []
2025-07-01 17:49:07.405 if alo < ahi:
2025-07-01 17:49:07.405 if blo < bhi:
2025-07-01 17:49:07.405 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.405 else:
2025-07-01 17:49:07.405 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.405 elif blo < bhi:
2025-07-01 17:49:07.405 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.406
2025-07-01 17:49:07.409 > yield from g
2025-07-01 17:49:07.409
2025-07-01 17:49:07.409 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.409 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.409
2025-07-01 17:49:07.409 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.409 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.410 alo = 260, ahi = 1101
2025-07-01 17:49:07.410 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.410 blo = 260, bhi = 1101
2025-07-01 17:49:07.410
2025-07-01 17:49:07.410 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.410 r"""
2025-07-01 17:49:07.410 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.410 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.410 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.410 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.410
2025-07-01 17:49:07.410 Example:
2025-07-01 17:49:07.410
2025-07-01 17:49:07.410 >>> d = Differ()
2025-07-01 17:49:07.410 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.410 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.411 >>> print(''.join(results), end="")
2025-07-01 17:49:07.411 - abcDefghiJkl
2025-07-01 17:49:07.411 + abcdefGhijkl
2025-07-01 17:49:07.411 """
2025-07-01 17:49:07.411
2025-07-01 17:49:07.411 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.411 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.411 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.411 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.411 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.411
2025-07-01 17:49:07.411 # search for the pair that matches best without being identical
2025-07-01 17:49:07.411 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.411 # on junk -- unless we have to)
2025-07-01 17:49:07.411 for j in range(blo, bhi):
2025-07-01 17:49:07.412 bj = b[j]
2025-07-01 17:49:07.412 cruncher.set_seq2(bj)
2025-07-01 17:49:07.412 for i in range(alo, ahi):
2025-07-01 17:49:07.412 ai = a[i]
2025-07-01 17:49:07.412 if ai == bj:
2025-07-01 17:49:07.412 if eqi is None:
2025-07-01 17:49:07.412 eqi, eqj = i, j
2025-07-01 17:49:07.412 continue
2025-07-01 17:49:07.412 cruncher.set_seq1(ai)
2025-07-01 17:49:07.412 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.412 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.412 # compares by a factor of 3.
2025-07-01 17:49:07.412 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.412 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.412 # of the computation is cached by cruncher
2025-07-01 17:49:07.412 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.413 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.413 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.413 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.413 if best_ratio < cutoff:
2025-07-01 17:49:07.413 # no non-identical "pretty close" pair
2025-07-01 17:49:07.413 if eqi is None:
2025-07-01 17:49:07.413 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.413 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.413 return
2025-07-01 17:49:07.413 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.413 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.413 else:
2025-07-01 17:49:07.413 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.413 eqi = None
2025-07-01 17:49:07.413
2025-07-01 17:49:07.414 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.414 # identical
2025-07-01 17:49:07.414
2025-07-01 17:49:07.414 # pump out diffs from before the synch point
2025-07-01 17:49:07.414 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.414
2025-07-01 17:49:07.414 # do intraline marking on the synch pair
2025-07-01 17:49:07.414 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.414 if eqi is None:
2025-07-01 17:49:07.414 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.414 atags = btags = ""
2025-07-01 17:49:07.414 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.414 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.414 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.414 if tag == 'replace':
2025-07-01 17:49:07.414 atags += '^' * la
2025-07-01 17:49:07.415 btags += '^' * lb
2025-07-01 17:49:07.415 elif tag == 'delete':
2025-07-01 17:49:07.415 atags += '-' * la
2025-07-01 17:49:07.415 elif tag == 'insert':
2025-07-01 17:49:07.415 btags += '+' * lb
2025-07-01 17:49:07.415 elif tag == 'equal':
2025-07-01 17:49:07.415 atags += ' ' * la
2025-07-01 17:49:07.415 btags += ' ' * lb
2025-07-01 17:49:07.415 else:
2025-07-01 17:49:07.415 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.415 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.415 else:
2025-07-01 17:49:07.415 # the synch pair is identical
2025-07-01 17:49:07.415 yield ' ' + aelt
2025-07-01 17:49:07.415
2025-07-01 17:49:07.415 # pump out diffs from after the synch point
2025-07-01 17:49:07.415 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.416
2025-07-01 17:49:07.416 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.416 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.416
2025-07-01 17:49:07.416 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.416 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.416 alo = 261, ahi = 1101
2025-07-01 17:49:07.416 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.416 blo = 261, bhi = 1101
2025-07-01 17:49:07.416
2025-07-01 17:49:07.416 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.416 g = []
2025-07-01 17:49:07.416 if alo < ahi:
2025-07-01 17:49:07.416 if blo < bhi:
2025-07-01 17:49:07.416 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.416 else:
2025-07-01 17:49:07.417 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.417 elif blo < bhi:
2025-07-01 17:49:07.417 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.417
2025-07-01 17:49:07.417 > yield from g
2025-07-01 17:49:07.417
2025-07-01 17:49:07.417 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.417 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.417
2025-07-01 17:49:07.417 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.417 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.417 alo = 261, ahi = 1101
2025-07-01 17:49:07.417 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.417 blo = 261, bhi = 1101
2025-07-01 17:49:07.417
2025-07-01 17:49:07.417 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.418 r"""
2025-07-01 17:49:07.418 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.418 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.418 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.418 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.418
2025-07-01 17:49:07.418 Example:
2025-07-01 17:49:07.418
2025-07-01 17:49:07.418 >>> d = Differ()
2025-07-01 17:49:07.418 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.418 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.418 >>> print(''.join(results), end="")
2025-07-01 17:49:07.418 - abcDefghiJkl
2025-07-01 17:49:07.418 + abcdefGhijkl
2025-07-01 17:49:07.418 """
2025-07-01 17:49:07.419
2025-07-01 17:49:07.419 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.419 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.419 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.419 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.419 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.419
2025-07-01 17:49:07.419 # search for the pair that matches best without being identical
2025-07-01 17:49:07.419 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.419 # on junk -- unless we have to)
2025-07-01 17:49:07.419 for j in range(blo, bhi):
2025-07-01 17:49:07.419 bj = b[j]
2025-07-01 17:49:07.419 cruncher.set_seq2(bj)
2025-07-01 17:49:07.419 for i in range(alo, ahi):
2025-07-01 17:49:07.419 ai = a[i]
2025-07-01 17:49:07.420 if ai == bj:
2025-07-01 17:49:07.420 if eqi is None:
2025-07-01 17:49:07.420 eqi, eqj = i, j
2025-07-01 17:49:07.420 continue
2025-07-01 17:49:07.420 cruncher.set_seq1(ai)
2025-07-01 17:49:07.420 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.420 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.420 # compares by a factor of 3.
2025-07-01 17:49:07.420 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.420 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.420 # of the computation is cached by cruncher
2025-07-01 17:49:07.420 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.420 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.420 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.420 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.420 if best_ratio < cutoff:
2025-07-01 17:49:07.421 # no non-identical "pretty close" pair
2025-07-01 17:49:07.421 if eqi is None:
2025-07-01 17:49:07.421 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.421 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.421 return
2025-07-01 17:49:07.421 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.421 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.421 else:
2025-07-01 17:49:07.421 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.421 eqi = None
2025-07-01 17:49:07.421
2025-07-01 17:49:07.421 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.421 # identical
2025-07-01 17:49:07.421
2025-07-01 17:49:07.421 # pump out diffs from before the synch point
2025-07-01 17:49:07.421 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.422
2025-07-01 17:49:07.426 # do intraline marking on the synch pair
2025-07-01 17:49:07.426 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.427 if eqi is None:
2025-07-01 17:49:07.427 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.427 atags = btags = ""
2025-07-01 17:49:07.427 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.427 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.427 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.427 if tag == 'replace':
2025-07-01 17:49:07.427 atags += '^' * la
2025-07-01 17:49:07.427 btags += '^' * lb
2025-07-01 17:49:07.427 elif tag == 'delete':
2025-07-01 17:49:07.427 atags += '-' * la
2025-07-01 17:49:07.427 elif tag == 'insert':
2025-07-01 17:49:07.427 btags += '+' * lb
2025-07-01 17:49:07.427 elif tag == 'equal':
2025-07-01 17:49:07.427 atags += ' ' * la
2025-07-01 17:49:07.427 btags += ' ' * lb
2025-07-01 17:49:07.428 else:
2025-07-01 17:49:07.428 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.428 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.428 else:
2025-07-01 17:49:07.428 # the synch pair is identical
2025-07-01 17:49:07.428 yield ' ' + aelt
2025-07-01 17:49:07.428
2025-07-01 17:49:07.428 # pump out diffs from after the synch point
2025-07-01 17:49:07.428 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.428
2025-07-01 17:49:07.428 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.428 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.428
2025-07-01 17:49:07.428 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.428 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.428 alo = 262, ahi = 1101
2025-07-01 17:49:07.428 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.428 blo = 262, bhi = 1101
2025-07-01 17:49:07.428
2025-07-01 17:49:07.428 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.429 g = []
2025-07-01 17:49:07.429 if alo < ahi:
2025-07-01 17:49:07.429 if blo < bhi:
2025-07-01 17:49:07.429 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.429 else:
2025-07-01 17:49:07.429 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.429 elif blo < bhi:
2025-07-01 17:49:07.429 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.429
2025-07-01 17:49:07.429 > yield from g
2025-07-01 17:49:07.429
2025-07-01 17:49:07.429 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.429 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.429
2025-07-01 17:49:07.429 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.429 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.429 alo = 262, ahi = 1101
2025-07-01 17:49:07.429 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.429 blo = 262, bhi = 1101
2025-07-01 17:49:07.429
2025-07-01 17:49:07.429 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.430 r"""
2025-07-01 17:49:07.430 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.430 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.430 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.430 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.430
2025-07-01 17:49:07.430 Example:
2025-07-01 17:49:07.430
2025-07-01 17:49:07.430 >>> d = Differ()
2025-07-01 17:49:07.430 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.430 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.430 >>> print(''.join(results), end="")
2025-07-01 17:49:07.430 - abcDefghiJkl
2025-07-01 17:49:07.430 + abcdefGhijkl
2025-07-01 17:49:07.430 """
2025-07-01 17:49:07.430
2025-07-01 17:49:07.430 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.430 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.431 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.431 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.431 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.431
2025-07-01 17:49:07.431 # search for the pair that matches best without being identical
2025-07-01 17:49:07.431 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.431 # on junk -- unless we have to)
2025-07-01 17:49:07.431 for j in range(blo, bhi):
2025-07-01 17:49:07.431 bj = b[j]
2025-07-01 17:49:07.431 cruncher.set_seq2(bj)
2025-07-01 17:49:07.431 for i in range(alo, ahi):
2025-07-01 17:49:07.431 ai = a[i]
2025-07-01 17:49:07.431 if ai == bj:
2025-07-01 17:49:07.431 if eqi is None:
2025-07-01 17:49:07.431 eqi, eqj = i, j
2025-07-01 17:49:07.431 continue
2025-07-01 17:49:07.431 cruncher.set_seq1(ai)
2025-07-01 17:49:07.431 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.431 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.431 # compares by a factor of 3.
2025-07-01 17:49:07.431 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.432 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.432 # of the computation is cached by cruncher
2025-07-01 17:49:07.432 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.432 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.432 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.432 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.432 if best_ratio < cutoff:
2025-07-01 17:49:07.432 # no non-identical "pretty close" pair
2025-07-01 17:49:07.432 if eqi is None:
2025-07-01 17:49:07.432 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.432 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.432 return
2025-07-01 17:49:07.432 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.432 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.432 else:
2025-07-01 17:49:07.432 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.432 eqi = None
2025-07-01 17:49:07.432
2025-07-01 17:49:07.432 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.432 # identical
2025-07-01 17:49:07.432
2025-07-01 17:49:07.433 # pump out diffs from before the synch point
2025-07-01 17:49:07.433 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.433
2025-07-01 17:49:07.433 # do intraline marking on the synch pair
2025-07-01 17:49:07.433 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.433 if eqi is None:
2025-07-01 17:49:07.433 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.433 atags = btags = ""
2025-07-01 17:49:07.433 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.433 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.433 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.433 if tag == 'replace':
2025-07-01 17:49:07.433 atags += '^' * la
2025-07-01 17:49:07.433 btags += '^' * lb
2025-07-01 17:49:07.433 elif tag == 'delete':
2025-07-01 17:49:07.433 atags += '-' * la
2025-07-01 17:49:07.433 elif tag == 'insert':
2025-07-01 17:49:07.433 btags += '+' * lb
2025-07-01 17:49:07.434 elif tag == 'equal':
2025-07-01 17:49:07.434 atags += ' ' * la
2025-07-01 17:49:07.434 btags += ' ' * lb
2025-07-01 17:49:07.434 else:
2025-07-01 17:49:07.434 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.434 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.434 else:
2025-07-01 17:49:07.434 # the synch pair is identical
2025-07-01 17:49:07.434 yield ' ' + aelt
2025-07-01 17:49:07.434
2025-07-01 17:49:07.434 # pump out diffs from after the synch point
2025-07-01 17:49:07.434 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.434
2025-07-01 17:49:07.434 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.434 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.434
2025-07-01 17:49:07.434 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.434 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.434 alo = 263, ahi = 1101
2025-07-01 17:49:07.434 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.435 blo = 263, bhi = 1101
2025-07-01 17:49:07.435
2025-07-01 17:49:07.435 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.435 g = []
2025-07-01 17:49:07.435 if alo < ahi:
2025-07-01 17:49:07.435 if blo < bhi:
2025-07-01 17:49:07.435 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.435 else:
2025-07-01 17:49:07.435 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.435 elif blo < bhi:
2025-07-01 17:49:07.435 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.435
2025-07-01 17:49:07.435 > yield from g
2025-07-01 17:49:07.435
2025-07-01 17:49:07.435 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.435 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.435
2025-07-01 17:49:07.435 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.435 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.435 alo = 263, ahi = 1101
2025-07-01 17:49:07.435 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.436 blo = 263, bhi = 1101
2025-07-01 17:49:07.436
2025-07-01 17:49:07.436 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.436 r"""
2025-07-01 17:49:07.436 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.436 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.436 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.436 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.436
2025-07-01 17:49:07.436 Example:
2025-07-01 17:49:07.436
2025-07-01 17:49:07.436 >>> d = Differ()
2025-07-01 17:49:07.436 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.436 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.436 >>> print(''.join(results), end="")
2025-07-01 17:49:07.436 - abcDefghiJkl
2025-07-01 17:49:07.436 + abcdefGhijkl
2025-07-01 17:49:07.436 """
2025-07-01 17:49:07.437
2025-07-01 17:49:07.437 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.437 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.437 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.437 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.437 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.437
2025-07-01 17:49:07.437 # search for the pair that matches best without being identical
2025-07-01 17:49:07.437 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.437 # on junk -- unless we have to)
2025-07-01 17:49:07.437 for j in range(blo, bhi):
2025-07-01 17:49:07.437 bj = b[j]
2025-07-01 17:49:07.437 cruncher.set_seq2(bj)
2025-07-01 17:49:07.437 for i in range(alo, ahi):
2025-07-01 17:49:07.437 ai = a[i]
2025-07-01 17:49:07.437 if ai == bj:
2025-07-01 17:49:07.437 if eqi is None:
2025-07-01 17:49:07.437 eqi, eqj = i, j
2025-07-01 17:49:07.437 continue
2025-07-01 17:49:07.437 cruncher.set_seq1(ai)
2025-07-01 17:49:07.437 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.440 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.441 # compares by a factor of 3.
2025-07-01 17:49:07.441 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.441 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.441 # of the computation is cached by cruncher
2025-07-01 17:49:07.441 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.441 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.441 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.441 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.441 if best_ratio < cutoff:
2025-07-01 17:49:07.441 # no non-identical "pretty close" pair
2025-07-01 17:49:07.441 if eqi is None:
2025-07-01 17:49:07.441 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.441 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.441 return
2025-07-01 17:49:07.441 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.441 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.441 else:
2025-07-01 17:49:07.441 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.441 eqi = None
2025-07-01 17:49:07.441
2025-07-01 17:49:07.441 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.442 # identical
2025-07-01 17:49:07.442
2025-07-01 17:49:07.442 # pump out diffs from before the synch point
2025-07-01 17:49:07.442 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.442
2025-07-01 17:49:07.442 # do intraline marking on the synch pair
2025-07-01 17:49:07.442 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.442 if eqi is None:
2025-07-01 17:49:07.442 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.442 atags = btags = ""
2025-07-01 17:49:07.442 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.442 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.442 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.442 if tag == 'replace':
2025-07-01 17:49:07.442 atags += '^' * la
2025-07-01 17:49:07.442 btags += '^' * lb
2025-07-01 17:49:07.442 elif tag == 'delete':
2025-07-01 17:49:07.442 atags += '-' * la
2025-07-01 17:49:07.442 elif tag == 'insert':
2025-07-01 17:49:07.442 btags += '+' * lb
2025-07-01 17:49:07.443 elif tag == 'equal':
2025-07-01 17:49:07.443 atags += ' ' * la
2025-07-01 17:49:07.443 btags += ' ' * lb
2025-07-01 17:49:07.443 else:
2025-07-01 17:49:07.443 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.443 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.443 else:
2025-07-01 17:49:07.443 # the synch pair is identical
2025-07-01 17:49:07.443 yield ' ' + aelt
2025-07-01 17:49:07.443
2025-07-01 17:49:07.443 # pump out diffs from after the synch point
2025-07-01 17:49:07.443 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.443
2025-07-01 17:49:07.443 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.443 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.443
2025-07-01 17:49:07.443 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.443 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.443 alo = 264, ahi = 1101
2025-07-01 17:49:07.443 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.444 blo = 264, bhi = 1101
2025-07-01 17:49:07.444
2025-07-01 17:49:07.444 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.444 g = []
2025-07-01 17:49:07.444 if alo < ahi:
2025-07-01 17:49:07.444 if blo < bhi:
2025-07-01 17:49:07.444 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.444 else:
2025-07-01 17:49:07.444 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.444 elif blo < bhi:
2025-07-01 17:49:07.444 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.444
2025-07-01 17:49:07.444 > yield from g
2025-07-01 17:49:07.444
2025-07-01 17:49:07.444 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.444 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.444
2025-07-01 17:49:07.444 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.444 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.445 alo = 264, ahi = 1101
2025-07-01 17:49:07.445 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.445 blo = 264, bhi = 1101
2025-07-01 17:49:07.445
2025-07-01 17:49:07.445 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.445 r"""
2025-07-01 17:49:07.445 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.445 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.445 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.445 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.445
2025-07-01 17:49:07.445 Example:
2025-07-01 17:49:07.445
2025-07-01 17:49:07.445 >>> d = Differ()
2025-07-01 17:49:07.445 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.445 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.445 >>> print(''.join(results), end="")
2025-07-01 17:49:07.445 - abcDefghiJkl
2025-07-01 17:49:07.445 + abcdefGhijkl
2025-07-01 17:49:07.446 """
2025-07-01 17:49:07.446
2025-07-01 17:49:07.446 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.446 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.446 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.446 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.446 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.446
2025-07-01 17:49:07.446 # search for the pair that matches best without being identical
2025-07-01 17:49:07.446 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.446 # on junk -- unless we have to)
2025-07-01 17:49:07.446 for j in range(blo, bhi):
2025-07-01 17:49:07.446 bj = b[j]
2025-07-01 17:49:07.446 cruncher.set_seq2(bj)
2025-07-01 17:49:07.446 for i in range(alo, ahi):
2025-07-01 17:49:07.446 ai = a[i]
2025-07-01 17:49:07.446 if ai == bj:
2025-07-01 17:49:07.446 if eqi is None:
2025-07-01 17:49:07.446 eqi, eqj = i, j
2025-07-01 17:49:07.446 continue
2025-07-01 17:49:07.447 cruncher.set_seq1(ai)
2025-07-01 17:49:07.447 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.447 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.447 # compares by a factor of 3.
2025-07-01 17:49:07.447 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.447 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.447 # of the computation is cached by cruncher
2025-07-01 17:49:07.447 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.447 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.447 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.447 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.447 if best_ratio < cutoff:
2025-07-01 17:49:07.447 # no non-identical "pretty close" pair
2025-07-01 17:49:07.447 if eqi is None:
2025-07-01 17:49:07.447 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.447 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.447 return
2025-07-01 17:49:07.447 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.447 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.447 else:
2025-07-01 17:49:07.448 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.448 eqi = None
2025-07-01 17:49:07.448
2025-07-01 17:49:07.448 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.448 # identical
2025-07-01 17:49:07.448
2025-07-01 17:49:07.448 # pump out diffs from before the synch point
2025-07-01 17:49:07.448 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.448
2025-07-01 17:49:07.448 # do intraline marking on the synch pair
2025-07-01 17:49:07.448 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.448 if eqi is None:
2025-07-01 17:49:07.448 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.448 atags = btags = ""
2025-07-01 17:49:07.448 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.448 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.448 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.448 if tag == 'replace':
2025-07-01 17:49:07.448 atags += '^' * la
2025-07-01 17:49:07.448 btags += '^' * lb
2025-07-01 17:49:07.448 elif tag == 'delete':
2025-07-01 17:49:07.449 atags += '-' * la
2025-07-01 17:49:07.449 elif tag == 'insert':
2025-07-01 17:49:07.449 btags += '+' * lb
2025-07-01 17:49:07.449 elif tag == 'equal':
2025-07-01 17:49:07.449 atags += ' ' * la
2025-07-01 17:49:07.449 btags += ' ' * lb
2025-07-01 17:49:07.449 else:
2025-07-01 17:49:07.449 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.449 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.449 else:
2025-07-01 17:49:07.449 # the synch pair is identical
2025-07-01 17:49:07.449 yield ' ' + aelt
2025-07-01 17:49:07.449
2025-07-01 17:49:07.449 # pump out diffs from after the synch point
2025-07-01 17:49:07.449 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.449
2025-07-01 17:49:07.449 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.449 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.449
2025-07-01 17:49:07.449 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.449 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.450 alo = 265, ahi = 1101
2025-07-01 17:49:07.450 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.450 blo = 265, bhi = 1101
2025-07-01 17:49:07.450
2025-07-01 17:49:07.450 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.450 g = []
2025-07-01 17:49:07.450 if alo < ahi:
2025-07-01 17:49:07.450 if blo < bhi:
2025-07-01 17:49:07.450 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.450 else:
2025-07-01 17:49:07.450 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.450 elif blo < bhi:
2025-07-01 17:49:07.450 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.450
2025-07-01 17:49:07.450 > yield from g
2025-07-01 17:49:07.450
2025-07-01 17:49:07.450 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.450 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.450
2025-07-01 17:49:07.450 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.451 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.451 alo = 265, ahi = 1101
2025-07-01 17:49:07.451 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.451 blo = 265, bhi = 1101
2025-07-01 17:49:07.451
2025-07-01 17:49:07.451 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.451 r"""
2025-07-01 17:49:07.451 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.451 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.451 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.451 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.451
2025-07-01 17:49:07.451 Example:
2025-07-01 17:49:07.451
2025-07-01 17:49:07.451 >>> d = Differ()
2025-07-01 17:49:07.451 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.451 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.451 >>> print(''.join(results), end="")
2025-07-01 17:49:07.451 - abcDefghiJkl
2025-07-01 17:49:07.451 + abcdefGhijkl
2025-07-01 17:49:07.452 """
2025-07-01 17:49:07.452
2025-07-01 17:49:07.452 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.452 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.452 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.452 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.452 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.452
2025-07-01 17:49:07.452 # search for the pair that matches best without being identical
2025-07-01 17:49:07.452 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.452 # on junk -- unless we have to)
2025-07-01 17:49:07.452 for j in range(blo, bhi):
2025-07-01 17:49:07.452 bj = b[j]
2025-07-01 17:49:07.452 cruncher.set_seq2(bj)
2025-07-01 17:49:07.452 for i in range(alo, ahi):
2025-07-01 17:49:07.452 ai = a[i]
2025-07-01 17:49:07.452 if ai == bj:
2025-07-01 17:49:07.452 if eqi is None:
2025-07-01 17:49:07.457 eqi, eqj = i, j
2025-07-01 17:49:07.457 continue
2025-07-01 17:49:07.458 cruncher.set_seq1(ai)
2025-07-01 17:49:07.458 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.458 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.458 # compares by a factor of 3.
2025-07-01 17:49:07.458 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.458 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.458 # of the computation is cached by cruncher
2025-07-01 17:49:07.458 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.458 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.458 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.458 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.458 if best_ratio < cutoff:
2025-07-01 17:49:07.458 # no non-identical "pretty close" pair
2025-07-01 17:49:07.458 if eqi is None:
2025-07-01 17:49:07.458 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.458 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.458 return
2025-07-01 17:49:07.458 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.458 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.459 else:
2025-07-01 17:49:07.459 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.459 eqi = None
2025-07-01 17:49:07.459
2025-07-01 17:49:07.459 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.459 # identical
2025-07-01 17:49:07.459
2025-07-01 17:49:07.459 # pump out diffs from before the synch point
2025-07-01 17:49:07.459 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.459
2025-07-01 17:49:07.459 # do intraline marking on the synch pair
2025-07-01 17:49:07.459 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.459 if eqi is None:
2025-07-01 17:49:07.459 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.459 atags = btags = ""
2025-07-01 17:49:07.459 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.459 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.459 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.459 if tag == 'replace':
2025-07-01 17:49:07.459 atags += '^' * la
2025-07-01 17:49:07.460 btags += '^' * lb
2025-07-01 17:49:07.460 elif tag == 'delete':
2025-07-01 17:49:07.460 atags += '-' * la
2025-07-01 17:49:07.460 elif tag == 'insert':
2025-07-01 17:49:07.460 btags += '+' * lb
2025-07-01 17:49:07.460 elif tag == 'equal':
2025-07-01 17:49:07.460 atags += ' ' * la
2025-07-01 17:49:07.460 btags += ' ' * lb
2025-07-01 17:49:07.460 else:
2025-07-01 17:49:07.460 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.460 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.460 else:
2025-07-01 17:49:07.460 # the synch pair is identical
2025-07-01 17:49:07.460 yield ' ' + aelt
2025-07-01 17:49:07.460
2025-07-01 17:49:07.460 # pump out diffs from after the synch point
2025-07-01 17:49:07.460 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.460
2025-07-01 17:49:07.460 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.460 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.461
2025-07-01 17:49:07.461 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.461 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.461 alo = 268, ahi = 1101
2025-07-01 17:49:07.461 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.461 blo = 268, bhi = 1101
2025-07-01 17:49:07.461
2025-07-01 17:49:07.461 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.461 g = []
2025-07-01 17:49:07.461 if alo < ahi:
2025-07-01 17:49:07.461 if blo < bhi:
2025-07-01 17:49:07.461 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.461 else:
2025-07-01 17:49:07.461 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.461 elif blo < bhi:
2025-07-01 17:49:07.461 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.461
2025-07-01 17:49:07.461 > yield from g
2025-07-01 17:49:07.461
2025-07-01 17:49:07.461 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.462 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.462
2025-07-01 17:49:07.462 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.462 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.462 alo = 268, ahi = 1101
2025-07-01 17:49:07.462 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.462 blo = 268, bhi = 1101
2025-07-01 17:49:07.462
2025-07-01 17:49:07.462 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.462 r"""
2025-07-01 17:49:07.462 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.462 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.462 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.462 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.462
2025-07-01 17:49:07.462 Example:
2025-07-01 17:49:07.462
2025-07-01 17:49:07.462 >>> d = Differ()
2025-07-01 17:49:07.462 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.462 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.463 >>> print(''.join(results), end="")
2025-07-01 17:49:07.463 - abcDefghiJkl
2025-07-01 17:49:07.463 + abcdefGhijkl
2025-07-01 17:49:07.463 """
2025-07-01 17:49:07.463
2025-07-01 17:49:07.463 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.463 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.463 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.463 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.463 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.463
2025-07-01 17:49:07.463 # search for the pair that matches best without being identical
2025-07-01 17:49:07.463 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.463 # on junk -- unless we have to)
2025-07-01 17:49:07.463 for j in range(blo, bhi):
2025-07-01 17:49:07.463 bj = b[j]
2025-07-01 17:49:07.463 cruncher.set_seq2(bj)
2025-07-01 17:49:07.463 for i in range(alo, ahi):
2025-07-01 17:49:07.463 ai = a[i]
2025-07-01 17:49:07.464 if ai == bj:
2025-07-01 17:49:07.464 if eqi is None:
2025-07-01 17:49:07.464 eqi, eqj = i, j
2025-07-01 17:49:07.464 continue
2025-07-01 17:49:07.464 cruncher.set_seq1(ai)
2025-07-01 17:49:07.464 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.464 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.464 # compares by a factor of 3.
2025-07-01 17:49:07.464 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.464 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.464 # of the computation is cached by cruncher
2025-07-01 17:49:07.464 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.464 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.464 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.464 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.464 if best_ratio < cutoff:
2025-07-01 17:49:07.464 # no non-identical "pretty close" pair
2025-07-01 17:49:07.464 if eqi is None:
2025-07-01 17:49:07.464 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.464 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.464 return
2025-07-01 17:49:07.465 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.465 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.465 else:
2025-07-01 17:49:07.465 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.465 eqi = None
2025-07-01 17:49:07.465
2025-07-01 17:49:07.465 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.465 # identical
2025-07-01 17:49:07.465
2025-07-01 17:49:07.465 # pump out diffs from before the synch point
2025-07-01 17:49:07.465 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.465
2025-07-01 17:49:07.465 # do intraline marking on the synch pair
2025-07-01 17:49:07.465 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.465 if eqi is None:
2025-07-01 17:49:07.465 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.465 atags = btags = ""
2025-07-01 17:49:07.465 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.465 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.465 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.466 if tag == 'replace':
2025-07-01 17:49:07.466 atags += '^' * la
2025-07-01 17:49:07.466 btags += '^' * lb
2025-07-01 17:49:07.466 elif tag == 'delete':
2025-07-01 17:49:07.466 atags += '-' * la
2025-07-01 17:49:07.466 elif tag == 'insert':
2025-07-01 17:49:07.466 btags += '+' * lb
2025-07-01 17:49:07.466 elif tag == 'equal':
2025-07-01 17:49:07.466 atags += ' ' * la
2025-07-01 17:49:07.466 btags += ' ' * lb
2025-07-01 17:49:07.466 else:
2025-07-01 17:49:07.466 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.466 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.466 else:
2025-07-01 17:49:07.466 # the synch pair is identical
2025-07-01 17:49:07.466 yield ' ' + aelt
2025-07-01 17:49:07.466
2025-07-01 17:49:07.466 # pump out diffs from after the synch point
2025-07-01 17:49:07.466 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.466
2025-07-01 17:49:07.467 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.467 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.467
2025-07-01 17:49:07.467 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.467 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.467 alo = 269, ahi = 1101
2025-07-01 17:49:07.467 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.467 blo = 269, bhi = 1101
2025-07-01 17:49:07.467
2025-07-01 17:49:07.467 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.467 g = []
2025-07-01 17:49:07.467 if alo < ahi:
2025-07-01 17:49:07.467 if blo < bhi:
2025-07-01 17:49:07.467 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.467 else:
2025-07-01 17:49:07.467 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.467 elif blo < bhi:
2025-07-01 17:49:07.467 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.467
2025-07-01 17:49:07.467 > yield from g
2025-07-01 17:49:07.468
2025-07-01 17:49:07.468 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.468 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.468
2025-07-01 17:49:07.468 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.468 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.468 alo = 269, ahi = 1101
2025-07-01 17:49:07.468 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.468 blo = 269, bhi = 1101
2025-07-01 17:49:07.468
2025-07-01 17:49:07.468 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.468 r"""
2025-07-01 17:49:07.468 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.468 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.468 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.468 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.468
2025-07-01 17:49:07.468 Example:
2025-07-01 17:49:07.468
2025-07-01 17:49:07.468 >>> d = Differ()
2025-07-01 17:49:07.469 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.469 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.471 >>> print(''.join(results), end="")
2025-07-01 17:49:07.472 - abcDefghiJkl
2025-07-01 17:49:07.472 + abcdefGhijkl
2025-07-01 17:49:07.472 """
2025-07-01 17:49:07.472
2025-07-01 17:49:07.472 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.472 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.472 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.472 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.472 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.472
2025-07-01 17:49:07.472 # search for the pair that matches best without being identical
2025-07-01 17:49:07.472 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.472 # on junk -- unless we have to)
2025-07-01 17:49:07.472 for j in range(blo, bhi):
2025-07-01 17:49:07.472 bj = b[j]
2025-07-01 17:49:07.472 cruncher.set_seq2(bj)
2025-07-01 17:49:07.472 for i in range(alo, ahi):
2025-07-01 17:49:07.472 ai = a[i]
2025-07-01 17:49:07.473 if ai == bj:
2025-07-01 17:49:07.473 if eqi is None:
2025-07-01 17:49:07.473 eqi, eqj = i, j
2025-07-01 17:49:07.473 continue
2025-07-01 17:49:07.473 cruncher.set_seq1(ai)
2025-07-01 17:49:07.473 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.473 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.473 # compares by a factor of 3.
2025-07-01 17:49:07.473 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.473 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.473 # of the computation is cached by cruncher
2025-07-01 17:49:07.473 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.473 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.473 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.473 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.473 if best_ratio < cutoff:
2025-07-01 17:49:07.473 # no non-identical "pretty close" pair
2025-07-01 17:49:07.473 if eqi is None:
2025-07-01 17:49:07.473 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.473 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.473 return
2025-07-01 17:49:07.474 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.474 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.474 else:
2025-07-01 17:49:07.474 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.474 eqi = None
2025-07-01 17:49:07.474
2025-07-01 17:49:07.474 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.474 # identical
2025-07-01 17:49:07.474
2025-07-01 17:49:07.474 # pump out diffs from before the synch point
2025-07-01 17:49:07.474 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.474
2025-07-01 17:49:07.474 # do intraline marking on the synch pair
2025-07-01 17:49:07.474 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.474 if eqi is None:
2025-07-01 17:49:07.474 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.474 atags = btags = ""
2025-07-01 17:49:07.474 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.474 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.474 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.474 if tag == 'replace':
2025-07-01 17:49:07.475 atags += '^' * la
2025-07-01 17:49:07.475 btags += '^' * lb
2025-07-01 17:49:07.475 elif tag == 'delete':
2025-07-01 17:49:07.475 atags += '-' * la
2025-07-01 17:49:07.475 elif tag == 'insert':
2025-07-01 17:49:07.475 btags += '+' * lb
2025-07-01 17:49:07.475 elif tag == 'equal':
2025-07-01 17:49:07.475 atags += ' ' * la
2025-07-01 17:49:07.475 btags += ' ' * lb
2025-07-01 17:49:07.475 else:
2025-07-01 17:49:07.475 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.475 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.475 else:
2025-07-01 17:49:07.475 # the synch pair is identical
2025-07-01 17:49:07.475 yield ' ' + aelt
2025-07-01 17:49:07.475
2025-07-01 17:49:07.475 # pump out diffs from after the synch point
2025-07-01 17:49:07.475 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.475
2025-07-01 17:49:07.475 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.476 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.476
2025-07-01 17:49:07.476 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.476 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.476 alo = 270, ahi = 1101
2025-07-01 17:49:07.476 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.476 blo = 270, bhi = 1101
2025-07-01 17:49:07.476
2025-07-01 17:49:07.476 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.476 g = []
2025-07-01 17:49:07.476 if alo < ahi:
2025-07-01 17:49:07.476 if blo < bhi:
2025-07-01 17:49:07.476 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.476 else:
2025-07-01 17:49:07.476 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.476 elif blo < bhi:
2025-07-01 17:49:07.476 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.476
2025-07-01 17:49:07.476 > yield from g
2025-07-01 17:49:07.476
2025-07-01 17:49:07.477 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.477 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.477
2025-07-01 17:49:07.477 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.477 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.477 alo = 270, ahi = 1101
2025-07-01 17:49:07.477 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.477 blo = 270, bhi = 1101
2025-07-01 17:49:07.477
2025-07-01 17:49:07.477 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.477 r"""
2025-07-01 17:49:07.477 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.477 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.477 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.477 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.477
2025-07-01 17:49:07.477 Example:
2025-07-01 17:49:07.477
2025-07-01 17:49:07.477 >>> d = Differ()
2025-07-01 17:49:07.478 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.478 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.478 >>> print(''.join(results), end="")
2025-07-01 17:49:07.478 - abcDefghiJkl
2025-07-01 17:49:07.478 + abcdefGhijkl
2025-07-01 17:49:07.478 """
2025-07-01 17:49:07.478
2025-07-01 17:49:07.478 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.478 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.478 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.478 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.478 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.478
2025-07-01 17:49:07.478 # search for the pair that matches best without being identical
2025-07-01 17:49:07.478 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.478 # on junk -- unless we have to)
2025-07-01 17:49:07.478 for j in range(blo, bhi):
2025-07-01 17:49:07.478 bj = b[j]
2025-07-01 17:49:07.479 cruncher.set_seq2(bj)
2025-07-01 17:49:07.479 for i in range(alo, ahi):
2025-07-01 17:49:07.479 ai = a[i]
2025-07-01 17:49:07.479 if ai == bj:
2025-07-01 17:49:07.479 if eqi is None:
2025-07-01 17:49:07.479 eqi, eqj = i, j
2025-07-01 17:49:07.479 continue
2025-07-01 17:49:07.479 cruncher.set_seq1(ai)
2025-07-01 17:49:07.479 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.479 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.479 # compares by a factor of 3.
2025-07-01 17:49:07.479 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.479 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.479 # of the computation is cached by cruncher
2025-07-01 17:49:07.479 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.479 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.479 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.479 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.479 if best_ratio < cutoff:
2025-07-01 17:49:07.479 # no non-identical "pretty close" pair
2025-07-01 17:49:07.480 if eqi is None:
2025-07-01 17:49:07.480 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.480 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.480 return
2025-07-01 17:49:07.480 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.480 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.480 else:
2025-07-01 17:49:07.480 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.480 eqi = None
2025-07-01 17:49:07.480
2025-07-01 17:49:07.480 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.480 # identical
2025-07-01 17:49:07.480
2025-07-01 17:49:07.480 # pump out diffs from before the synch point
2025-07-01 17:49:07.480 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.480
2025-07-01 17:49:07.480 # do intraline marking on the synch pair
2025-07-01 17:49:07.480 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.480 if eqi is None:
2025-07-01 17:49:07.480 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.480 atags = btags = ""
2025-07-01 17:49:07.481 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.481 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.481 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.481 if tag == 'replace':
2025-07-01 17:49:07.481 atags += '^' * la
2025-07-01 17:49:07.481 btags += '^' * lb
2025-07-01 17:49:07.481 elif tag == 'delete':
2025-07-01 17:49:07.481 atags += '-' * la
2025-07-01 17:49:07.481 elif tag == 'insert':
2025-07-01 17:49:07.481 btags += '+' * lb
2025-07-01 17:49:07.481 elif tag == 'equal':
2025-07-01 17:49:07.481 atags += ' ' * la
2025-07-01 17:49:07.481 btags += ' ' * lb
2025-07-01 17:49:07.481 else:
2025-07-01 17:49:07.481 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.481 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.481 else:
2025-07-01 17:49:07.481 # the synch pair is identical
2025-07-01 17:49:07.481 yield ' ' + aelt
2025-07-01 17:49:07.481
2025-07-01 17:49:07.481 # pump out diffs from after the synch point
2025-07-01 17:49:07.482 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.482
2025-07-01 17:49:07.482 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.482 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.482
2025-07-01 17:49:07.482 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.482 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.482 alo = 271, ahi = 1101
2025-07-01 17:49:07.482 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.482 blo = 271, bhi = 1101
2025-07-01 17:49:07.482
2025-07-01 17:49:07.482 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.482 g = []
2025-07-01 17:49:07.482 if alo < ahi:
2025-07-01 17:49:07.482 if blo < bhi:
2025-07-01 17:49:07.482 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.482 else:
2025-07-01 17:49:07.482 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.482 elif blo < bhi:
2025-07-01 17:49:07.482 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.482
2025-07-01 17:49:07.483 > yield from g
2025-07-01 17:49:07.483
2025-07-01 17:49:07.483 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.483 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.483
2025-07-01 17:49:07.483 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.483 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.483 alo = 271, ahi = 1101
2025-07-01 17:49:07.483 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.483 blo = 271, bhi = 1101
2025-07-01 17:49:07.483
2025-07-01 17:49:07.483 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.483 r"""
2025-07-01 17:49:07.483 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.483 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.483 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.483 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.483
2025-07-01 17:49:07.483 Example:
2025-07-01 17:49:07.483
2025-07-01 17:49:07.483 >>> d = Differ()
2025-07-01 17:49:07.483 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.489 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.489 >>> print(''.join(results), end="")
2025-07-01 17:49:07.489 - abcDefghiJkl
2025-07-01 17:49:07.490 + abcdefGhijkl
2025-07-01 17:49:07.490 """
2025-07-01 17:49:07.490
2025-07-01 17:49:07.490 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.490 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.490 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.490 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.490 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.490
2025-07-01 17:49:07.490 # search for the pair that matches best without being identical
2025-07-01 17:49:07.490 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.490 # on junk -- unless we have to)
2025-07-01 17:49:07.490 for j in range(blo, bhi):
2025-07-01 17:49:07.490 bj = b[j]
2025-07-01 17:49:07.490 cruncher.set_seq2(bj)
2025-07-01 17:49:07.490 for i in range(alo, ahi):
2025-07-01 17:49:07.490 ai = a[i]
2025-07-01 17:49:07.490 if ai == bj:
2025-07-01 17:49:07.491 if eqi is None:
2025-07-01 17:49:07.491 eqi, eqj = i, j
2025-07-01 17:49:07.491 continue
2025-07-01 17:49:07.491 cruncher.set_seq1(ai)
2025-07-01 17:49:07.491 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.491 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.491 # compares by a factor of 3.
2025-07-01 17:49:07.491 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.491 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.491 # of the computation is cached by cruncher
2025-07-01 17:49:07.491 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.491 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.491 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.491 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.491 if best_ratio < cutoff:
2025-07-01 17:49:07.491 # no non-identical "pretty close" pair
2025-07-01 17:49:07.491 if eqi is None:
2025-07-01 17:49:07.491 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.491 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.491 return
2025-07-01 17:49:07.492 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.492 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.492 else:
2025-07-01 17:49:07.492 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.492 eqi = None
2025-07-01 17:49:07.492
2025-07-01 17:49:07.492 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.492 # identical
2025-07-01 17:49:07.492
2025-07-01 17:49:07.492 # pump out diffs from before the synch point
2025-07-01 17:49:07.492 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.492
2025-07-01 17:49:07.492 # do intraline marking on the synch pair
2025-07-01 17:49:07.492 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.492 if eqi is None:
2025-07-01 17:49:07.492 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.492 atags = btags = ""
2025-07-01 17:49:07.492 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.492 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.493 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.493 if tag == 'replace':
2025-07-01 17:49:07.493 atags += '^' * la
2025-07-01 17:49:07.493 btags += '^' * lb
2025-07-01 17:49:07.493 elif tag == 'delete':
2025-07-01 17:49:07.493 atags += '-' * la
2025-07-01 17:49:07.493 elif tag == 'insert':
2025-07-01 17:49:07.493 btags += '+' * lb
2025-07-01 17:49:07.493 elif tag == 'equal':
2025-07-01 17:49:07.493 atags += ' ' * la
2025-07-01 17:49:07.493 btags += ' ' * lb
2025-07-01 17:49:07.493 else:
2025-07-01 17:49:07.493 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.493 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.493 else:
2025-07-01 17:49:07.493 # the synch pair is identical
2025-07-01 17:49:07.493 yield ' ' + aelt
2025-07-01 17:49:07.493
2025-07-01 17:49:07.493 # pump out diffs from after the synch point
2025-07-01 17:49:07.493 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.493
2025-07-01 17:49:07.494 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.494 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.494
2025-07-01 17:49:07.494 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.494 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.494 alo = 272, ahi = 1101
2025-07-01 17:49:07.494 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.494 blo = 272, bhi = 1101
2025-07-01 17:49:07.494
2025-07-01 17:49:07.494 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.494 g = []
2025-07-01 17:49:07.494 if alo < ahi:
2025-07-01 17:49:07.494 if blo < bhi:
2025-07-01 17:49:07.494 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.494 else:
2025-07-01 17:49:07.494 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.494 elif blo < bhi:
2025-07-01 17:49:07.494 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.494
2025-07-01 17:49:07.494 > yield from g
2025-07-01 17:49:07.494
2025-07-01 17:49:07.495 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.495 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.495
2025-07-01 17:49:07.495 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.495 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.495 alo = 272, ahi = 1101
2025-07-01 17:49:07.495 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.495 blo = 272, bhi = 1101
2025-07-01 17:49:07.495
2025-07-01 17:49:07.495 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.495 r"""
2025-07-01 17:49:07.495 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.495 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.495 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.495 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.495
2025-07-01 17:49:07.495 Example:
2025-07-01 17:49:07.495
2025-07-01 17:49:07.495 >>> d = Differ()
2025-07-01 17:49:07.495 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.495 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.496 >>> print(''.join(results), end="")
2025-07-01 17:49:07.496 - abcDefghiJkl
2025-07-01 17:49:07.496 + abcdefGhijkl
2025-07-01 17:49:07.496 """
2025-07-01 17:49:07.496
2025-07-01 17:49:07.496 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.496 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.496 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.496 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.496 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.496
2025-07-01 17:49:07.496 # search for the pair that matches best without being identical
2025-07-01 17:49:07.496 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.496 # on junk -- unless we have to)
2025-07-01 17:49:07.496 for j in range(blo, bhi):
2025-07-01 17:49:07.496 bj = b[j]
2025-07-01 17:49:07.496 cruncher.set_seq2(bj)
2025-07-01 17:49:07.496 for i in range(alo, ahi):
2025-07-01 17:49:07.496 ai = a[i]
2025-07-01 17:49:07.496 if ai == bj:
2025-07-01 17:49:07.497 if eqi is None:
2025-07-01 17:49:07.497 eqi, eqj = i, j
2025-07-01 17:49:07.497 continue
2025-07-01 17:49:07.497 cruncher.set_seq1(ai)
2025-07-01 17:49:07.497 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.497 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.497 # compares by a factor of 3.
2025-07-01 17:49:07.497 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.497 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.497 # of the computation is cached by cruncher
2025-07-01 17:49:07.497 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.497 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.497 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.497 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.497 if best_ratio < cutoff:
2025-07-01 17:49:07.497 # no non-identical "pretty close" pair
2025-07-01 17:49:07.497 if eqi is None:
2025-07-01 17:49:07.497 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.497 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.497 return
2025-07-01 17:49:07.497 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.498 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.498 else:
2025-07-01 17:49:07.498 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.498 eqi = None
2025-07-01 17:49:07.498
2025-07-01 17:49:07.498 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.498 # identical
2025-07-01 17:49:07.498
2025-07-01 17:49:07.498 # pump out diffs from before the synch point
2025-07-01 17:49:07.498 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.498
2025-07-01 17:49:07.498 # do intraline marking on the synch pair
2025-07-01 17:49:07.498 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.498 if eqi is None:
2025-07-01 17:49:07.498 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.498 atags = btags = ""
2025-07-01 17:49:07.498 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.498 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.498 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.498 if tag == 'replace':
2025-07-01 17:49:07.498 atags += '^' * la
2025-07-01 17:49:07.499 btags += '^' * lb
2025-07-01 17:49:07.499 elif tag == 'delete':
2025-07-01 17:49:07.499 atags += '-' * la
2025-07-01 17:49:07.499 elif tag == 'insert':
2025-07-01 17:49:07.499 btags += '+' * lb
2025-07-01 17:49:07.499 elif tag == 'equal':
2025-07-01 17:49:07.499 atags += ' ' * la
2025-07-01 17:49:07.499 btags += ' ' * lb
2025-07-01 17:49:07.499 else:
2025-07-01 17:49:07.499 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.499 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.499 else:
2025-07-01 17:49:07.499 # the synch pair is identical
2025-07-01 17:49:07.499 yield ' ' + aelt
2025-07-01 17:49:07.499
2025-07-01 17:49:07.499 # pump out diffs from after the synch point
2025-07-01 17:49:07.499 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.499
2025-07-01 17:49:07.499 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.499 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.499
2025-07-01 17:49:07.499 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.500 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.503 alo = 273, ahi = 1101
2025-07-01 17:49:07.503 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.503 blo = 273, bhi = 1101
2025-07-01 17:49:07.503
2025-07-01 17:49:07.503 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.503 g = []
2025-07-01 17:49:07.503 if alo < ahi:
2025-07-01 17:49:07.503 if blo < bhi:
2025-07-01 17:49:07.503 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.503 else:
2025-07-01 17:49:07.503 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.503 elif blo < bhi:
2025-07-01 17:49:07.503 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.503
2025-07-01 17:49:07.503 > yield from g
2025-07-01 17:49:07.503
2025-07-01 17:49:07.503 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.504 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.504
2025-07-01 17:49:07.504 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.504 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.504 alo = 273, ahi = 1101
2025-07-01 17:49:07.504 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.504 blo = 273, bhi = 1101
2025-07-01 17:49:07.504
2025-07-01 17:49:07.504 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.504 r"""
2025-07-01 17:49:07.504 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.504 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.504 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.504 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.504
2025-07-01 17:49:07.504 Example:
2025-07-01 17:49:07.504
2025-07-01 17:49:07.504 >>> d = Differ()
2025-07-01 17:49:07.504 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.504 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.505 >>> print(''.join(results), end="")
2025-07-01 17:49:07.505 - abcDefghiJkl
2025-07-01 17:49:07.505 + abcdefGhijkl
2025-07-01 17:49:07.505 """
2025-07-01 17:49:07.505
2025-07-01 17:49:07.505 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.505 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.505 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.505 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.505 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.505
2025-07-01 17:49:07.505 # search for the pair that matches best without being identical
2025-07-01 17:49:07.505 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.505 # on junk -- unless we have to)
2025-07-01 17:49:07.505 for j in range(blo, bhi):
2025-07-01 17:49:07.505 bj = b[j]
2025-07-01 17:49:07.505 cruncher.set_seq2(bj)
2025-07-01 17:49:07.505 for i in range(alo, ahi):
2025-07-01 17:49:07.505 ai = a[i]
2025-07-01 17:49:07.506 if ai == bj:
2025-07-01 17:49:07.506 if eqi is None:
2025-07-01 17:49:07.506 eqi, eqj = i, j
2025-07-01 17:49:07.506 continue
2025-07-01 17:49:07.506 cruncher.set_seq1(ai)
2025-07-01 17:49:07.506 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.506 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.506 # compares by a factor of 3.
2025-07-01 17:49:07.506 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.506 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.506 # of the computation is cached by cruncher
2025-07-01 17:49:07.506 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.506 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.506 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.506 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.506 if best_ratio < cutoff:
2025-07-01 17:49:07.506 # no non-identical "pretty close" pair
2025-07-01 17:49:07.506 if eqi is None:
2025-07-01 17:49:07.506 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.506 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.507 return
2025-07-01 17:49:07.507 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.507 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.507 else:
2025-07-01 17:49:07.507 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.507 eqi = None
2025-07-01 17:49:07.507
2025-07-01 17:49:07.507 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.507 # identical
2025-07-01 17:49:07.507
2025-07-01 17:49:07.507 # pump out diffs from before the synch point
2025-07-01 17:49:07.507 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.507
2025-07-01 17:49:07.507 # do intraline marking on the synch pair
2025-07-01 17:49:07.507 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.507 if eqi is None:
2025-07-01 17:49:07.507 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.507 atags = btags = ""
2025-07-01 17:49:07.507 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.507 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.507 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.508 if tag == 'replace':
2025-07-01 17:49:07.508 atags += '^' * la
2025-07-01 17:49:07.508 btags += '^' * lb
2025-07-01 17:49:07.508 elif tag == 'delete':
2025-07-01 17:49:07.508 atags += '-' * la
2025-07-01 17:49:07.508 elif tag == 'insert':
2025-07-01 17:49:07.508 btags += '+' * lb
2025-07-01 17:49:07.508 elif tag == 'equal':
2025-07-01 17:49:07.508 atags += ' ' * la
2025-07-01 17:49:07.508 btags += ' ' * lb
2025-07-01 17:49:07.508 else:
2025-07-01 17:49:07.508 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.508 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.508 else:
2025-07-01 17:49:07.508 # the synch pair is identical
2025-07-01 17:49:07.508 yield ' ' + aelt
2025-07-01 17:49:07.508
2025-07-01 17:49:07.508 # pump out diffs from after the synch point
2025-07-01 17:49:07.508 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.508
2025-07-01 17:49:07.508 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.509 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.509
2025-07-01 17:49:07.509 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.509 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.509 alo = 274, ahi = 1101
2025-07-01 17:49:07.509 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.509 blo = 274, bhi = 1101
2025-07-01 17:49:07.509
2025-07-01 17:49:07.509 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.509 g = []
2025-07-01 17:49:07.509 if alo < ahi:
2025-07-01 17:49:07.509 if blo < bhi:
2025-07-01 17:49:07.509 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.509 else:
2025-07-01 17:49:07.509 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.509 elif blo < bhi:
2025-07-01 17:49:07.509 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.509
2025-07-01 17:49:07.509 > yield from g
2025-07-01 17:49:07.509
2025-07-01 17:49:07.510 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.510 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.510
2025-07-01 17:49:07.510 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.510 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.510 alo = 274, ahi = 1101
2025-07-01 17:49:07.510 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.510 blo = 274, bhi = 1101
2025-07-01 17:49:07.510
2025-07-01 17:49:07.510 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.510 r"""
2025-07-01 17:49:07.510 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.510 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.510 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.510 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.510
2025-07-01 17:49:07.510 Example:
2025-07-01 17:49:07.510
2025-07-01 17:49:07.510 >>> d = Differ()
2025-07-01 17:49:07.510 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.511 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.511 >>> print(''.join(results), end="")
2025-07-01 17:49:07.511 - abcDefghiJkl
2025-07-01 17:49:07.511 + abcdefGhijkl
2025-07-01 17:49:07.511 """
2025-07-01 17:49:07.511
2025-07-01 17:49:07.511 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.511 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.511 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.511 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.511 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.511
2025-07-01 17:49:07.511 # search for the pair that matches best without being identical
2025-07-01 17:49:07.511 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.511 # on junk -- unless we have to)
2025-07-01 17:49:07.511 for j in range(blo, bhi):
2025-07-01 17:49:07.511 bj = b[j]
2025-07-01 17:49:07.511 cruncher.set_seq2(bj)
2025-07-01 17:49:07.511 for i in range(alo, ahi):
2025-07-01 17:49:07.512 ai = a[i]
2025-07-01 17:49:07.512 if ai == bj:
2025-07-01 17:49:07.512 if eqi is None:
2025-07-01 17:49:07.512 eqi, eqj = i, j
2025-07-01 17:49:07.512 continue
2025-07-01 17:49:07.512 cruncher.set_seq1(ai)
2025-07-01 17:49:07.512 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.512 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.512 # compares by a factor of 3.
2025-07-01 17:49:07.512 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.512 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.512 # of the computation is cached by cruncher
2025-07-01 17:49:07.512 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.512 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.512 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.512 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.512 if best_ratio < cutoff:
2025-07-01 17:49:07.512 # no non-identical "pretty close" pair
2025-07-01 17:49:07.512 if eqi is None:
2025-07-01 17:49:07.512 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.513 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.513 return
2025-07-01 17:49:07.513 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.513 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.513 else:
2025-07-01 17:49:07.513 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.513 eqi = None
2025-07-01 17:49:07.513
2025-07-01 17:49:07.513 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.513 # identical
2025-07-01 17:49:07.513
2025-07-01 17:49:07.513 # pump out diffs from before the synch point
2025-07-01 17:49:07.513 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.513
2025-07-01 17:49:07.513 # do intraline marking on the synch pair
2025-07-01 17:49:07.513 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.513 if eqi is None:
2025-07-01 17:49:07.513 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.513 atags = btags = ""
2025-07-01 17:49:07.513 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.514 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.514 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.514 if tag == 'replace':
2025-07-01 17:49:07.514 atags += '^' * la
2025-07-01 17:49:07.514 btags += '^' * lb
2025-07-01 17:49:07.514 elif tag == 'delete':
2025-07-01 17:49:07.514 atags += '-' * la
2025-07-01 17:49:07.514 elif tag == 'insert':
2025-07-01 17:49:07.514 btags += '+' * lb
2025-07-01 17:49:07.514 elif tag == 'equal':
2025-07-01 17:49:07.514 atags += ' ' * la
2025-07-01 17:49:07.514 btags += ' ' * lb
2025-07-01 17:49:07.514 else:
2025-07-01 17:49:07.514 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.514 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.514 else:
2025-07-01 17:49:07.514 # the synch pair is identical
2025-07-01 17:49:07.514 yield ' ' + aelt
2025-07-01 17:49:07.514
2025-07-01 17:49:07.514 # pump out diffs from after the synch point
2025-07-01 17:49:07.514 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.514
2025-07-01 17:49:07.515 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.515 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.515
2025-07-01 17:49:07.515 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.515 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.515 alo = 275, ahi = 1101
2025-07-01 17:49:07.515 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.515 blo = 275, bhi = 1101
2025-07-01 17:49:07.515
2025-07-01 17:49:07.515 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.515 g = []
2025-07-01 17:49:07.515 if alo < ahi:
2025-07-01 17:49:07.515 if blo < bhi:
2025-07-01 17:49:07.515 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.515 else:
2025-07-01 17:49:07.515 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.515 elif blo < bhi:
2025-07-01 17:49:07.515 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.515
2025-07-01 17:49:07.515 > yield from g
2025-07-01 17:49:07.515
2025-07-01 17:49:07.516 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.521 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.521
2025-07-01 17:49:07.521 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.521 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.521 alo = 275, ahi = 1101
2025-07-01 17:49:07.521 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.521 blo = 275, bhi = 1101
2025-07-01 17:49:07.521
2025-07-01 17:49:07.521 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.521 r"""
2025-07-01 17:49:07.521 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.521 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.521 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.521 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.521
2025-07-01 17:49:07.521 Example:
2025-07-01 17:49:07.522
2025-07-01 17:49:07.522 >>> d = Differ()
2025-07-01 17:49:07.522 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.522 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.522 >>> print(''.join(results), end="")
2025-07-01 17:49:07.522 - abcDefghiJkl
2025-07-01 17:49:07.522 + abcdefGhijkl
2025-07-01 17:49:07.522 """
2025-07-01 17:49:07.522
2025-07-01 17:49:07.522 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.522 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.522 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.522 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.522 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.522
2025-07-01 17:49:07.522 # search for the pair that matches best without being identical
2025-07-01 17:49:07.522 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.522 # on junk -- unless we have to)
2025-07-01 17:49:07.523 for j in range(blo, bhi):
2025-07-01 17:49:07.523 bj = b[j]
2025-07-01 17:49:07.523 cruncher.set_seq2(bj)
2025-07-01 17:49:07.523 for i in range(alo, ahi):
2025-07-01 17:49:07.523 ai = a[i]
2025-07-01 17:49:07.523 if ai == bj:
2025-07-01 17:49:07.523 if eqi is None:
2025-07-01 17:49:07.523 eqi, eqj = i, j
2025-07-01 17:49:07.523 continue
2025-07-01 17:49:07.523 cruncher.set_seq1(ai)
2025-07-01 17:49:07.523 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.523 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.523 # compares by a factor of 3.
2025-07-01 17:49:07.523 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.523 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.523 # of the computation is cached by cruncher
2025-07-01 17:49:07.523 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.523 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.523 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.523 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.523 if best_ratio < cutoff:
2025-07-01 17:49:07.524 # no non-identical "pretty close" pair
2025-07-01 17:49:07.524 if eqi is None:
2025-07-01 17:49:07.524 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.524 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.524 return
2025-07-01 17:49:07.524 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.524 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.524 else:
2025-07-01 17:49:07.524 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.524 eqi = None
2025-07-01 17:49:07.524
2025-07-01 17:49:07.524 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.524 # identical
2025-07-01 17:49:07.524
2025-07-01 17:49:07.524 # pump out diffs from before the synch point
2025-07-01 17:49:07.524 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.524
2025-07-01 17:49:07.524 # do intraline marking on the synch pair
2025-07-01 17:49:07.524 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.524 if eqi is None:
2025-07-01 17:49:07.525 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.525 atags = btags = ""
2025-07-01 17:49:07.525 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.525 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.525 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.525 if tag == 'replace':
2025-07-01 17:49:07.525 atags += '^' * la
2025-07-01 17:49:07.525 btags += '^' * lb
2025-07-01 17:49:07.525 elif tag == 'delete':
2025-07-01 17:49:07.525 atags += '-' * la
2025-07-01 17:49:07.525 elif tag == 'insert':
2025-07-01 17:49:07.525 btags += '+' * lb
2025-07-01 17:49:07.525 elif tag == 'equal':
2025-07-01 17:49:07.525 atags += ' ' * la
2025-07-01 17:49:07.525 btags += ' ' * lb
2025-07-01 17:49:07.525 else:
2025-07-01 17:49:07.525 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.525 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.525 else:
2025-07-01 17:49:07.526 # the synch pair is identical
2025-07-01 17:49:07.526 yield ' ' + aelt
2025-07-01 17:49:07.526
2025-07-01 17:49:07.526 # pump out diffs from after the synch point
2025-07-01 17:49:07.526 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.526
2025-07-01 17:49:07.526 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.526 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.526
2025-07-01 17:49:07.526 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.526 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.526 alo = 276, ahi = 1101
2025-07-01 17:49:07.526 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.526 blo = 276, bhi = 1101
2025-07-01 17:49:07.526
2025-07-01 17:49:07.526 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.526 g = []
2025-07-01 17:49:07.526 if alo < ahi:
2025-07-01 17:49:07.526 if blo < bhi:
2025-07-01 17:49:07.526 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.527 else:
2025-07-01 17:49:07.527 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.527 elif blo < bhi:
2025-07-01 17:49:07.527 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.527
2025-07-01 17:49:07.527 > yield from g
2025-07-01 17:49:07.527
2025-07-01 17:49:07.527 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.527 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.527
2025-07-01 17:49:07.527 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.527 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.527 alo = 276, ahi = 1101
2025-07-01 17:49:07.527 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.527 blo = 276, bhi = 1101
2025-07-01 17:49:07.527
2025-07-01 17:49:07.527 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.527 r"""
2025-07-01 17:49:07.527 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.527 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.528 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.528 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.528
2025-07-01 17:49:07.528 Example:
2025-07-01 17:49:07.528
2025-07-01 17:49:07.528 >>> d = Differ()
2025-07-01 17:49:07.528 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.528 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.528 >>> print(''.join(results), end="")
2025-07-01 17:49:07.528 - abcDefghiJkl
2025-07-01 17:49:07.528 + abcdefGhijkl
2025-07-01 17:49:07.528 """
2025-07-01 17:49:07.528
2025-07-01 17:49:07.528 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.528 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.528 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.528 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.528 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.528
2025-07-01 17:49:07.529 # search for the pair that matches best without being identical
2025-07-01 17:49:07.529 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.529 # on junk -- unless we have to)
2025-07-01 17:49:07.529 for j in range(blo, bhi):
2025-07-01 17:49:07.529 bj = b[j]
2025-07-01 17:49:07.529 cruncher.set_seq2(bj)
2025-07-01 17:49:07.529 for i in range(alo, ahi):
2025-07-01 17:49:07.529 ai = a[i]
2025-07-01 17:49:07.529 if ai == bj:
2025-07-01 17:49:07.529 if eqi is None:
2025-07-01 17:49:07.529 eqi, eqj = i, j
2025-07-01 17:49:07.529 continue
2025-07-01 17:49:07.529 cruncher.set_seq1(ai)
2025-07-01 17:49:07.529 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.529 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.529 # compares by a factor of 3.
2025-07-01 17:49:07.529 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.529 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.529 # of the computation is cached by cruncher
2025-07-01 17:49:07.529 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.530 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.530 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.530 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.530 if best_ratio < cutoff:
2025-07-01 17:49:07.530 # no non-identical "pretty close" pair
2025-07-01 17:49:07.530 if eqi is None:
2025-07-01 17:49:07.530 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.530 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.530 return
2025-07-01 17:49:07.530 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.530 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.530 else:
2025-07-01 17:49:07.530 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.530 eqi = None
2025-07-01 17:49:07.530
2025-07-01 17:49:07.530 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.530 # identical
2025-07-01 17:49:07.530
2025-07-01 17:49:07.530 # pump out diffs from before the synch point
2025-07-01 17:49:07.530 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.530
2025-07-01 17:49:07.531 # do intraline marking on the synch pair
2025-07-01 17:49:07.531 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.531 if eqi is None:
2025-07-01 17:49:07.531 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.531 atags = btags = ""
2025-07-01 17:49:07.531 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.531 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.531 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.531 if tag == 'replace':
2025-07-01 17:49:07.531 atags += '^' * la
2025-07-01 17:49:07.531 btags += '^' * lb
2025-07-01 17:49:07.531 elif tag == 'delete':
2025-07-01 17:49:07.531 atags += '-' * la
2025-07-01 17:49:07.531 elif tag == 'insert':
2025-07-01 17:49:07.531 btags += '+' * lb
2025-07-01 17:49:07.531 elif tag == 'equal':
2025-07-01 17:49:07.531 atags += ' ' * la
2025-07-01 17:49:07.531 btags += ' ' * lb
2025-07-01 17:49:07.531 else:
2025-07-01 17:49:07.531 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.532 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.535 else:
2025-07-01 17:49:07.535 # the synch pair is identical
2025-07-01 17:49:07.535 yield ' ' + aelt
2025-07-01 17:49:07.535
2025-07-01 17:49:07.535 # pump out diffs from after the synch point
2025-07-01 17:49:07.535 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.535
2025-07-01 17:49:07.535 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.535 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.535
2025-07-01 17:49:07.535 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.535 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.535 alo = 277, ahi = 1101
2025-07-01 17:49:07.535 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.535 blo = 277, bhi = 1101
2025-07-01 17:49:07.535
2025-07-01 17:49:07.535 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.535 g = []
2025-07-01 17:49:07.535 if alo < ahi:
2025-07-01 17:49:07.536 if blo < bhi:
2025-07-01 17:49:07.536 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.536 else:
2025-07-01 17:49:07.536 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.536 elif blo < bhi:
2025-07-01 17:49:07.536 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.536
2025-07-01 17:49:07.536 > yield from g
2025-07-01 17:49:07.536
2025-07-01 17:49:07.536 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.536 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.536
2025-07-01 17:49:07.536 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.536 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.536 alo = 277, ahi = 1101
2025-07-01 17:49:07.536 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.536 blo = 277, bhi = 1101
2025-07-01 17:49:07.536
2025-07-01 17:49:07.536 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.537 r"""
2025-07-01 17:49:07.537 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.537 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.537 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.537 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.537
2025-07-01 17:49:07.537 Example:
2025-07-01 17:49:07.537
2025-07-01 17:49:07.537 >>> d = Differ()
2025-07-01 17:49:07.537 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.537 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.537 >>> print(''.join(results), end="")
2025-07-01 17:49:07.537 - abcDefghiJkl
2025-07-01 17:49:07.537 + abcdefGhijkl
2025-07-01 17:49:07.537 """
2025-07-01 17:49:07.537
2025-07-01 17:49:07.537 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.537 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.537 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.537 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.537 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.537
2025-07-01 17:49:07.537 # search for the pair that matches best without being identical
2025-07-01 17:49:07.537 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.537 # on junk -- unless we have to)
2025-07-01 17:49:07.537 for j in range(blo, bhi):
2025-07-01 17:49:07.537 bj = b[j]
2025-07-01 17:49:07.537 cruncher.set_seq2(bj)
2025-07-01 17:49:07.537 for i in range(alo, ahi):
2025-07-01 17:49:07.537 ai = a[i]
2025-07-01 17:49:07.537 if ai == bj:
2025-07-01 17:49:07.537 if eqi is None:
2025-07-01 17:49:07.537 eqi, eqj = i, j
2025-07-01 17:49:07.537 continue
2025-07-01 17:49:07.538 cruncher.set_seq1(ai)
2025-07-01 17:49:07.538 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.538 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.538 # compares by a factor of 3.
2025-07-01 17:49:07.538 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.538 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.538 # of the computation is cached by cruncher
2025-07-01 17:49:07.538 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.538 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.538 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.538 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.538 if best_ratio < cutoff:
2025-07-01 17:49:07.538 # no non-identical "pretty close" pair
2025-07-01 17:49:07.538 if eqi is None:
2025-07-01 17:49:07.538 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.538 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.538 return
2025-07-01 17:49:07.538 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.538 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.538 else:
2025-07-01 17:49:07.538 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.539 eqi = None
2025-07-01 17:49:07.539
2025-07-01 17:49:07.539 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.539 # identical
2025-07-01 17:49:07.539
2025-07-01 17:49:07.539 # pump out diffs from before the synch point
2025-07-01 17:49:07.539 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.539
2025-07-01 17:49:07.539 # do intraline marking on the synch pair
2025-07-01 17:49:07.539 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.539 if eqi is None:
2025-07-01 17:49:07.539 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.539 atags = btags = ""
2025-07-01 17:49:07.539 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.539 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.539 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.539 if tag == 'replace':
2025-07-01 17:49:07.539 atags += '^' * la
2025-07-01 17:49:07.539 btags += '^' * lb
2025-07-01 17:49:07.539 elif tag == 'delete':
2025-07-01 17:49:07.539 atags += '-' * la
2025-07-01 17:49:07.540 elif tag == 'insert':
2025-07-01 17:49:07.540 btags += '+' * lb
2025-07-01 17:49:07.540 elif tag == 'equal':
2025-07-01 17:49:07.540 atags += ' ' * la
2025-07-01 17:49:07.540 btags += ' ' * lb
2025-07-01 17:49:07.540 else:
2025-07-01 17:49:07.540 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.540 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.540 else:
2025-07-01 17:49:07.540 # the synch pair is identical
2025-07-01 17:49:07.540 yield ' ' + aelt
2025-07-01 17:49:07.540
2025-07-01 17:49:07.540 # pump out diffs from after the synch point
2025-07-01 17:49:07.540 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.540
2025-07-01 17:49:07.540 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.540 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.540
2025-07-01 17:49:07.540 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.540 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.540 alo = 278, ahi = 1101
2025-07-01 17:49:07.541 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.541 blo = 278, bhi = 1101
2025-07-01 17:49:07.541
2025-07-01 17:49:07.541 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.541 g = []
2025-07-01 17:49:07.541 if alo < ahi:
2025-07-01 17:49:07.541 if blo < bhi:
2025-07-01 17:49:07.541 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.541 else:
2025-07-01 17:49:07.541 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.541 elif blo < bhi:
2025-07-01 17:49:07.541 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.541
2025-07-01 17:49:07.541 > yield from g
2025-07-01 17:49:07.541
2025-07-01 17:49:07.541 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.541 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.541
2025-07-01 17:49:07.541 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.541 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.541 alo = 278, ahi = 1101
2025-07-01 17:49:07.542 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.542 blo = 278, bhi = 1101
2025-07-01 17:49:07.542
2025-07-01 17:49:07.542 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.542 r"""
2025-07-01 17:49:07.542 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.542 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.542 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.542 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.542
2025-07-01 17:49:07.542 Example:
2025-07-01 17:49:07.542
2025-07-01 17:49:07.542 >>> d = Differ()
2025-07-01 17:49:07.542 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.542 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.542 >>> print(''.join(results), end="")
2025-07-01 17:49:07.542 - abcDefghiJkl
2025-07-01 17:49:07.542 + abcdefGhijkl
2025-07-01 17:49:07.542 """
2025-07-01 17:49:07.543
2025-07-01 17:49:07.543 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.543 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.543 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.543 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.543 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.543
2025-07-01 17:49:07.543 # search for the pair that matches best without being identical
2025-07-01 17:49:07.543 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.543 # on junk -- unless we have to)
2025-07-01 17:49:07.543 for j in range(blo, bhi):
2025-07-01 17:49:07.543 bj = b[j]
2025-07-01 17:49:07.543 cruncher.set_seq2(bj)
2025-07-01 17:49:07.543 for i in range(alo, ahi):
2025-07-01 17:49:07.543 ai = a[i]
2025-07-01 17:49:07.543 if ai == bj:
2025-07-01 17:49:07.543 if eqi is None:
2025-07-01 17:49:07.543 eqi, eqj = i, j
2025-07-01 17:49:07.543 continue
2025-07-01 17:49:07.543 cruncher.set_seq1(ai)
2025-07-01 17:49:07.543 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.544 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.544 # compares by a factor of 3.
2025-07-01 17:49:07.544 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.544 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.544 # of the computation is cached by cruncher
2025-07-01 17:49:07.544 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.544 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.544 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.544 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.544 if best_ratio < cutoff:
2025-07-01 17:49:07.544 # no non-identical "pretty close" pair
2025-07-01 17:49:07.544 if eqi is None:
2025-07-01 17:49:07.544 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.544 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.544 return
2025-07-01 17:49:07.544 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.544 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.544 else:
2025-07-01 17:49:07.544 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.545 eqi = None
2025-07-01 17:49:07.545
2025-07-01 17:49:07.545 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.545 # identical
2025-07-01 17:49:07.545
2025-07-01 17:49:07.545 # pump out diffs from before the synch point
2025-07-01 17:49:07.545 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.545
2025-07-01 17:49:07.545 # do intraline marking on the synch pair
2025-07-01 17:49:07.545 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.545 if eqi is None:
2025-07-01 17:49:07.545 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.545 atags = btags = ""
2025-07-01 17:49:07.545 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.545 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.545 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.545 if tag == 'replace':
2025-07-01 17:49:07.545 atags += '^' * la
2025-07-01 17:49:07.545 btags += '^' * lb
2025-07-01 17:49:07.545 elif tag == 'delete':
2025-07-01 17:49:07.545 atags += '-' * la
2025-07-01 17:49:07.546 elif tag == 'insert':
2025-07-01 17:49:07.546 btags += '+' * lb
2025-07-01 17:49:07.546 elif tag == 'equal':
2025-07-01 17:49:07.546 atags += ' ' * la
2025-07-01 17:49:07.546 btags += ' ' * lb
2025-07-01 17:49:07.546 else:
2025-07-01 17:49:07.546 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.546 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.546 else:
2025-07-01 17:49:07.546 # the synch pair is identical
2025-07-01 17:49:07.546 yield ' ' + aelt
2025-07-01 17:49:07.546
2025-07-01 17:49:07.546 # pump out diffs from after the synch point
2025-07-01 17:49:07.546 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.546
2025-07-01 17:49:07.546 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.546 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.546
2025-07-01 17:49:07.546 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.546 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.546 alo = 279, ahi = 1101
2025-07-01 17:49:07.552 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.552 blo = 279, bhi = 1101
2025-07-01 17:49:07.552
2025-07-01 17:49:07.552 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.552 g = []
2025-07-01 17:49:07.552 if alo < ahi:
2025-07-01 17:49:07.553 if blo < bhi:
2025-07-01 17:49:07.553 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.553 else:
2025-07-01 17:49:07.553 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.553 elif blo < bhi:
2025-07-01 17:49:07.553 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.553
2025-07-01 17:49:07.553 > yield from g
2025-07-01 17:49:07.553
2025-07-01 17:49:07.553 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.553 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.553
2025-07-01 17:49:07.553 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.553 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.553 alo = 279, ahi = 1101
2025-07-01 17:49:07.553 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.553 blo = 279, bhi = 1101
2025-07-01 17:49:07.553
2025-07-01 17:49:07.553 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.553 r"""
2025-07-01 17:49:07.553 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.554 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.554 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.554 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.554
2025-07-01 17:49:07.554 Example:
2025-07-01 17:49:07.554
2025-07-01 17:49:07.554 >>> d = Differ()
2025-07-01 17:49:07.554 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.554 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.554 >>> print(''.join(results), end="")
2025-07-01 17:49:07.554 - abcDefghiJkl
2025-07-01 17:49:07.554 + abcdefGhijkl
2025-07-01 17:49:07.554 """
2025-07-01 17:49:07.554
2025-07-01 17:49:07.554 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.554 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.554 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.554 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.554 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.555
2025-07-01 17:49:07.555 # search for the pair that matches best without being identical
2025-07-01 17:49:07.555 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.555 # on junk -- unless we have to)
2025-07-01 17:49:07.555 for j in range(blo, bhi):
2025-07-01 17:49:07.555 bj = b[j]
2025-07-01 17:49:07.555 cruncher.set_seq2(bj)
2025-07-01 17:49:07.555 for i in range(alo, ahi):
2025-07-01 17:49:07.555 ai = a[i]
2025-07-01 17:49:07.555 if ai == bj:
2025-07-01 17:49:07.555 if eqi is None:
2025-07-01 17:49:07.555 eqi, eqj = i, j
2025-07-01 17:49:07.555 continue
2025-07-01 17:49:07.555 cruncher.set_seq1(ai)
2025-07-01 17:49:07.555 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.555 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.555 # compares by a factor of 3.
2025-07-01 17:49:07.555 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.555 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.555 # of the computation is cached by cruncher
2025-07-01 17:49:07.555 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.556 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.556 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.556 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.556 if best_ratio < cutoff:
2025-07-01 17:49:07.556 # no non-identical "pretty close" pair
2025-07-01 17:49:07.556 if eqi is None:
2025-07-01 17:49:07.556 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.556 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.556 return
2025-07-01 17:49:07.556 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.556 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.556 else:
2025-07-01 17:49:07.556 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.556 eqi = None
2025-07-01 17:49:07.556
2025-07-01 17:49:07.556 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.556 # identical
2025-07-01 17:49:07.556
2025-07-01 17:49:07.556 # pump out diffs from before the synch point
2025-07-01 17:49:07.556 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.556
2025-07-01 17:49:07.556 # do intraline marking on the synch pair
2025-07-01 17:49:07.557 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.557 if eqi is None:
2025-07-01 17:49:07.557 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.557 atags = btags = ""
2025-07-01 17:49:07.557 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.557 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.557 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.557 if tag == 'replace':
2025-07-01 17:49:07.557 atags += '^' * la
2025-07-01 17:49:07.557 btags += '^' * lb
2025-07-01 17:49:07.557 elif tag == 'delete':
2025-07-01 17:49:07.557 atags += '-' * la
2025-07-01 17:49:07.557 elif tag == 'insert':
2025-07-01 17:49:07.557 btags += '+' * lb
2025-07-01 17:49:07.557 elif tag == 'equal':
2025-07-01 17:49:07.557 atags += ' ' * la
2025-07-01 17:49:07.557 btags += ' ' * lb
2025-07-01 17:49:07.557 else:
2025-07-01 17:49:07.557 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.557 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.557 else:
2025-07-01 17:49:07.558 # the synch pair is identical
2025-07-01 17:49:07.558 yield ' ' + aelt
2025-07-01 17:49:07.558
2025-07-01 17:49:07.558 # pump out diffs from after the synch point
2025-07-01 17:49:07.558 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.558
2025-07-01 17:49:07.558 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.558 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.558
2025-07-01 17:49:07.558 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.558 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.558 alo = 280, ahi = 1101
2025-07-01 17:49:07.558 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.558 blo = 280, bhi = 1101
2025-07-01 17:49:07.558
2025-07-01 17:49:07.558 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.558 g = []
2025-07-01 17:49:07.558 if alo < ahi:
2025-07-01 17:49:07.558 if blo < bhi:
2025-07-01 17:49:07.559 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.559 else:
2025-07-01 17:49:07.559 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.559 elif blo < bhi:
2025-07-01 17:49:07.559 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.559
2025-07-01 17:49:07.559 > yield from g
2025-07-01 17:49:07.559
2025-07-01 17:49:07.559 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.559 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.559
2025-07-01 17:49:07.559 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.559 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.559 alo = 280, ahi = 1101
2025-07-01 17:49:07.559 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.559 blo = 280, bhi = 1101
2025-07-01 17:49:07.559
2025-07-01 17:49:07.559 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.559 r"""
2025-07-01 17:49:07.559 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.560 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.560 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.560 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.560
2025-07-01 17:49:07.560 Example:
2025-07-01 17:49:07.560
2025-07-01 17:49:07.560 >>> d = Differ()
2025-07-01 17:49:07.560 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.560 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.560 >>> print(''.join(results), end="")
2025-07-01 17:49:07.560 - abcDefghiJkl
2025-07-01 17:49:07.560 + abcdefGhijkl
2025-07-01 17:49:07.560 """
2025-07-01 17:49:07.560
2025-07-01 17:49:07.560 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.560 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.560 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.560 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.560 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.561
2025-07-01 17:49:07.561 # search for the pair that matches best without being identical
2025-07-01 17:49:07.561 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.561 # on junk -- unless we have to)
2025-07-01 17:49:07.561 for j in range(blo, bhi):
2025-07-01 17:49:07.561 bj = b[j]
2025-07-01 17:49:07.561 cruncher.set_seq2(bj)
2025-07-01 17:49:07.561 for i in range(alo, ahi):
2025-07-01 17:49:07.561 ai = a[i]
2025-07-01 17:49:07.561 if ai == bj:
2025-07-01 17:49:07.561 if eqi is None:
2025-07-01 17:49:07.561 eqi, eqj = i, j
2025-07-01 17:49:07.561 continue
2025-07-01 17:49:07.561 cruncher.set_seq1(ai)
2025-07-01 17:49:07.561 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.561 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.561 # compares by a factor of 3.
2025-07-01 17:49:07.561 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.561 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.561 # of the computation is cached by cruncher
2025-07-01 17:49:07.561 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.562 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.562 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.562 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.562 if best_ratio < cutoff:
2025-07-01 17:49:07.562 # no non-identical "pretty close" pair
2025-07-01 17:49:07.562 if eqi is None:
2025-07-01 17:49:07.562 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.562 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.562 return
2025-07-01 17:49:07.562 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.562 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.562 else:
2025-07-01 17:49:07.562 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.562 eqi = None
2025-07-01 17:49:07.562
2025-07-01 17:49:07.562 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.562 # identical
2025-07-01 17:49:07.562
2025-07-01 17:49:07.562 # pump out diffs from before the synch point
2025-07-01 17:49:07.562 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.562
2025-07-01 17:49:07.562 # do intraline marking on the synch pair
2025-07-01 17:49:07.563 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.565 if eqi is None:
2025-07-01 17:49:07.566 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.566 atags = btags = ""
2025-07-01 17:49:07.566 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.566 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.566 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.566 if tag == 'replace':
2025-07-01 17:49:07.566 atags += '^' * la
2025-07-01 17:49:07.566 btags += '^' * lb
2025-07-01 17:49:07.566 elif tag == 'delete':
2025-07-01 17:49:07.566 atags += '-' * la
2025-07-01 17:49:07.566 elif tag == 'insert':
2025-07-01 17:49:07.566 btags += '+' * lb
2025-07-01 17:49:07.566 elif tag == 'equal':
2025-07-01 17:49:07.566 atags += ' ' * la
2025-07-01 17:49:07.566 btags += ' ' * lb
2025-07-01 17:49:07.566 else:
2025-07-01 17:49:07.566 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.566 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.566 else:
2025-07-01 17:49:07.566 # the synch pair is identical
2025-07-01 17:49:07.566 yield ' ' + aelt
2025-07-01 17:49:07.567
2025-07-01 17:49:07.567 # pump out diffs from after the synch point
2025-07-01 17:49:07.567 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.567
2025-07-01 17:49:07.567 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.567 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.567
2025-07-01 17:49:07.567 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.567 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.567 alo = 281, ahi = 1101
2025-07-01 17:49:07.567 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.567 blo = 281, bhi = 1101
2025-07-01 17:49:07.567
2025-07-01 17:49:07.567 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.567 g = []
2025-07-01 17:49:07.567 if alo < ahi:
2025-07-01 17:49:07.567 if blo < bhi:
2025-07-01 17:49:07.567 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.567 else:
2025-07-01 17:49:07.567 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.568 elif blo < bhi:
2025-07-01 17:49:07.568 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.568
2025-07-01 17:49:07.568 > yield from g
2025-07-01 17:49:07.568
2025-07-01 17:49:07.568 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.568 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.568
2025-07-01 17:49:07.568 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.568 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.568 alo = 281, ahi = 1101
2025-07-01 17:49:07.568 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.568 blo = 281, bhi = 1101
2025-07-01 17:49:07.568
2025-07-01 17:49:07.568 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.568 r"""
2025-07-01 17:49:07.568 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.568 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.568 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.568 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.569
2025-07-01 17:49:07.569 Example:
2025-07-01 17:49:07.569
2025-07-01 17:49:07.569 >>> d = Differ()
2025-07-01 17:49:07.569 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.569 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.569 >>> print(''.join(results), end="")
2025-07-01 17:49:07.569 - abcDefghiJkl
2025-07-01 17:49:07.569 + abcdefGhijkl
2025-07-01 17:49:07.569 """
2025-07-01 17:49:07.569
2025-07-01 17:49:07.569 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.569 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.569 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.569 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.569 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.570
2025-07-01 17:49:07.570 # search for the pair that matches best without being identical
2025-07-01 17:49:07.570 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.570 # on junk -- unless we have to)
2025-07-01 17:49:07.570 for j in range(blo, bhi):
2025-07-01 17:49:07.570 bj = b[j]
2025-07-01 17:49:07.570 cruncher.set_seq2(bj)
2025-07-01 17:49:07.570 for i in range(alo, ahi):
2025-07-01 17:49:07.570 ai = a[i]
2025-07-01 17:49:07.570 if ai == bj:
2025-07-01 17:49:07.570 if eqi is None:
2025-07-01 17:49:07.570 eqi, eqj = i, j
2025-07-01 17:49:07.570 continue
2025-07-01 17:49:07.570 cruncher.set_seq1(ai)
2025-07-01 17:49:07.570 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.570 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.570 # compares by a factor of 3.
2025-07-01 17:49:07.570 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.570 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.570 # of the computation is cached by cruncher
2025-07-01 17:49:07.571 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.571 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.571 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.571 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.571 if best_ratio < cutoff:
2025-07-01 17:49:07.571 # no non-identical "pretty close" pair
2025-07-01 17:49:07.571 if eqi is None:
2025-07-01 17:49:07.571 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.571 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.571 return
2025-07-01 17:49:07.571 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.571 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.571 else:
2025-07-01 17:49:07.571 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.571 eqi = None
2025-07-01 17:49:07.571
2025-07-01 17:49:07.571 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.571 # identical
2025-07-01 17:49:07.571
2025-07-01 17:49:07.571 # pump out diffs from before the synch point
2025-07-01 17:49:07.571 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.572
2025-07-01 17:49:07.572 # do intraline marking on the synch pair
2025-07-01 17:49:07.572 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.572 if eqi is None:
2025-07-01 17:49:07.572 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.572 atags = btags = ""
2025-07-01 17:49:07.572 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.572 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.572 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.572 if tag == 'replace':
2025-07-01 17:49:07.572 atags += '^' * la
2025-07-01 17:49:07.572 btags += '^' * lb
2025-07-01 17:49:07.572 elif tag == 'delete':
2025-07-01 17:49:07.572 atags += '-' * la
2025-07-01 17:49:07.572 elif tag == 'insert':
2025-07-01 17:49:07.572 btags += '+' * lb
2025-07-01 17:49:07.572 elif tag == 'equal':
2025-07-01 17:49:07.572 atags += ' ' * la
2025-07-01 17:49:07.572 btags += ' ' * lb
2025-07-01 17:49:07.572 else:
2025-07-01 17:49:07.573 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.573 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.573 else:
2025-07-01 17:49:07.573 # the synch pair is identical
2025-07-01 17:49:07.573 yield ' ' + aelt
2025-07-01 17:49:07.573
2025-07-01 17:49:07.573 # pump out diffs from after the synch point
2025-07-01 17:49:07.573 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.573
2025-07-01 17:49:07.573 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.573 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.573
2025-07-01 17:49:07.573 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.573 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.573 alo = 282, ahi = 1101
2025-07-01 17:49:07.573 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.573 blo = 282, bhi = 1101
2025-07-01 17:49:07.573
2025-07-01 17:49:07.573 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.573 g = []
2025-07-01 17:49:07.573 if alo < ahi:
2025-07-01 17:49:07.574 if blo < bhi:
2025-07-01 17:49:07.574 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.574 else:
2025-07-01 17:49:07.574 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.574 elif blo < bhi:
2025-07-01 17:49:07.574 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.574
2025-07-01 17:49:07.574 > yield from g
2025-07-01 17:49:07.574
2025-07-01 17:49:07.574 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.574 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.574
2025-07-01 17:49:07.574 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.574 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.574 alo = 282, ahi = 1101
2025-07-01 17:49:07.574 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.574 blo = 282, bhi = 1101
2025-07-01 17:49:07.574
2025-07-01 17:49:07.574 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.575 r"""
2025-07-01 17:49:07.575 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.575 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.575 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.575 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.575
2025-07-01 17:49:07.575 Example:
2025-07-01 17:49:07.575
2025-07-01 17:49:07.575 >>> d = Differ()
2025-07-01 17:49:07.575 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.575 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.575 >>> print(''.join(results), end="")
2025-07-01 17:49:07.575 - abcDefghiJkl
2025-07-01 17:49:07.575 + abcdefGhijkl
2025-07-01 17:49:07.575 """
2025-07-01 17:49:07.575
2025-07-01 17:49:07.575 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.575 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.575 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.576 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.576 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.576
2025-07-01 17:49:07.576 # search for the pair that matches best without being identical
2025-07-01 17:49:07.576 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.576 # on junk -- unless we have to)
2025-07-01 17:49:07.576 for j in range(blo, bhi):
2025-07-01 17:49:07.576 bj = b[j]
2025-07-01 17:49:07.576 cruncher.set_seq2(bj)
2025-07-01 17:49:07.576 for i in range(alo, ahi):
2025-07-01 17:49:07.576 ai = a[i]
2025-07-01 17:49:07.576 if ai == bj:
2025-07-01 17:49:07.576 if eqi is None:
2025-07-01 17:49:07.576 eqi, eqj = i, j
2025-07-01 17:49:07.576 continue
2025-07-01 17:49:07.576 cruncher.set_seq1(ai)
2025-07-01 17:49:07.576 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.576 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.576 # compares by a factor of 3.
2025-07-01 17:49:07.576 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.576 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.577 # of the computation is cached by cruncher
2025-07-01 17:49:07.577 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.577 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.577 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.577 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.577 if best_ratio < cutoff:
2025-07-01 17:49:07.577 # no non-identical "pretty close" pair
2025-07-01 17:49:07.577 if eqi is None:
2025-07-01 17:49:07.577 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.577 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.577 return
2025-07-01 17:49:07.577 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.577 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.577 else:
2025-07-01 17:49:07.577 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.577 eqi = None
2025-07-01 17:49:07.577
2025-07-01 17:49:07.577 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.577 # identical
2025-07-01 17:49:07.577
2025-07-01 17:49:07.578 # pump out diffs from before the synch point
2025-07-01 17:49:07.578 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.578
2025-07-01 17:49:07.578 # do intraline marking on the synch pair
2025-07-01 17:49:07.578 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.578 if eqi is None:
2025-07-01 17:49:07.578 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.578 atags = btags = ""
2025-07-01 17:49:07.578 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.578 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.578 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.578 if tag == 'replace':
2025-07-01 17:49:07.578 atags += '^' * la
2025-07-01 17:49:07.578 btags += '^' * lb
2025-07-01 17:49:07.578 elif tag == 'delete':
2025-07-01 17:49:07.578 atags += '-' * la
2025-07-01 17:49:07.578 elif tag == 'insert':
2025-07-01 17:49:07.578 btags += '+' * lb
2025-07-01 17:49:07.578 elif tag == 'equal':
2025-07-01 17:49:07.578 atags += ' ' * la
2025-07-01 17:49:07.584 btags += ' ' * lb
2025-07-01 17:49:07.584 else:
2025-07-01 17:49:07.584 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.584 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.584 else:
2025-07-01 17:49:07.584 # the synch pair is identical
2025-07-01 17:49:07.584 yield ' ' + aelt
2025-07-01 17:49:07.584
2025-07-01 17:49:07.584 # pump out diffs from after the synch point
2025-07-01 17:49:07.584 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.584
2025-07-01 17:49:07.584 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.584 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.584
2025-07-01 17:49:07.584 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.584 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.584 alo = 283, ahi = 1101
2025-07-01 17:49:07.584 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.584 blo = 283, bhi = 1101
2025-07-01 17:49:07.585
2025-07-01 17:49:07.585 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.585 g = []
2025-07-01 17:49:07.585 if alo < ahi:
2025-07-01 17:49:07.585 if blo < bhi:
2025-07-01 17:49:07.585 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.585 else:
2025-07-01 17:49:07.585 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.585 elif blo < bhi:
2025-07-01 17:49:07.585 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.585
2025-07-01 17:49:07.585 > yield from g
2025-07-01 17:49:07.585
2025-07-01 17:49:07.585 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.585 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.585
2025-07-01 17:49:07.585 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.585 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.585 alo = 283, ahi = 1101
2025-07-01 17:49:07.585 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.586 blo = 283, bhi = 1101
2025-07-01 17:49:07.586
2025-07-01 17:49:07.586 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.586 r"""
2025-07-01 17:49:07.586 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.586 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.586 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.586 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.586
2025-07-01 17:49:07.586 Example:
2025-07-01 17:49:07.586
2025-07-01 17:49:07.586 >>> d = Differ()
2025-07-01 17:49:07.586 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.586 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.586 >>> print(''.join(results), end="")
2025-07-01 17:49:07.586 - abcDefghiJkl
2025-07-01 17:49:07.586 + abcdefGhijkl
2025-07-01 17:49:07.586 """
2025-07-01 17:49:07.586
2025-07-01 17:49:07.587 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.587 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.587 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.587 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.587 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.587
2025-07-01 17:49:07.587 # search for the pair that matches best without being identical
2025-07-01 17:49:07.587 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.587 # on junk -- unless we have to)
2025-07-01 17:49:07.587 for j in range(blo, bhi):
2025-07-01 17:49:07.587 bj = b[j]
2025-07-01 17:49:07.587 cruncher.set_seq2(bj)
2025-07-01 17:49:07.587 for i in range(alo, ahi):
2025-07-01 17:49:07.587 ai = a[i]
2025-07-01 17:49:07.587 if ai == bj:
2025-07-01 17:49:07.587 if eqi is None:
2025-07-01 17:49:07.587 eqi, eqj = i, j
2025-07-01 17:49:07.587 continue
2025-07-01 17:49:07.587 cruncher.set_seq1(ai)
2025-07-01 17:49:07.587 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.587 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.588 # compares by a factor of 3.
2025-07-01 17:49:07.588 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.588 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.588 # of the computation is cached by cruncher
2025-07-01 17:49:07.588 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.588 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.588 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.588 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.588 if best_ratio < cutoff:
2025-07-01 17:49:07.588 # no non-identical "pretty close" pair
2025-07-01 17:49:07.588 if eqi is None:
2025-07-01 17:49:07.588 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.588 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.588 return
2025-07-01 17:49:07.588 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.588 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.588 else:
2025-07-01 17:49:07.588 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.588 eqi = None
2025-07-01 17:49:07.588
2025-07-01 17:49:07.588 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.589 # identical
2025-07-01 17:49:07.589
2025-07-01 17:49:07.589 # pump out diffs from before the synch point
2025-07-01 17:49:07.589 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.589
2025-07-01 17:49:07.589 # do intraline marking on the synch pair
2025-07-01 17:49:07.589 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.589 if eqi is None:
2025-07-01 17:49:07.589 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.589 atags = btags = ""
2025-07-01 17:49:07.589 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.589 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.589 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.589 if tag == 'replace':
2025-07-01 17:49:07.589 atags += '^' * la
2025-07-01 17:49:07.589 btags += '^' * lb
2025-07-01 17:49:07.589 elif tag == 'delete':
2025-07-01 17:49:07.589 atags += '-' * la
2025-07-01 17:49:07.589 elif tag == 'insert':
2025-07-01 17:49:07.589 btags += '+' * lb
2025-07-01 17:49:07.589 elif tag == 'equal':
2025-07-01 17:49:07.590 atags += ' ' * la
2025-07-01 17:49:07.590 btags += ' ' * lb
2025-07-01 17:49:07.590 else:
2025-07-01 17:49:07.590 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.590 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.590 else:
2025-07-01 17:49:07.590 # the synch pair is identical
2025-07-01 17:49:07.590 yield ' ' + aelt
2025-07-01 17:49:07.590
2025-07-01 17:49:07.590 # pump out diffs from after the synch point
2025-07-01 17:49:07.590 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.590
2025-07-01 17:49:07.590 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.590 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.590
2025-07-01 17:49:07.590 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.590 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.590 alo = 284, ahi = 1101
2025-07-01 17:49:07.590 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.590 blo = 284, bhi = 1101
2025-07-01 17:49:07.590
2025-07-01 17:49:07.591 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.591 g = []
2025-07-01 17:49:07.591 if alo < ahi:
2025-07-01 17:49:07.591 if blo < bhi:
2025-07-01 17:49:07.591 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.591 else:
2025-07-01 17:49:07.591 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.591 elif blo < bhi:
2025-07-01 17:49:07.591 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.591
2025-07-01 17:49:07.591 > yield from g
2025-07-01 17:49:07.591
2025-07-01 17:49:07.591 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.591 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.591
2025-07-01 17:49:07.591 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.591 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.591 alo = 284, ahi = 1101
2025-07-01 17:49:07.591 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.592 blo = 284, bhi = 1101
2025-07-01 17:49:07.592
2025-07-01 17:49:07.592 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.592 r"""
2025-07-01 17:49:07.592 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.592 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.592 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.592 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.592
2025-07-01 17:49:07.592 Example:
2025-07-01 17:49:07.592
2025-07-01 17:49:07.592 >>> d = Differ()
2025-07-01 17:49:07.592 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.592 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.592 >>> print(''.join(results), end="")
2025-07-01 17:49:07.592 - abcDefghiJkl
2025-07-01 17:49:07.592 + abcdefGhijkl
2025-07-01 17:49:07.592 """
2025-07-01 17:49:07.592
2025-07-01 17:49:07.593 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.593 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.593 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.593 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.593 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.593
2025-07-01 17:49:07.593 # search for the pair that matches best without being identical
2025-07-01 17:49:07.593 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.593 # on junk -- unless we have to)
2025-07-01 17:49:07.593 for j in range(blo, bhi):
2025-07-01 17:49:07.593 bj = b[j]
2025-07-01 17:49:07.593 cruncher.set_seq2(bj)
2025-07-01 17:49:07.593 for i in range(alo, ahi):
2025-07-01 17:49:07.593 ai = a[i]
2025-07-01 17:49:07.593 if ai == bj:
2025-07-01 17:49:07.593 if eqi is None:
2025-07-01 17:49:07.593 eqi, eqj = i, j
2025-07-01 17:49:07.593 continue
2025-07-01 17:49:07.593 cruncher.set_seq1(ai)
2025-07-01 17:49:07.593 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.597 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.597 # compares by a factor of 3.
2025-07-01 17:49:07.597 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.597 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.597 # of the computation is cached by cruncher
2025-07-01 17:49:07.597 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.597 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.597 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.597 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.597 if best_ratio < cutoff:
2025-07-01 17:49:07.597 # no non-identical "pretty close" pair
2025-07-01 17:49:07.597 if eqi is None:
2025-07-01 17:49:07.597 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.597 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.597 return
2025-07-01 17:49:07.597 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.597 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.597 else:
2025-07-01 17:49:07.597 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.598 eqi = None
2025-07-01 17:49:07.598
2025-07-01 17:49:07.598 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.598 # identical
2025-07-01 17:49:07.598
2025-07-01 17:49:07.598 # pump out diffs from before the synch point
2025-07-01 17:49:07.598 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.598
2025-07-01 17:49:07.598 # do intraline marking on the synch pair
2025-07-01 17:49:07.598 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.598 if eqi is None:
2025-07-01 17:49:07.598 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.598 atags = btags = ""
2025-07-01 17:49:07.598 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.598 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.598 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.598 if tag == 'replace':
2025-07-01 17:49:07.598 atags += '^' * la
2025-07-01 17:49:07.598 btags += '^' * lb
2025-07-01 17:49:07.598 elif tag == 'delete':
2025-07-01 17:49:07.598 atags += '-' * la
2025-07-01 17:49:07.599 elif tag == 'insert':
2025-07-01 17:49:07.599 btags += '+' * lb
2025-07-01 17:49:07.599 elif tag == 'equal':
2025-07-01 17:49:07.599 atags += ' ' * la
2025-07-01 17:49:07.599 btags += ' ' * lb
2025-07-01 17:49:07.599 else:
2025-07-01 17:49:07.599 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.599 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.599 else:
2025-07-01 17:49:07.599 # the synch pair is identical
2025-07-01 17:49:07.599 yield ' ' + aelt
2025-07-01 17:49:07.599
2025-07-01 17:49:07.599 # pump out diffs from after the synch point
2025-07-01 17:49:07.599 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.599
2025-07-01 17:49:07.599 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.599 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.599
2025-07-01 17:49:07.599 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.600 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.600 alo = 285, ahi = 1101
2025-07-01 17:49:07.600 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.600 blo = 285, bhi = 1101
2025-07-01 17:49:07.600
2025-07-01 17:49:07.600 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.600 g = []
2025-07-01 17:49:07.600 if alo < ahi:
2025-07-01 17:49:07.600 if blo < bhi:
2025-07-01 17:49:07.600 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.600 else:
2025-07-01 17:49:07.600 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.600 elif blo < bhi:
2025-07-01 17:49:07.600 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.600
2025-07-01 17:49:07.600 > yield from g
2025-07-01 17:49:07.600
2025-07-01 17:49:07.600 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.600 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.600
2025-07-01 17:49:07.601 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.601 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.601 alo = 285, ahi = 1101
2025-07-01 17:49:07.601 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.601 blo = 285, bhi = 1101
2025-07-01 17:49:07.601
2025-07-01 17:49:07.601 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.601 r"""
2025-07-01 17:49:07.601 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.601 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.601 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.601 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.601
2025-07-01 17:49:07.601 Example:
2025-07-01 17:49:07.601
2025-07-01 17:49:07.601 >>> d = Differ()
2025-07-01 17:49:07.601 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.601 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.601 >>> print(''.join(results), end="")
2025-07-01 17:49:07.601 - abcDefghiJkl
2025-07-01 17:49:07.602 + abcdefGhijkl
2025-07-01 17:49:07.602 """
2025-07-01 17:49:07.602
2025-07-01 17:49:07.602 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.602 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.602 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.602 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.602 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.602
2025-07-01 17:49:07.602 # search for the pair that matches best without being identical
2025-07-01 17:49:07.602 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.602 # on junk -- unless we have to)
2025-07-01 17:49:07.602 for j in range(blo, bhi):
2025-07-01 17:49:07.602 bj = b[j]
2025-07-01 17:49:07.602 cruncher.set_seq2(bj)
2025-07-01 17:49:07.602 for i in range(alo, ahi):
2025-07-01 17:49:07.602 ai = a[i]
2025-07-01 17:49:07.602 if ai == bj:
2025-07-01 17:49:07.603 if eqi is None:
2025-07-01 17:49:07.603 eqi, eqj = i, j
2025-07-01 17:49:07.603 continue
2025-07-01 17:49:07.603 cruncher.set_seq1(ai)
2025-07-01 17:49:07.603 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.603 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.603 # compares by a factor of 3.
2025-07-01 17:49:07.603 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.603 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.603 # of the computation is cached by cruncher
2025-07-01 17:49:07.603 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.603 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.603 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.603 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.603 if best_ratio < cutoff:
2025-07-01 17:49:07.603 # no non-identical "pretty close" pair
2025-07-01 17:49:07.603 if eqi is None:
2025-07-01 17:49:07.603 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.603 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.604 return
2025-07-01 17:49:07.604 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.604 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.604 else:
2025-07-01 17:49:07.604 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.604 eqi = None
2025-07-01 17:49:07.604
2025-07-01 17:49:07.604 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.604 # identical
2025-07-01 17:49:07.604
2025-07-01 17:49:07.604 # pump out diffs from before the synch point
2025-07-01 17:49:07.604 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.604
2025-07-01 17:49:07.604 # do intraline marking on the synch pair
2025-07-01 17:49:07.604 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.604 if eqi is None:
2025-07-01 17:49:07.604 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.604 atags = btags = ""
2025-07-01 17:49:07.604 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.604 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.604 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.605 if tag == 'replace':
2025-07-01 17:49:07.605 atags += '^' * la
2025-07-01 17:49:07.605 btags += '^' * lb
2025-07-01 17:49:07.605 elif tag == 'delete':
2025-07-01 17:49:07.605 atags += '-' * la
2025-07-01 17:49:07.605 elif tag == 'insert':
2025-07-01 17:49:07.605 btags += '+' * lb
2025-07-01 17:49:07.605 elif tag == 'equal':
2025-07-01 17:49:07.605 atags += ' ' * la
2025-07-01 17:49:07.605 btags += ' ' * lb
2025-07-01 17:49:07.605 else:
2025-07-01 17:49:07.605 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.605 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.605 else:
2025-07-01 17:49:07.605 # the synch pair is identical
2025-07-01 17:49:07.605 yield ' ' + aelt
2025-07-01 17:49:07.605
2025-07-01 17:49:07.605 # pump out diffs from after the synch point
2025-07-01 17:49:07.605 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.605
2025-07-01 17:49:07.606 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.606 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.606
2025-07-01 17:49:07.606 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.606 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.606 alo = 286, ahi = 1101
2025-07-01 17:49:07.606 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.606 blo = 286, bhi = 1101
2025-07-01 17:49:07.606
2025-07-01 17:49:07.606 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.606 g = []
2025-07-01 17:49:07.606 if alo < ahi:
2025-07-01 17:49:07.606 if blo < bhi:
2025-07-01 17:49:07.606 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.606 else:
2025-07-01 17:49:07.606 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.606 elif blo < bhi:
2025-07-01 17:49:07.606 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.606
2025-07-01 17:49:07.606 > yield from g
2025-07-01 17:49:07.607
2025-07-01 17:49:07.607 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.607 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.607
2025-07-01 17:49:07.607 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.607 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.607 alo = 286, ahi = 1101
2025-07-01 17:49:07.607 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.607 blo = 286, bhi = 1101
2025-07-01 17:49:07.607
2025-07-01 17:49:07.607 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.607 r"""
2025-07-01 17:49:07.607 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.607 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.607 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.607 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.607
2025-07-01 17:49:07.607 Example:
2025-07-01 17:49:07.607
2025-07-01 17:49:07.607 >>> d = Differ()
2025-07-01 17:49:07.607 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.608 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.608 >>> print(''.join(results), end="")
2025-07-01 17:49:07.608 - abcDefghiJkl
2025-07-01 17:49:07.608 + abcdefGhijkl
2025-07-01 17:49:07.608 """
2025-07-01 17:49:07.608
2025-07-01 17:49:07.608 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.608 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.608 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.608 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.608 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.608
2025-07-01 17:49:07.608 # search for the pair that matches best without being identical
2025-07-01 17:49:07.608 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.608 # on junk -- unless we have to)
2025-07-01 17:49:07.608 for j in range(blo, bhi):
2025-07-01 17:49:07.608 bj = b[j]
2025-07-01 17:49:07.608 cruncher.set_seq2(bj)
2025-07-01 17:49:07.609 for i in range(alo, ahi):
2025-07-01 17:49:07.609 ai = a[i]
2025-07-01 17:49:07.609 if ai == bj:
2025-07-01 17:49:07.609 if eqi is None:
2025-07-01 17:49:07.609 eqi, eqj = i, j
2025-07-01 17:49:07.609 continue
2025-07-01 17:49:07.609 cruncher.set_seq1(ai)
2025-07-01 17:49:07.609 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.609 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.609 # compares by a factor of 3.
2025-07-01 17:49:07.609 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.609 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.609 # of the computation is cached by cruncher
2025-07-01 17:49:07.609 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.609 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.609 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.609 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.609 if best_ratio < cutoff:
2025-07-01 17:49:07.609 # no non-identical "pretty close" pair
2025-07-01 17:49:07.609 if eqi is None:
2025-07-01 17:49:07.610 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.614 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.614 return
2025-07-01 17:49:07.615 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.615 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.615 else:
2025-07-01 17:49:07.615 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.615 eqi = None
2025-07-01 17:49:07.615
2025-07-01 17:49:07.615 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.615 # identical
2025-07-01 17:49:07.615
2025-07-01 17:49:07.615 # pump out diffs from before the synch point
2025-07-01 17:49:07.615 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.615
2025-07-01 17:49:07.615 # do intraline marking on the synch pair
2025-07-01 17:49:07.615 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.615 if eqi is None:
2025-07-01 17:49:07.615 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.615 atags = btags = ""
2025-07-01 17:49:07.615 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.615 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.616 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.616 if tag == 'replace':
2025-07-01 17:49:07.616 atags += '^' * la
2025-07-01 17:49:07.616 btags += '^' * lb
2025-07-01 17:49:07.616 elif tag == 'delete':
2025-07-01 17:49:07.616 atags += '-' * la
2025-07-01 17:49:07.616 elif tag == 'insert':
2025-07-01 17:49:07.616 btags += '+' * lb
2025-07-01 17:49:07.616 elif tag == 'equal':
2025-07-01 17:49:07.616 atags += ' ' * la
2025-07-01 17:49:07.616 btags += ' ' * lb
2025-07-01 17:49:07.616 else:
2025-07-01 17:49:07.616 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.616 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.616 else:
2025-07-01 17:49:07.616 # the synch pair is identical
2025-07-01 17:49:07.616 yield ' ' + aelt
2025-07-01 17:49:07.616
2025-07-01 17:49:07.616 # pump out diffs from after the synch point
2025-07-01 17:49:07.616 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.617
2025-07-01 17:49:07.617 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.617 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.617
2025-07-01 17:49:07.617 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.617 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.617 alo = 287, ahi = 1101
2025-07-01 17:49:07.617 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.617 blo = 287, bhi = 1101
2025-07-01 17:49:07.617
2025-07-01 17:49:07.617 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.617 g = []
2025-07-01 17:49:07.617 if alo < ahi:
2025-07-01 17:49:07.617 if blo < bhi:
2025-07-01 17:49:07.617 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.617 else:
2025-07-01 17:49:07.617 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.617 elif blo < bhi:
2025-07-01 17:49:07.617 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.617
2025-07-01 17:49:07.618 > yield from g
2025-07-01 17:49:07.618
2025-07-01 17:49:07.618 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.618 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.618
2025-07-01 17:49:07.618 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.618 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.618 alo = 287, ahi = 1101
2025-07-01 17:49:07.618 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.618 blo = 287, bhi = 1101
2025-07-01 17:49:07.618
2025-07-01 17:49:07.618 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.618 r"""
2025-07-01 17:49:07.618 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.618 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.618 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.618 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.618
2025-07-01 17:49:07.618 Example:
2025-07-01 17:49:07.618
2025-07-01 17:49:07.619 >>> d = Differ()
2025-07-01 17:49:07.619 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.619 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.619 >>> print(''.join(results), end="")
2025-07-01 17:49:07.619 - abcDefghiJkl
2025-07-01 17:49:07.619 + abcdefGhijkl
2025-07-01 17:49:07.619 """
2025-07-01 17:49:07.619
2025-07-01 17:49:07.619 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.619 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.619 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.619 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.619 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.619
2025-07-01 17:49:07.619 # search for the pair that matches best without being identical
2025-07-01 17:49:07.619 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.619 # on junk -- unless we have to)
2025-07-01 17:49:07.619 for j in range(blo, bhi):
2025-07-01 17:49:07.619 bj = b[j]
2025-07-01 17:49:07.620 cruncher.set_seq2(bj)
2025-07-01 17:49:07.620 for i in range(alo, ahi):
2025-07-01 17:49:07.620 ai = a[i]
2025-07-01 17:49:07.620 if ai == bj:
2025-07-01 17:49:07.620 if eqi is None:
2025-07-01 17:49:07.620 eqi, eqj = i, j
2025-07-01 17:49:07.620 continue
2025-07-01 17:49:07.620 cruncher.set_seq1(ai)
2025-07-01 17:49:07.620 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.620 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.620 # compares by a factor of 3.
2025-07-01 17:49:07.620 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.620 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.620 # of the computation is cached by cruncher
2025-07-01 17:49:07.620 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.620 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.620 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.620 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.620 if best_ratio < cutoff:
2025-07-01 17:49:07.620 # no non-identical "pretty close" pair
2025-07-01 17:49:07.620 if eqi is None:
2025-07-01 17:49:07.620 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.621 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.621 return
2025-07-01 17:49:07.621 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.621 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.621 else:
2025-07-01 17:49:07.621 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.621 eqi = None
2025-07-01 17:49:07.621
2025-07-01 17:49:07.621 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.621 # identical
2025-07-01 17:49:07.621
2025-07-01 17:49:07.621 # pump out diffs from before the synch point
2025-07-01 17:49:07.621 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.621
2025-07-01 17:49:07.621 # do intraline marking on the synch pair
2025-07-01 17:49:07.621 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.621 if eqi is None:
2025-07-01 17:49:07.621 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.621 atags = btags = ""
2025-07-01 17:49:07.621 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.621 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.622 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.622 if tag == 'replace':
2025-07-01 17:49:07.622 atags += '^' * la
2025-07-01 17:49:07.622 btags += '^' * lb
2025-07-01 17:49:07.622 elif tag == 'delete':
2025-07-01 17:49:07.622 atags += '-' * la
2025-07-01 17:49:07.622 elif tag == 'insert':
2025-07-01 17:49:07.622 btags += '+' * lb
2025-07-01 17:49:07.622 elif tag == 'equal':
2025-07-01 17:49:07.622 atags += ' ' * la
2025-07-01 17:49:07.622 btags += ' ' * lb
2025-07-01 17:49:07.622 else:
2025-07-01 17:49:07.622 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.622 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.622 else:
2025-07-01 17:49:07.622 # the synch pair is identical
2025-07-01 17:49:07.622 yield ' ' + aelt
2025-07-01 17:49:07.622
2025-07-01 17:49:07.622 # pump out diffs from after the synch point
2025-07-01 17:49:07.622 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.622
2025-07-01 17:49:07.622 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.623 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.623
2025-07-01 17:49:07.623 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.623 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.623 alo = 290, ahi = 1101
2025-07-01 17:49:07.623 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.623 blo = 290, bhi = 1101
2025-07-01 17:49:07.623
2025-07-01 17:49:07.623 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.623 g = []
2025-07-01 17:49:07.623 if alo < ahi:
2025-07-01 17:49:07.623 if blo < bhi:
2025-07-01 17:49:07.623 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.623 else:
2025-07-01 17:49:07.623 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.623 elif blo < bhi:
2025-07-01 17:49:07.623 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.623
2025-07-01 17:49:07.623 > yield from g
2025-07-01 17:49:07.623
2025-07-01 17:49:07.624 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.624 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.624
2025-07-01 17:49:07.624 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.624 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.624 alo = 290, ahi = 1101
2025-07-01 17:49:07.624 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.624 blo = 290, bhi = 1101
2025-07-01 17:49:07.624
2025-07-01 17:49:07.624 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.624 r"""
2025-07-01 17:49:07.624 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.624 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.624 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.624 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.624
2025-07-01 17:49:07.624 Example:
2025-07-01 17:49:07.624
2025-07-01 17:49:07.624 >>> d = Differ()
2025-07-01 17:49:07.627 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.628 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.628 >>> print(''.join(results), end="")
2025-07-01 17:49:07.628 - abcDefghiJkl
2025-07-01 17:49:07.628 + abcdefGhijkl
2025-07-01 17:49:07.628 """
2025-07-01 17:49:07.628
2025-07-01 17:49:07.628 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.628 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.628 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.628 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.628 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.628
2025-07-01 17:49:07.628 # search for the pair that matches best without being identical
2025-07-01 17:49:07.628 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.628 # on junk -- unless we have to)
2025-07-01 17:49:07.628 for j in range(blo, bhi):
2025-07-01 17:49:07.628 bj = b[j]
2025-07-01 17:49:07.628 cruncher.set_seq2(bj)
2025-07-01 17:49:07.628 for i in range(alo, ahi):
2025-07-01 17:49:07.629 ai = a[i]
2025-07-01 17:49:07.629 if ai == bj:
2025-07-01 17:49:07.629 if eqi is None:
2025-07-01 17:49:07.629 eqi, eqj = i, j
2025-07-01 17:49:07.629 continue
2025-07-01 17:49:07.629 cruncher.set_seq1(ai)
2025-07-01 17:49:07.629 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.629 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.629 # compares by a factor of 3.
2025-07-01 17:49:07.629 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.629 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.629 # of the computation is cached by cruncher
2025-07-01 17:49:07.629 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.629 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.629 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.629 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.629 if best_ratio < cutoff:
2025-07-01 17:49:07.629 # no non-identical "pretty close" pair
2025-07-01 17:49:07.629 if eqi is None:
2025-07-01 17:49:07.629 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.629 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.630 return
2025-07-01 17:49:07.630 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.630 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.630 else:
2025-07-01 17:49:07.630 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.630 eqi = None
2025-07-01 17:49:07.630
2025-07-01 17:49:07.630 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.630 # identical
2025-07-01 17:49:07.630
2025-07-01 17:49:07.630 # pump out diffs from before the synch point
2025-07-01 17:49:07.630 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.630
2025-07-01 17:49:07.630 # do intraline marking on the synch pair
2025-07-01 17:49:07.630 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.630 if eqi is None:
2025-07-01 17:49:07.630 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.630 atags = btags = ""
2025-07-01 17:49:07.630 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.630 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.630 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.631 if tag == 'replace':
2025-07-01 17:49:07.631 atags += '^' * la
2025-07-01 17:49:07.631 btags += '^' * lb
2025-07-01 17:49:07.631 elif tag == 'delete':
2025-07-01 17:49:07.631 atags += '-' * la
2025-07-01 17:49:07.631 elif tag == 'insert':
2025-07-01 17:49:07.631 btags += '+' * lb
2025-07-01 17:49:07.631 elif tag == 'equal':
2025-07-01 17:49:07.631 atags += ' ' * la
2025-07-01 17:49:07.631 btags += ' ' * lb
2025-07-01 17:49:07.631 else:
2025-07-01 17:49:07.631 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.631 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.631 else:
2025-07-01 17:49:07.631 # the synch pair is identical
2025-07-01 17:49:07.631 yield ' ' + aelt
2025-07-01 17:49:07.631
2025-07-01 17:49:07.631 # pump out diffs from after the synch point
2025-07-01 17:49:07.631 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.631
2025-07-01 17:49:07.632 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.632 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.632
2025-07-01 17:49:07.632 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.632 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.632 alo = 291, ahi = 1101
2025-07-01 17:49:07.632 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.632 blo = 291, bhi = 1101
2025-07-01 17:49:07.632
2025-07-01 17:49:07.632 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.632 g = []
2025-07-01 17:49:07.632 if alo < ahi:
2025-07-01 17:49:07.632 if blo < bhi:
2025-07-01 17:49:07.632 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.632 else:
2025-07-01 17:49:07.632 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.632 elif blo < bhi:
2025-07-01 17:49:07.632 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.632
2025-07-01 17:49:07.632 > yield from g
2025-07-01 17:49:07.632
2025-07-01 17:49:07.633 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.633 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.633
2025-07-01 17:49:07.633 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.633 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.633 alo = 291, ahi = 1101
2025-07-01 17:49:07.633 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.633 blo = 291, bhi = 1101
2025-07-01 17:49:07.633
2025-07-01 17:49:07.633 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.633 r"""
2025-07-01 17:49:07.633 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.633 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.633 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.633 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.633
2025-07-01 17:49:07.633 Example:
2025-07-01 17:49:07.633
2025-07-01 17:49:07.633 >>> d = Differ()
2025-07-01 17:49:07.633 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.633 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.634 >>> print(''.join(results), end="")
2025-07-01 17:49:07.634 - abcDefghiJkl
2025-07-01 17:49:07.634 + abcdefGhijkl
2025-07-01 17:49:07.634 """
2025-07-01 17:49:07.634
2025-07-01 17:49:07.634 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.634 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.634 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.634 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.634 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.634
2025-07-01 17:49:07.634 # search for the pair that matches best without being identical
2025-07-01 17:49:07.634 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.634 # on junk -- unless we have to)
2025-07-01 17:49:07.634 for j in range(blo, bhi):
2025-07-01 17:49:07.634 bj = b[j]
2025-07-01 17:49:07.634 cruncher.set_seq2(bj)
2025-07-01 17:49:07.634 for i in range(alo, ahi):
2025-07-01 17:49:07.634 ai = a[i]
2025-07-01 17:49:07.634 if ai == bj:
2025-07-01 17:49:07.635 if eqi is None:
2025-07-01 17:49:07.635 eqi, eqj = i, j
2025-07-01 17:49:07.635 continue
2025-07-01 17:49:07.635 cruncher.set_seq1(ai)
2025-07-01 17:49:07.635 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.635 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.635 # compares by a factor of 3.
2025-07-01 17:49:07.635 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.635 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.635 # of the computation is cached by cruncher
2025-07-01 17:49:07.635 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.635 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.635 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.635 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.635 if best_ratio < cutoff:
2025-07-01 17:49:07.635 # no non-identical "pretty close" pair
2025-07-01 17:49:07.635 if eqi is None:
2025-07-01 17:49:07.635 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.635 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.635 return
2025-07-01 17:49:07.635 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.636 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.636 else:
2025-07-01 17:49:07.636 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.636 eqi = None
2025-07-01 17:49:07.636
2025-07-01 17:49:07.636 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.636 # identical
2025-07-01 17:49:07.636
2025-07-01 17:49:07.636 # pump out diffs from before the synch point
2025-07-01 17:49:07.636 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.636
2025-07-01 17:49:07.636 # do intraline marking on the synch pair
2025-07-01 17:49:07.636 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.636 if eqi is None:
2025-07-01 17:49:07.636 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.636 atags = btags = ""
2025-07-01 17:49:07.636 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.636 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.637 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.637 if tag == 'replace':
2025-07-01 17:49:07.637 atags += '^' * la
2025-07-01 17:49:07.637 btags += '^' * lb
2025-07-01 17:49:07.637 elif tag == 'delete':
2025-07-01 17:49:07.637 atags += '-' * la
2025-07-01 17:49:07.637 elif tag == 'insert':
2025-07-01 17:49:07.637 btags += '+' * lb
2025-07-01 17:49:07.637 elif tag == 'equal':
2025-07-01 17:49:07.637 atags += ' ' * la
2025-07-01 17:49:07.637 btags += ' ' * lb
2025-07-01 17:49:07.637 else:
2025-07-01 17:49:07.637 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.637 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.637 else:
2025-07-01 17:49:07.637 # the synch pair is identical
2025-07-01 17:49:07.637 yield ' ' + aelt
2025-07-01 17:49:07.637
2025-07-01 17:49:07.637 # pump out diffs from after the synch point
2025-07-01 17:49:07.637 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.637
2025-07-01 17:49:07.638 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.638 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.638
2025-07-01 17:49:07.638 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.638 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.638 alo = 292, ahi = 1101
2025-07-01 17:49:07.638 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.638 blo = 292, bhi = 1101
2025-07-01 17:49:07.638
2025-07-01 17:49:07.638 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.638 g = []
2025-07-01 17:49:07.638 if alo < ahi:
2025-07-01 17:49:07.638 if blo < bhi:
2025-07-01 17:49:07.638 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.638 else:
2025-07-01 17:49:07.638 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.638 elif blo < bhi:
2025-07-01 17:49:07.638 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.638
2025-07-01 17:49:07.638 > yield from g
2025-07-01 17:49:07.638
2025-07-01 17:49:07.639 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.639 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.639
2025-07-01 17:49:07.639 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.639 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.639 alo = 292, ahi = 1101
2025-07-01 17:49:07.639 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.639 blo = 292, bhi = 1101
2025-07-01 17:49:07.639
2025-07-01 17:49:07.639 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.639 r"""
2025-07-01 17:49:07.639 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.639 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.639 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.639 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.639
2025-07-01 17:49:07.639 Example:
2025-07-01 17:49:07.639
2025-07-01 17:49:07.639 >>> d = Differ()
2025-07-01 17:49:07.639 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.639 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.640 >>> print(''.join(results), end="")
2025-07-01 17:49:07.640 - abcDefghiJkl
2025-07-01 17:49:07.640 + abcdefGhijkl
2025-07-01 17:49:07.640 """
2025-07-01 17:49:07.640
2025-07-01 17:49:07.640 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.640 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.640 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.640 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.640 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.640
2025-07-01 17:49:07.640 # search for the pair that matches best without being identical
2025-07-01 17:49:07.640 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.640 # on junk -- unless we have to)
2025-07-01 17:49:07.640 for j in range(blo, bhi):
2025-07-01 17:49:07.640 bj = b[j]
2025-07-01 17:49:07.640 cruncher.set_seq2(bj)
2025-07-01 17:49:07.640 for i in range(alo, ahi):
2025-07-01 17:49:07.640 ai = a[i]
2025-07-01 17:49:07.640 if ai == bj:
2025-07-01 17:49:07.641 if eqi is None:
2025-07-01 17:49:07.646 eqi, eqj = i, j
2025-07-01 17:49:07.646 continue
2025-07-01 17:49:07.646 cruncher.set_seq1(ai)
2025-07-01 17:49:07.646 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.646 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.646 # compares by a factor of 3.
2025-07-01 17:49:07.646 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.646 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.646 # of the computation is cached by cruncher
2025-07-01 17:49:07.646 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.646 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.646 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.646 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.646 if best_ratio < cutoff:
2025-07-01 17:49:07.646 # no non-identical "pretty close" pair
2025-07-01 17:49:07.647 if eqi is None:
2025-07-01 17:49:07.647 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.647 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.647 return
2025-07-01 17:49:07.647 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.647 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.647 else:
2025-07-01 17:49:07.647 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.647 eqi = None
2025-07-01 17:49:07.647
2025-07-01 17:49:07.647 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.647 # identical
2025-07-01 17:49:07.647
2025-07-01 17:49:07.647 # pump out diffs from before the synch point
2025-07-01 17:49:07.647 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.647
2025-07-01 17:49:07.647 # do intraline marking on the synch pair
2025-07-01 17:49:07.647 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.647 if eqi is None:
2025-07-01 17:49:07.647 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.648 atags = btags = ""
2025-07-01 17:49:07.648 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.648 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.648 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.648 if tag == 'replace':
2025-07-01 17:49:07.648 atags += '^' * la
2025-07-01 17:49:07.648 btags += '^' * lb
2025-07-01 17:49:07.648 elif tag == 'delete':
2025-07-01 17:49:07.648 atags += '-' * la
2025-07-01 17:49:07.648 elif tag == 'insert':
2025-07-01 17:49:07.648 btags += '+' * lb
2025-07-01 17:49:07.648 elif tag == 'equal':
2025-07-01 17:49:07.648 atags += ' ' * la
2025-07-01 17:49:07.648 btags += ' ' * lb
2025-07-01 17:49:07.648 else:
2025-07-01 17:49:07.648 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.648 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.648 else:
2025-07-01 17:49:07.648 # the synch pair is identical
2025-07-01 17:49:07.648 yield ' ' + aelt
2025-07-01 17:49:07.649
2025-07-01 17:49:07.649 # pump out diffs from after the synch point
2025-07-01 17:49:07.649 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.649
2025-07-01 17:49:07.649 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.649 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.649
2025-07-01 17:49:07.649 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.649 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.649 alo = 293, ahi = 1101
2025-07-01 17:49:07.649 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.649 blo = 293, bhi = 1101
2025-07-01 17:49:07.649
2025-07-01 17:49:07.649 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.649 g = []
2025-07-01 17:49:07.649 if alo < ahi:
2025-07-01 17:49:07.649 if blo < bhi:
2025-07-01 17:49:07.649 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.649 else:
2025-07-01 17:49:07.650 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.650 elif blo < bhi:
2025-07-01 17:49:07.650 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.650
2025-07-01 17:49:07.650 > yield from g
2025-07-01 17:49:07.650
2025-07-01 17:49:07.650 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.650 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.650
2025-07-01 17:49:07.650 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.650 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.650 alo = 293, ahi = 1101
2025-07-01 17:49:07.650 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.650 blo = 293, bhi = 1101
2025-07-01 17:49:07.650
2025-07-01 17:49:07.650 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.650 r"""
2025-07-01 17:49:07.650 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.650 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.650 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.651 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.651
2025-07-01 17:49:07.651 Example:
2025-07-01 17:49:07.651
2025-07-01 17:49:07.651 >>> d = Differ()
2025-07-01 17:49:07.651 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.651 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.651 >>> print(''.join(results), end="")
2025-07-01 17:49:07.651 - abcDefghiJkl
2025-07-01 17:49:07.651 + abcdefGhijkl
2025-07-01 17:49:07.651 """
2025-07-01 17:49:07.651
2025-07-01 17:49:07.651 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.651 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.651 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.651 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.651 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.651
2025-07-01 17:49:07.651 # search for the pair that matches best without being identical
2025-07-01 17:49:07.652 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.652 # on junk -- unless we have to)
2025-07-01 17:49:07.652 for j in range(blo, bhi):
2025-07-01 17:49:07.652 bj = b[j]
2025-07-01 17:49:07.652 cruncher.set_seq2(bj)
2025-07-01 17:49:07.652 for i in range(alo, ahi):
2025-07-01 17:49:07.652 ai = a[i]
2025-07-01 17:49:07.652 if ai == bj:
2025-07-01 17:49:07.652 if eqi is None:
2025-07-01 17:49:07.652 eqi, eqj = i, j
2025-07-01 17:49:07.652 continue
2025-07-01 17:49:07.652 cruncher.set_seq1(ai)
2025-07-01 17:49:07.652 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.652 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.652 # compares by a factor of 3.
2025-07-01 17:49:07.652 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.652 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.652 # of the computation is cached by cruncher
2025-07-01 17:49:07.652 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.652 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.652 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.653 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.653 if best_ratio < cutoff:
2025-07-01 17:49:07.653 # no non-identical "pretty close" pair
2025-07-01 17:49:07.653 if eqi is None:
2025-07-01 17:49:07.653 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.653 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.653 return
2025-07-01 17:49:07.653 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.653 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.653 else:
2025-07-01 17:49:07.653 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.653 eqi = None
2025-07-01 17:49:07.653
2025-07-01 17:49:07.653 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.653 # identical
2025-07-01 17:49:07.653
2025-07-01 17:49:07.653 # pump out diffs from before the synch point
2025-07-01 17:49:07.653 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.653
2025-07-01 17:49:07.653 # do intraline marking on the synch pair
2025-07-01 17:49:07.654 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.654 if eqi is None:
2025-07-01 17:49:07.654 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.654 atags = btags = ""
2025-07-01 17:49:07.654 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.654 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.654 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.654 if tag == 'replace':
2025-07-01 17:49:07.654 atags += '^' * la
2025-07-01 17:49:07.654 btags += '^' * lb
2025-07-01 17:49:07.654 elif tag == 'delete':
2025-07-01 17:49:07.654 atags += '-' * la
2025-07-01 17:49:07.654 elif tag == 'insert':
2025-07-01 17:49:07.654 btags += '+' * lb
2025-07-01 17:49:07.654 elif tag == 'equal':
2025-07-01 17:49:07.654 atags += ' ' * la
2025-07-01 17:49:07.654 btags += ' ' * lb
2025-07-01 17:49:07.654 else:
2025-07-01 17:49:07.654 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.654 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.654 else:
2025-07-01 17:49:07.655 # the synch pair is identical
2025-07-01 17:49:07.655 yield ' ' + aelt
2025-07-01 17:49:07.655
2025-07-01 17:49:07.655 # pump out diffs from after the synch point
2025-07-01 17:49:07.655 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.655
2025-07-01 17:49:07.655 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.655 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.655
2025-07-01 17:49:07.655 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.655 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.655 alo = 294, ahi = 1101
2025-07-01 17:49:07.655 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.655 blo = 294, bhi = 1101
2025-07-01 17:49:07.655
2025-07-01 17:49:07.655 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.655 g = []
2025-07-01 17:49:07.655 if alo < ahi:
2025-07-01 17:49:07.655 if blo < bhi:
2025-07-01 17:49:07.655 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.656 else:
2025-07-01 17:49:07.656 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.656 elif blo < bhi:
2025-07-01 17:49:07.656 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.656
2025-07-01 17:49:07.656 > yield from g
2025-07-01 17:49:07.656
2025-07-01 17:49:07.656 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.656 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.656
2025-07-01 17:49:07.656 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.656 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.656 alo = 294, ahi = 1101
2025-07-01 17:49:07.656 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.656 blo = 294, bhi = 1101
2025-07-01 17:49:07.656
2025-07-01 17:49:07.656 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.656 r"""
2025-07-01 17:49:07.656 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.656 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.656 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.659 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.659
2025-07-01 17:49:07.660 Example:
2025-07-01 17:49:07.660
2025-07-01 17:49:07.660 >>> d = Differ()
2025-07-01 17:49:07.660 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.660 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.660 >>> print(''.join(results), end="")
2025-07-01 17:49:07.660 - abcDefghiJkl
2025-07-01 17:49:07.660 + abcdefGhijkl
2025-07-01 17:49:07.660 """
2025-07-01 17:49:07.660
2025-07-01 17:49:07.660 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.660 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.660 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.660 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.660 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.660
2025-07-01 17:49:07.660 # search for the pair that matches best without being identical
2025-07-01 17:49:07.660 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.661 # on junk -- unless we have to)
2025-07-01 17:49:07.661 for j in range(blo, bhi):
2025-07-01 17:49:07.661 bj = b[j]
2025-07-01 17:49:07.661 cruncher.set_seq2(bj)
2025-07-01 17:49:07.661 for i in range(alo, ahi):
2025-07-01 17:49:07.661 ai = a[i]
2025-07-01 17:49:07.661 if ai == bj:
2025-07-01 17:49:07.661 if eqi is None:
2025-07-01 17:49:07.661 eqi, eqj = i, j
2025-07-01 17:49:07.661 continue
2025-07-01 17:49:07.661 cruncher.set_seq1(ai)
2025-07-01 17:49:07.661 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.661 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.661 # compares by a factor of 3.
2025-07-01 17:49:07.661 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.661 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.661 # of the computation is cached by cruncher
2025-07-01 17:49:07.661 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.661 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.662 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.662 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.662 if best_ratio < cutoff:
2025-07-01 17:49:07.662 # no non-identical "pretty close" pair
2025-07-01 17:49:07.662 if eqi is None:
2025-07-01 17:49:07.662 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.662 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.662 return
2025-07-01 17:49:07.662 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.662 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.662 else:
2025-07-01 17:49:07.662 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.662 eqi = None
2025-07-01 17:49:07.662
2025-07-01 17:49:07.662 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.662 # identical
2025-07-01 17:49:07.662
2025-07-01 17:49:07.662 # pump out diffs from before the synch point
2025-07-01 17:49:07.662 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.662
2025-07-01 17:49:07.663 # do intraline marking on the synch pair
2025-07-01 17:49:07.663 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.663 if eqi is None:
2025-07-01 17:49:07.663 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.663 atags = btags = ""
2025-07-01 17:49:07.663 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.663 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.663 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.663 if tag == 'replace':
2025-07-01 17:49:07.663 atags += '^' * la
2025-07-01 17:49:07.663 btags += '^' * lb
2025-07-01 17:49:07.663 elif tag == 'delete':
2025-07-01 17:49:07.663 atags += '-' * la
2025-07-01 17:49:07.663 elif tag == 'insert':
2025-07-01 17:49:07.663 btags += '+' * lb
2025-07-01 17:49:07.663 elif tag == 'equal':
2025-07-01 17:49:07.663 atags += ' ' * la
2025-07-01 17:49:07.663 btags += ' ' * lb
2025-07-01 17:49:07.663 else:
2025-07-01 17:49:07.663 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.663 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.664 else:
2025-07-01 17:49:07.664 # the synch pair is identical
2025-07-01 17:49:07.664 yield ' ' + aelt
2025-07-01 17:49:07.664
2025-07-01 17:49:07.664 # pump out diffs from after the synch point
2025-07-01 17:49:07.664 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.664
2025-07-01 17:49:07.664 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.664 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.664
2025-07-01 17:49:07.664 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.664 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.664 alo = 295, ahi = 1101
2025-07-01 17:49:07.664 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.664 blo = 295, bhi = 1101
2025-07-01 17:49:07.664
2025-07-01 17:49:07.664 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.664 g = []
2025-07-01 17:49:07.664 if alo < ahi:
2025-07-01 17:49:07.664 if blo < bhi:
2025-07-01 17:49:07.665 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.665 else:
2025-07-01 17:49:07.665 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.665 elif blo < bhi:
2025-07-01 17:49:07.665 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.665
2025-07-01 17:49:07.665 > yield from g
2025-07-01 17:49:07.665
2025-07-01 17:49:07.665 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.665 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.665
2025-07-01 17:49:07.665 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.665 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.665 alo = 295, ahi = 1101
2025-07-01 17:49:07.665 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.665 blo = 295, bhi = 1101
2025-07-01 17:49:07.665
2025-07-01 17:49:07.665 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.665 r"""
2025-07-01 17:49:07.665 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.665 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.666 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.666 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.666
2025-07-01 17:49:07.666 Example:
2025-07-01 17:49:07.666
2025-07-01 17:49:07.666 >>> d = Differ()
2025-07-01 17:49:07.666 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.666 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.666 >>> print(''.join(results), end="")
2025-07-01 17:49:07.666 - abcDefghiJkl
2025-07-01 17:49:07.666 + abcdefGhijkl
2025-07-01 17:49:07.666 """
2025-07-01 17:49:07.666
2025-07-01 17:49:07.666 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.666 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.666 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.666 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.666 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.666
2025-07-01 17:49:07.667 # search for the pair that matches best without being identical
2025-07-01 17:49:07.667 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.667 # on junk -- unless we have to)
2025-07-01 17:49:07.667 for j in range(blo, bhi):
2025-07-01 17:49:07.667 bj = b[j]
2025-07-01 17:49:07.667 cruncher.set_seq2(bj)
2025-07-01 17:49:07.667 for i in range(alo, ahi):
2025-07-01 17:49:07.667 ai = a[i]
2025-07-01 17:49:07.667 if ai == bj:
2025-07-01 17:49:07.667 if eqi is None:
2025-07-01 17:49:07.667 eqi, eqj = i, j
2025-07-01 17:49:07.667 continue
2025-07-01 17:49:07.667 cruncher.set_seq1(ai)
2025-07-01 17:49:07.667 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.667 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.667 # compares by a factor of 3.
2025-07-01 17:49:07.667 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.667 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.667 # of the computation is cached by cruncher
2025-07-01 17:49:07.667 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.667 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.668 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.668 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.668 if best_ratio < cutoff:
2025-07-01 17:49:07.668 # no non-identical "pretty close" pair
2025-07-01 17:49:07.668 if eqi is None:
2025-07-01 17:49:07.668 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.668 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.668 return
2025-07-01 17:49:07.668 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.668 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.668 else:
2025-07-01 17:49:07.668 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.668 eqi = None
2025-07-01 17:49:07.668
2025-07-01 17:49:07.668 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.668 # identical
2025-07-01 17:49:07.668
2025-07-01 17:49:07.668 # pump out diffs from before the synch point
2025-07-01 17:49:07.668 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.668
2025-07-01 17:49:07.668 # do intraline marking on the synch pair
2025-07-01 17:49:07.669 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.669 if eqi is None:
2025-07-01 17:49:07.669 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.669 atags = btags = ""
2025-07-01 17:49:07.669 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.669 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.669 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.669 if tag == 'replace':
2025-07-01 17:49:07.669 atags += '^' * la
2025-07-01 17:49:07.669 btags += '^' * lb
2025-07-01 17:49:07.669 elif tag == 'delete':
2025-07-01 17:49:07.669 atags += '-' * la
2025-07-01 17:49:07.669 elif tag == 'insert':
2025-07-01 17:49:07.669 btags += '+' * lb
2025-07-01 17:49:07.669 elif tag == 'equal':
2025-07-01 17:49:07.669 atags += ' ' * la
2025-07-01 17:49:07.669 btags += ' ' * lb
2025-07-01 17:49:07.669 else:
2025-07-01 17:49:07.669 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.669 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.670 else:
2025-07-01 17:49:07.670 # the synch pair is identical
2025-07-01 17:49:07.670 yield ' ' + aelt
2025-07-01 17:49:07.670
2025-07-01 17:49:07.670 # pump out diffs from after the synch point
2025-07-01 17:49:07.670 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.670
2025-07-01 17:49:07.670 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.670 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.670
2025-07-01 17:49:07.670 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.670 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.670 alo = 296, ahi = 1101
2025-07-01 17:49:07.670 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.670 blo = 296, bhi = 1101
2025-07-01 17:49:07.670
2025-07-01 17:49:07.670 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.670 g = []
2025-07-01 17:49:07.670 if alo < ahi:
2025-07-01 17:49:07.670 if blo < bhi:
2025-07-01 17:49:07.671 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.671 else:
2025-07-01 17:49:07.671 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.671 elif blo < bhi:
2025-07-01 17:49:07.671 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.671
2025-07-01 17:49:07.671 > yield from g
2025-07-01 17:49:07.671
2025-07-01 17:49:07.671 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.671 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.671
2025-07-01 17:49:07.671 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.671 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.671 alo = 296, ahi = 1101
2025-07-01 17:49:07.671 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.671 blo = 296, bhi = 1101
2025-07-01 17:49:07.671
2025-07-01 17:49:07.671 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.671 r"""
2025-07-01 17:49:07.671 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.672 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.672 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.672 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.672
2025-07-01 17:49:07.672 Example:
2025-07-01 17:49:07.672
2025-07-01 17:49:07.672 >>> d = Differ()
2025-07-01 17:49:07.672 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.672 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.672 >>> print(''.join(results), end="")
2025-07-01 17:49:07.672 - abcDefghiJkl
2025-07-01 17:49:07.672 + abcdefGhijkl
2025-07-01 17:49:07.672 """
2025-07-01 17:49:07.672
2025-07-01 17:49:07.672 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.672 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.672 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.672 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.672 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.673
2025-07-01 17:49:07.677 # search for the pair that matches best without being identical
2025-07-01 17:49:07.677 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.678 # on junk -- unless we have to)
2025-07-01 17:49:07.678 for j in range(blo, bhi):
2025-07-01 17:49:07.678 bj = b[j]
2025-07-01 17:49:07.678 cruncher.set_seq2(bj)
2025-07-01 17:49:07.678 for i in range(alo, ahi):
2025-07-01 17:49:07.678 ai = a[i]
2025-07-01 17:49:07.678 if ai == bj:
2025-07-01 17:49:07.678 if eqi is None:
2025-07-01 17:49:07.678 eqi, eqj = i, j
2025-07-01 17:49:07.678 continue
2025-07-01 17:49:07.678 cruncher.set_seq1(ai)
2025-07-01 17:49:07.678 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.678 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.678 # compares by a factor of 3.
2025-07-01 17:49:07.678 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.678 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.678 # of the computation is cached by cruncher
2025-07-01 17:49:07.678 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.678 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.678 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.679 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.679 if best_ratio < cutoff:
2025-07-01 17:49:07.679 # no non-identical "pretty close" pair
2025-07-01 17:49:07.679 if eqi is None:
2025-07-01 17:49:07.679 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.679 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.679 return
2025-07-01 17:49:07.679 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.679 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.679 else:
2025-07-01 17:49:07.679 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.679 eqi = None
2025-07-01 17:49:07.679
2025-07-01 17:49:07.679 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.679 # identical
2025-07-01 17:49:07.679
2025-07-01 17:49:07.679 # pump out diffs from before the synch point
2025-07-01 17:49:07.679 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.679
2025-07-01 17:49:07.679 # do intraline marking on the synch pair
2025-07-01 17:49:07.679 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.680 if eqi is None:
2025-07-01 17:49:07.680 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.680 atags = btags = ""
2025-07-01 17:49:07.680 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.680 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.680 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.680 if tag == 'replace':
2025-07-01 17:49:07.680 atags += '^' * la
2025-07-01 17:49:07.680 btags += '^' * lb
2025-07-01 17:49:07.680 elif tag == 'delete':
2025-07-01 17:49:07.680 atags += '-' * la
2025-07-01 17:49:07.680 elif tag == 'insert':
2025-07-01 17:49:07.680 btags += '+' * lb
2025-07-01 17:49:07.680 elif tag == 'equal':
2025-07-01 17:49:07.680 atags += ' ' * la
2025-07-01 17:49:07.680 btags += ' ' * lb
2025-07-01 17:49:07.680 else:
2025-07-01 17:49:07.680 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.680 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.680 else:
2025-07-01 17:49:07.680 # the synch pair is identical
2025-07-01 17:49:07.680 yield ' ' + aelt
2025-07-01 17:49:07.681
2025-07-01 17:49:07.681 # pump out diffs from after the synch point
2025-07-01 17:49:07.681 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.681
2025-07-01 17:49:07.681 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.681 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.681
2025-07-01 17:49:07.681 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.681 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.681 alo = 297, ahi = 1101
2025-07-01 17:49:07.681 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.681 blo = 297, bhi = 1101
2025-07-01 17:49:07.681
2025-07-01 17:49:07.681 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.681 g = []
2025-07-01 17:49:07.681 if alo < ahi:
2025-07-01 17:49:07.681 if blo < bhi:
2025-07-01 17:49:07.681 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.681 else:
2025-07-01 17:49:07.681 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.681 elif blo < bhi:
2025-07-01 17:49:07.682 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.682
2025-07-01 17:49:07.682 > yield from g
2025-07-01 17:49:07.682
2025-07-01 17:49:07.682 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.682 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.682
2025-07-01 17:49:07.682 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.682 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.682 alo = 297, ahi = 1101
2025-07-01 17:49:07.682 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.682 blo = 297, bhi = 1101
2025-07-01 17:49:07.682
2025-07-01 17:49:07.682 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.682 r"""
2025-07-01 17:49:07.682 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.682 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.682 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.682 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.683
2025-07-01 17:49:07.683 Example:
2025-07-01 17:49:07.683
2025-07-01 17:49:07.683 >>> d = Differ()
2025-07-01 17:49:07.683 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.683 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.683 >>> print(''.join(results), end="")
2025-07-01 17:49:07.683 - abcDefghiJkl
2025-07-01 17:49:07.683 + abcdefGhijkl
2025-07-01 17:49:07.683 """
2025-07-01 17:49:07.683
2025-07-01 17:49:07.683 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.683 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.683 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.683 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.683 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.683
2025-07-01 17:49:07.683 # search for the pair that matches best without being identical
2025-07-01 17:49:07.684 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.684 # on junk -- unless we have to)
2025-07-01 17:49:07.684 for j in range(blo, bhi):
2025-07-01 17:49:07.684 bj = b[j]
2025-07-01 17:49:07.684 cruncher.set_seq2(bj)
2025-07-01 17:49:07.684 for i in range(alo, ahi):
2025-07-01 17:49:07.684 ai = a[i]
2025-07-01 17:49:07.684 if ai == bj:
2025-07-01 17:49:07.684 if eqi is None:
2025-07-01 17:49:07.684 eqi, eqj = i, j
2025-07-01 17:49:07.684 continue
2025-07-01 17:49:07.684 cruncher.set_seq1(ai)
2025-07-01 17:49:07.684 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.684 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.684 # compares by a factor of 3.
2025-07-01 17:49:07.684 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.684 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.684 # of the computation is cached by cruncher
2025-07-01 17:49:07.684 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.684 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.684 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.685 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.685 if best_ratio < cutoff:
2025-07-01 17:49:07.685 # no non-identical "pretty close" pair
2025-07-01 17:49:07.685 if eqi is None:
2025-07-01 17:49:07.685 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.685 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.685 return
2025-07-01 17:49:07.685 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.685 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.685 else:
2025-07-01 17:49:07.685 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.685 eqi = None
2025-07-01 17:49:07.685
2025-07-01 17:49:07.685 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.685 # identical
2025-07-01 17:49:07.685
2025-07-01 17:49:07.685 # pump out diffs from before the synch point
2025-07-01 17:49:07.685 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.685
2025-07-01 17:49:07.685 # do intraline marking on the synch pair
2025-07-01 17:49:07.686 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.686 if eqi is None:
2025-07-01 17:49:07.686 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.686 atags = btags = ""
2025-07-01 17:49:07.686 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.686 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.686 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.686 if tag == 'replace':
2025-07-01 17:49:07.686 atags += '^' * la
2025-07-01 17:49:07.686 btags += '^' * lb
2025-07-01 17:49:07.686 elif tag == 'delete':
2025-07-01 17:49:07.686 atags += '-' * la
2025-07-01 17:49:07.686 elif tag == 'insert':
2025-07-01 17:49:07.686 btags += '+' * lb
2025-07-01 17:49:07.686 elif tag == 'equal':
2025-07-01 17:49:07.686 atags += ' ' * la
2025-07-01 17:49:07.686 btags += ' ' * lb
2025-07-01 17:49:07.686 else:
2025-07-01 17:49:07.686 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.686 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.686 else:
2025-07-01 17:49:07.687 # the synch pair is identical
2025-07-01 17:49:07.687 yield ' ' + aelt
2025-07-01 17:49:07.687
2025-07-01 17:49:07.687 # pump out diffs from after the synch point
2025-07-01 17:49:07.687 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.687
2025-07-01 17:49:07.687 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.687 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.687
2025-07-01 17:49:07.687 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.687 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.687 alo = 298, ahi = 1101
2025-07-01 17:49:07.687 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.687 blo = 298, bhi = 1101
2025-07-01 17:49:07.687
2025-07-01 17:49:07.687 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.687 g = []
2025-07-01 17:49:07.687 if alo < ahi:
2025-07-01 17:49:07.687 if blo < bhi:
2025-07-01 17:49:07.687 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.688 else:
2025-07-01 17:49:07.690 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.691 elif blo < bhi:
2025-07-01 17:49:07.691 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.691
2025-07-01 17:49:07.691 > yield from g
2025-07-01 17:49:07.691
2025-07-01 17:49:07.691 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.691 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.691
2025-07-01 17:49:07.691 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.691 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.691 alo = 298, ahi = 1101
2025-07-01 17:49:07.691 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.691 blo = 298, bhi = 1101
2025-07-01 17:49:07.691
2025-07-01 17:49:07.691 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.691 r"""
2025-07-01 17:49:07.691 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.691 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.691 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.691 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.692
2025-07-01 17:49:07.692 Example:
2025-07-01 17:49:07.692
2025-07-01 17:49:07.692 >>> d = Differ()
2025-07-01 17:49:07.692 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.692 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.692 >>> print(''.join(results), end="")
2025-07-01 17:49:07.692 - abcDefghiJkl
2025-07-01 17:49:07.692 + abcdefGhijkl
2025-07-01 17:49:07.692 """
2025-07-01 17:49:07.692
2025-07-01 17:49:07.692 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.692 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.692 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.692 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.692 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.692
2025-07-01 17:49:07.692 # search for the pair that matches best without being identical
2025-07-01 17:49:07.692 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.692 # on junk -- unless we have to)
2025-07-01 17:49:07.693 for j in range(blo, bhi):
2025-07-01 17:49:07.693 bj = b[j]
2025-07-01 17:49:07.693 cruncher.set_seq2(bj)
2025-07-01 17:49:07.693 for i in range(alo, ahi):
2025-07-01 17:49:07.693 ai = a[i]
2025-07-01 17:49:07.693 if ai == bj:
2025-07-01 17:49:07.693 if eqi is None:
2025-07-01 17:49:07.693 eqi, eqj = i, j
2025-07-01 17:49:07.693 continue
2025-07-01 17:49:07.693 cruncher.set_seq1(ai)
2025-07-01 17:49:07.693 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.693 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.693 # compares by a factor of 3.
2025-07-01 17:49:07.693 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.693 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.693 # of the computation is cached by cruncher
2025-07-01 17:49:07.693 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.693 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.693 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.693 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.693 if best_ratio < cutoff:
2025-07-01 17:49:07.694 # no non-identical "pretty close" pair
2025-07-01 17:49:07.694 if eqi is None:
2025-07-01 17:49:07.694 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.694 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.694 return
2025-07-01 17:49:07.694 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.694 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.694 else:
2025-07-01 17:49:07.694 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.694 eqi = None
2025-07-01 17:49:07.694
2025-07-01 17:49:07.694 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.694 # identical
2025-07-01 17:49:07.694
2025-07-01 17:49:07.694 # pump out diffs from before the synch point
2025-07-01 17:49:07.694 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.694
2025-07-01 17:49:07.694 # do intraline marking on the synch pair
2025-07-01 17:49:07.695 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.695 if eqi is None:
2025-07-01 17:49:07.695 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.695 atags = btags = ""
2025-07-01 17:49:07.695 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.695 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.695 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.695 if tag == 'replace':
2025-07-01 17:49:07.695 atags += '^' * la
2025-07-01 17:49:07.695 btags += '^' * lb
2025-07-01 17:49:07.695 elif tag == 'delete':
2025-07-01 17:49:07.695 atags += '-' * la
2025-07-01 17:49:07.695 elif tag == 'insert':
2025-07-01 17:49:07.695 btags += '+' * lb
2025-07-01 17:49:07.695 elif tag == 'equal':
2025-07-01 17:49:07.695 atags += ' ' * la
2025-07-01 17:49:07.695 btags += ' ' * lb
2025-07-01 17:49:07.695 else:
2025-07-01 17:49:07.695 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.695 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.695 else:
2025-07-01 17:49:07.696 # the synch pair is identical
2025-07-01 17:49:07.696 yield ' ' + aelt
2025-07-01 17:49:07.696
2025-07-01 17:49:07.696 # pump out diffs from after the synch point
2025-07-01 17:49:07.696 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.696
2025-07-01 17:49:07.696 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.696 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.696
2025-07-01 17:49:07.696 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.696 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.696 alo = 299, ahi = 1101
2025-07-01 17:49:07.696 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.696 blo = 299, bhi = 1101
2025-07-01 17:49:07.696
2025-07-01 17:49:07.696 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.696 g = []
2025-07-01 17:49:07.696 if alo < ahi:
2025-07-01 17:49:07.697 if blo < bhi:
2025-07-01 17:49:07.697 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.697 else:
2025-07-01 17:49:07.697 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.697 elif blo < bhi:
2025-07-01 17:49:07.697 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.697
2025-07-01 17:49:07.697 > yield from g
2025-07-01 17:49:07.697
2025-07-01 17:49:07.697 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.697 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.697
2025-07-01 17:49:07.697 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.697 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.697 alo = 299, ahi = 1101
2025-07-01 17:49:07.697 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.697 blo = 299, bhi = 1101
2025-07-01 17:49:07.697
2025-07-01 17:49:07.697 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.697 r"""
2025-07-01 17:49:07.697 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.698 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.698 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.698 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.698
2025-07-01 17:49:07.698 Example:
2025-07-01 17:49:07.698
2025-07-01 17:49:07.698 >>> d = Differ()
2025-07-01 17:49:07.698 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.698 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.698 >>> print(''.join(results), end="")
2025-07-01 17:49:07.698 - abcDefghiJkl
2025-07-01 17:49:07.698 + abcdefGhijkl
2025-07-01 17:49:07.698 """
2025-07-01 17:49:07.698
2025-07-01 17:49:07.698 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.698 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.698 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.698 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.699 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.699
2025-07-01 17:49:07.699 # search for the pair that matches best without being identical
2025-07-01 17:49:07.699 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.699 # on junk -- unless we have to)
2025-07-01 17:49:07.699 for j in range(blo, bhi):
2025-07-01 17:49:07.699 bj = b[j]
2025-07-01 17:49:07.699 cruncher.set_seq2(bj)
2025-07-01 17:49:07.699 for i in range(alo, ahi):
2025-07-01 17:49:07.699 ai = a[i]
2025-07-01 17:49:07.699 if ai == bj:
2025-07-01 17:49:07.699 if eqi is None:
2025-07-01 17:49:07.699 eqi, eqj = i, j
2025-07-01 17:49:07.699 continue
2025-07-01 17:49:07.699 cruncher.set_seq1(ai)
2025-07-01 17:49:07.699 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.699 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.699 # compares by a factor of 3.
2025-07-01 17:49:07.699 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.699 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.699 # of the computation is cached by cruncher
2025-07-01 17:49:07.700 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.700 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.700 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.700 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.700 if best_ratio < cutoff:
2025-07-01 17:49:07.700 # no non-identical "pretty close" pair
2025-07-01 17:49:07.700 if eqi is None:
2025-07-01 17:49:07.700 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.700 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.700 return
2025-07-01 17:49:07.700 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.700 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.700 else:
2025-07-01 17:49:07.700 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.700 eqi = None
2025-07-01 17:49:07.700
2025-07-01 17:49:07.700 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.700 # identical
2025-07-01 17:49:07.700
2025-07-01 17:49:07.700 # pump out diffs from before the synch point
2025-07-01 17:49:07.701 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.701
2025-07-01 17:49:07.701 # do intraline marking on the synch pair
2025-07-01 17:49:07.701 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.701 if eqi is None:
2025-07-01 17:49:07.701 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.701 atags = btags = ""
2025-07-01 17:49:07.701 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.701 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.701 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.701 if tag == 'replace':
2025-07-01 17:49:07.701 atags += '^' * la
2025-07-01 17:49:07.701 btags += '^' * lb
2025-07-01 17:49:07.701 elif tag == 'delete':
2025-07-01 17:49:07.701 atags += '-' * la
2025-07-01 17:49:07.701 elif tag == 'insert':
2025-07-01 17:49:07.701 btags += '+' * lb
2025-07-01 17:49:07.701 elif tag == 'equal':
2025-07-01 17:49:07.701 atags += ' ' * la
2025-07-01 17:49:07.701 btags += ' ' * lb
2025-07-01 17:49:07.701 else:
2025-07-01 17:49:07.702 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.702 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.702 else:
2025-07-01 17:49:07.702 # the synch pair is identical
2025-07-01 17:49:07.702 yield ' ' + aelt
2025-07-01 17:49:07.702
2025-07-01 17:49:07.702 # pump out diffs from after the synch point
2025-07-01 17:49:07.702 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.702
2025-07-01 17:49:07.702 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.702 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.702
2025-07-01 17:49:07.702 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.702 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.702 alo = 300, ahi = 1101
2025-07-01 17:49:07.702 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.702 blo = 300, bhi = 1101
2025-07-01 17:49:07.702
2025-07-01 17:49:07.702 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.708 g = []
2025-07-01 17:49:07.708 if alo < ahi:
2025-07-01 17:49:07.708 if blo < bhi:
2025-07-01 17:49:07.708 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.708 else:
2025-07-01 17:49:07.708 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.708 elif blo < bhi:
2025-07-01 17:49:07.708 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.708
2025-07-01 17:49:07.709 > yield from g
2025-07-01 17:49:07.709
2025-07-01 17:49:07.709 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.709 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.709
2025-07-01 17:49:07.709 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.709 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.709 alo = 300, ahi = 1101
2025-07-01 17:49:07.709 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.709 blo = 300, bhi = 1101
2025-07-01 17:49:07.709
2025-07-01 17:49:07.709 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.709 r"""
2025-07-01 17:49:07.709 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.709 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.709 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.709 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.709
2025-07-01 17:49:07.709 Example:
2025-07-01 17:49:07.710
2025-07-01 17:49:07.710 >>> d = Differ()
2025-07-01 17:49:07.710 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.710 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.710 >>> print(''.join(results), end="")
2025-07-01 17:49:07.710 - abcDefghiJkl
2025-07-01 17:49:07.710 + abcdefGhijkl
2025-07-01 17:49:07.710 """
2025-07-01 17:49:07.710
2025-07-01 17:49:07.710 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.710 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.710 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.710 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.710 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.710
2025-07-01 17:49:07.710 # search for the pair that matches best without being identical
2025-07-01 17:49:07.711 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.711 # on junk -- unless we have to)
2025-07-01 17:49:07.711 for j in range(blo, bhi):
2025-07-01 17:49:07.711 bj = b[j]
2025-07-01 17:49:07.711 cruncher.set_seq2(bj)
2025-07-01 17:49:07.711 for i in range(alo, ahi):
2025-07-01 17:49:07.711 ai = a[i]
2025-07-01 17:49:07.711 if ai == bj:
2025-07-01 17:49:07.711 if eqi is None:
2025-07-01 17:49:07.711 eqi, eqj = i, j
2025-07-01 17:49:07.711 continue
2025-07-01 17:49:07.711 cruncher.set_seq1(ai)
2025-07-01 17:49:07.711 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.711 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.711 # compares by a factor of 3.
2025-07-01 17:49:07.711 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.711 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.711 # of the computation is cached by cruncher
2025-07-01 17:49:07.711 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.711 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.712 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.712 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.712 if best_ratio < cutoff:
2025-07-01 17:49:07.712 # no non-identical "pretty close" pair
2025-07-01 17:49:07.712 if eqi is None:
2025-07-01 17:49:07.712 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.712 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.712 return
2025-07-01 17:49:07.712 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.712 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.712 else:
2025-07-01 17:49:07.712 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.712 eqi = None
2025-07-01 17:49:07.712
2025-07-01 17:49:07.712 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.712 # identical
2025-07-01 17:49:07.712
2025-07-01 17:49:07.712 # pump out diffs from before the synch point
2025-07-01 17:49:07.712 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.712
2025-07-01 17:49:07.713 # do intraline marking on the synch pair
2025-07-01 17:49:07.713 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.713 if eqi is None:
2025-07-01 17:49:07.713 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.713 atags = btags = ""
2025-07-01 17:49:07.713 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.713 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.713 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.713 if tag == 'replace':
2025-07-01 17:49:07.713 atags += '^' * la
2025-07-01 17:49:07.713 btags += '^' * lb
2025-07-01 17:49:07.713 elif tag == 'delete':
2025-07-01 17:49:07.713 atags += '-' * la
2025-07-01 17:49:07.713 elif tag == 'insert':
2025-07-01 17:49:07.713 btags += '+' * lb
2025-07-01 17:49:07.713 elif tag == 'equal':
2025-07-01 17:49:07.713 atags += ' ' * la
2025-07-01 17:49:07.713 btags += ' ' * lb
2025-07-01 17:49:07.713 else:
2025-07-01 17:49:07.713 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.714 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.714 else:
2025-07-01 17:49:07.714 # the synch pair is identical
2025-07-01 17:49:07.714 yield ' ' + aelt
2025-07-01 17:49:07.714
2025-07-01 17:49:07.714 # pump out diffs from after the synch point
2025-07-01 17:49:07.714 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.714
2025-07-01 17:49:07.714 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.714 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.714
2025-07-01 17:49:07.714 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.714 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.714 alo = 301, ahi = 1101
2025-07-01 17:49:07.714 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.714 blo = 301, bhi = 1101
2025-07-01 17:49:07.714
2025-07-01 17:49:07.714 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.714 g = []
2025-07-01 17:49:07.714 if alo < ahi:
2025-07-01 17:49:07.715 if blo < bhi:
2025-07-01 17:49:07.715 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.715 else:
2025-07-01 17:49:07.715 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.715 elif blo < bhi:
2025-07-01 17:49:07.715 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.715
2025-07-01 17:49:07.715 > yield from g
2025-07-01 17:49:07.715
2025-07-01 17:49:07.715 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.715 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.715
2025-07-01 17:49:07.715 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.715 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.715 alo = 301, ahi = 1101
2025-07-01 17:49:07.715 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.715 blo = 301, bhi = 1101
2025-07-01 17:49:07.715
2025-07-01 17:49:07.715 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.715 r"""
2025-07-01 17:49:07.716 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.716 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.716 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.716 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.716
2025-07-01 17:49:07.716 Example:
2025-07-01 17:49:07.716
2025-07-01 17:49:07.716 >>> d = Differ()
2025-07-01 17:49:07.716 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.716 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.716 >>> print(''.join(results), end="")
2025-07-01 17:49:07.716 - abcDefghiJkl
2025-07-01 17:49:07.716 + abcdefGhijkl
2025-07-01 17:49:07.716 """
2025-07-01 17:49:07.716
2025-07-01 17:49:07.716 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.716 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.716 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.716 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.717 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.717
2025-07-01 17:49:07.717 # search for the pair that matches best without being identical
2025-07-01 17:49:07.717 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.717 # on junk -- unless we have to)
2025-07-01 17:49:07.717 for j in range(blo, bhi):
2025-07-01 17:49:07.717 bj = b[j]
2025-07-01 17:49:07.717 cruncher.set_seq2(bj)
2025-07-01 17:49:07.717 for i in range(alo, ahi):
2025-07-01 17:49:07.717 ai = a[i]
2025-07-01 17:49:07.717 if ai == bj:
2025-07-01 17:49:07.717 if eqi is None:
2025-07-01 17:49:07.717 eqi, eqj = i, j
2025-07-01 17:49:07.717 continue
2025-07-01 17:49:07.717 cruncher.set_seq1(ai)
2025-07-01 17:49:07.717 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.717 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.717 # compares by a factor of 3.
2025-07-01 17:49:07.718 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.718 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.718 # of the computation is cached by cruncher
2025-07-01 17:49:07.718 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.718 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.718 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.718 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.718 if best_ratio < cutoff:
2025-07-01 17:49:07.718 # no non-identical "pretty close" pair
2025-07-01 17:49:07.718 if eqi is None:
2025-07-01 17:49:07.718 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.718 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.718 return
2025-07-01 17:49:07.718 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.718 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.718 else:
2025-07-01 17:49:07.718 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.718 eqi = None
2025-07-01 17:49:07.718
2025-07-01 17:49:07.718 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.719 # identical
2025-07-01 17:49:07.721
2025-07-01 17:49:07.722 # pump out diffs from before the synch point
2025-07-01 17:49:07.722 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.722
2025-07-01 17:49:07.722 # do intraline marking on the synch pair
2025-07-01 17:49:07.722 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.722 if eqi is None:
2025-07-01 17:49:07.722 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.722 atags = btags = ""
2025-07-01 17:49:07.722 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.722 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.722 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.722 if tag == 'replace':
2025-07-01 17:49:07.722 atags += '^' * la
2025-07-01 17:49:07.722 btags += '^' * lb
2025-07-01 17:49:07.722 elif tag == 'delete':
2025-07-01 17:49:07.722 atags += '-' * la
2025-07-01 17:49:07.722 elif tag == 'insert':
2025-07-01 17:49:07.722 btags += '+' * lb
2025-07-01 17:49:07.722 elif tag == 'equal':
2025-07-01 17:49:07.722 atags += ' ' * la
2025-07-01 17:49:07.723 btags += ' ' * lb
2025-07-01 17:49:07.723 else:
2025-07-01 17:49:07.723 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.723 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.723 else:
2025-07-01 17:49:07.723 # the synch pair is identical
2025-07-01 17:49:07.723 yield ' ' + aelt
2025-07-01 17:49:07.723
2025-07-01 17:49:07.723 # pump out diffs from after the synch point
2025-07-01 17:49:07.723 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.723
2025-07-01 17:49:07.723 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.723 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.723
2025-07-01 17:49:07.723 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.723 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.723 alo = 302, ahi = 1101
2025-07-01 17:49:07.723 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.723 blo = 302, bhi = 1101
2025-07-01 17:49:07.723
2025-07-01 17:49:07.724 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.724 g = []
2025-07-01 17:49:07.724 if alo < ahi:
2025-07-01 17:49:07.724 if blo < bhi:
2025-07-01 17:49:07.724 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.724 else:
2025-07-01 17:49:07.724 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.724 elif blo < bhi:
2025-07-01 17:49:07.724 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.724
2025-07-01 17:49:07.724 > yield from g
2025-07-01 17:49:07.724
2025-07-01 17:49:07.724 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.724 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.724
2025-07-01 17:49:07.724 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.724 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.724 alo = 302, ahi = 1101
2025-07-01 17:49:07.724 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.725 blo = 302, bhi = 1101
2025-07-01 17:49:07.725
2025-07-01 17:49:07.725 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.725 r"""
2025-07-01 17:49:07.725 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.725 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.725 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.725 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.725
2025-07-01 17:49:07.725 Example:
2025-07-01 17:49:07.725
2025-07-01 17:49:07.725 >>> d = Differ()
2025-07-01 17:49:07.725 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.725 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.725 >>> print(''.join(results), end="")
2025-07-01 17:49:07.725 - abcDefghiJkl
2025-07-01 17:49:07.725 + abcdefGhijkl
2025-07-01 17:49:07.725 """
2025-07-01 17:49:07.725
2025-07-01 17:49:07.726 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.726 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.726 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.726 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.726 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.726
2025-07-01 17:49:07.726 # search for the pair that matches best without being identical
2025-07-01 17:49:07.726 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.726 # on junk -- unless we have to)
2025-07-01 17:49:07.726 for j in range(blo, bhi):
2025-07-01 17:49:07.726 bj = b[j]
2025-07-01 17:49:07.726 cruncher.set_seq2(bj)
2025-07-01 17:49:07.726 for i in range(alo, ahi):
2025-07-01 17:49:07.726 ai = a[i]
2025-07-01 17:49:07.726 if ai == bj:
2025-07-01 17:49:07.726 if eqi is None:
2025-07-01 17:49:07.726 eqi, eqj = i, j
2025-07-01 17:49:07.726 continue
2025-07-01 17:49:07.726 cruncher.set_seq1(ai)
2025-07-01 17:49:07.726 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.727 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.727 # compares by a factor of 3.
2025-07-01 17:49:07.727 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.727 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.727 # of the computation is cached by cruncher
2025-07-01 17:49:07.727 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.727 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.727 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.727 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.727 if best_ratio < cutoff:
2025-07-01 17:49:07.727 # no non-identical "pretty close" pair
2025-07-01 17:49:07.727 if eqi is None:
2025-07-01 17:49:07.727 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.727 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.727 return
2025-07-01 17:49:07.727 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.727 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.727 else:
2025-07-01 17:49:07.727 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.727 eqi = None
2025-07-01 17:49:07.728
2025-07-01 17:49:07.728 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.728 # identical
2025-07-01 17:49:07.728
2025-07-01 17:49:07.728 # pump out diffs from before the synch point
2025-07-01 17:49:07.728 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.728
2025-07-01 17:49:07.728 # do intraline marking on the synch pair
2025-07-01 17:49:07.728 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.728 if eqi is None:
2025-07-01 17:49:07.728 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.728 atags = btags = ""
2025-07-01 17:49:07.728 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.728 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.728 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.728 if tag == 'replace':
2025-07-01 17:49:07.728 atags += '^' * la
2025-07-01 17:49:07.728 btags += '^' * lb
2025-07-01 17:49:07.728 elif tag == 'delete':
2025-07-01 17:49:07.728 atags += '-' * la
2025-07-01 17:49:07.729 elif tag == 'insert':
2025-07-01 17:49:07.729 btags += '+' * lb
2025-07-01 17:49:07.729 elif tag == 'equal':
2025-07-01 17:49:07.729 atags += ' ' * la
2025-07-01 17:49:07.729 btags += ' ' * lb
2025-07-01 17:49:07.729 else:
2025-07-01 17:49:07.729 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.729 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.729 else:
2025-07-01 17:49:07.729 # the synch pair is identical
2025-07-01 17:49:07.729 yield ' ' + aelt
2025-07-01 17:49:07.729
2025-07-01 17:49:07.729 # pump out diffs from after the synch point
2025-07-01 17:49:07.729 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.729
2025-07-01 17:49:07.729 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.729 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.729
2025-07-01 17:49:07.729 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.729 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.730 alo = 303, ahi = 1101
2025-07-01 17:49:07.730 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.730 blo = 303, bhi = 1101
2025-07-01 17:49:07.730
2025-07-01 17:49:07.730 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.730 g = []
2025-07-01 17:49:07.730 if alo < ahi:
2025-07-01 17:49:07.730 if blo < bhi:
2025-07-01 17:49:07.730 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.730 else:
2025-07-01 17:49:07.730 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.730 elif blo < bhi:
2025-07-01 17:49:07.730 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.730
2025-07-01 17:49:07.730 > yield from g
2025-07-01 17:49:07.730
2025-07-01 17:49:07.730 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.730 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.730
2025-07-01 17:49:07.730 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.731 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.731 alo = 303, ahi = 1101
2025-07-01 17:49:07.731 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.731 blo = 303, bhi = 1101
2025-07-01 17:49:07.731
2025-07-01 17:49:07.731 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.731 r"""
2025-07-01 17:49:07.731 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.731 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.731 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.731 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.731
2025-07-01 17:49:07.731 Example:
2025-07-01 17:49:07.731
2025-07-01 17:49:07.731 >>> d = Differ()
2025-07-01 17:49:07.731 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.731 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.731 >>> print(''.join(results), end="")
2025-07-01 17:49:07.731 - abcDefghiJkl
2025-07-01 17:49:07.731 + abcdefGhijkl
2025-07-01 17:49:07.732 """
2025-07-01 17:49:07.732
2025-07-01 17:49:07.732 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.732 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.732 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.732 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.732 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.732
2025-07-01 17:49:07.732 # search for the pair that matches best without being identical
2025-07-01 17:49:07.732 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.732 # on junk -- unless we have to)
2025-07-01 17:49:07.732 for j in range(blo, bhi):
2025-07-01 17:49:07.732 bj = b[j]
2025-07-01 17:49:07.732 cruncher.set_seq2(bj)
2025-07-01 17:49:07.732 for i in range(alo, ahi):
2025-07-01 17:49:07.732 ai = a[i]
2025-07-01 17:49:07.732 if ai == bj:
2025-07-01 17:49:07.732 if eqi is None:
2025-07-01 17:49:07.732 eqi, eqj = i, j
2025-07-01 17:49:07.733 continue
2025-07-01 17:49:07.733 cruncher.set_seq1(ai)
2025-07-01 17:49:07.733 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.733 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.733 # compares by a factor of 3.
2025-07-01 17:49:07.733 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.733 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.733 # of the computation is cached by cruncher
2025-07-01 17:49:07.733 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.733 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.733 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.733 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.733 if best_ratio < cutoff:
2025-07-01 17:49:07.733 # no non-identical "pretty close" pair
2025-07-01 17:49:07.733 if eqi is None:
2025-07-01 17:49:07.733 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.733 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.733 return
2025-07-01 17:49:07.733 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.733 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.734 else:
2025-07-01 17:49:07.734 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.734 eqi = None
2025-07-01 17:49:07.734
2025-07-01 17:49:07.734 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.734 # identical
2025-07-01 17:49:07.734
2025-07-01 17:49:07.734 # pump out diffs from before the synch point
2025-07-01 17:49:07.734 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.734
2025-07-01 17:49:07.734 # do intraline marking on the synch pair
2025-07-01 17:49:07.734 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.734 if eqi is None:
2025-07-01 17:49:07.734 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.734 atags = btags = ""
2025-07-01 17:49:07.734 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.734 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.734 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.734 if tag == 'replace':
2025-07-01 17:49:07.734 atags += '^' * la
2025-07-01 17:49:07.734 btags += '^' * lb
2025-07-01 17:49:07.735 elif tag == 'delete':
2025-07-01 17:49:07.740 atags += '-' * la
2025-07-01 17:49:07.740 elif tag == 'insert':
2025-07-01 17:49:07.740 btags += '+' * lb
2025-07-01 17:49:07.740 elif tag == 'equal':
2025-07-01 17:49:07.740 atags += ' ' * la
2025-07-01 17:49:07.740 btags += ' ' * lb
2025-07-01 17:49:07.740 else:
2025-07-01 17:49:07.740 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.740 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.740 else:
2025-07-01 17:49:07.740 # the synch pair is identical
2025-07-01 17:49:07.740 yield ' ' + aelt
2025-07-01 17:49:07.740
2025-07-01 17:49:07.740 # pump out diffs from after the synch point
2025-07-01 17:49:07.740 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.740
2025-07-01 17:49:07.740 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.740 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.740
2025-07-01 17:49:07.741 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.741 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.741 alo = 304, ahi = 1101
2025-07-01 17:49:07.741 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.741 blo = 304, bhi = 1101
2025-07-01 17:49:07.741
2025-07-01 17:49:07.741 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.741 g = []
2025-07-01 17:49:07.741 if alo < ahi:
2025-07-01 17:49:07.741 if blo < bhi:
2025-07-01 17:49:07.741 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.741 else:
2025-07-01 17:49:07.741 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.741 elif blo < bhi:
2025-07-01 17:49:07.741 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.741
2025-07-01 17:49:07.741 > yield from g
2025-07-01 17:49:07.741
2025-07-01 17:49:07.741 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.741 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.742
2025-07-01 17:49:07.742 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.742 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.742 alo = 304, ahi = 1101
2025-07-01 17:49:07.742 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.742 blo = 304, bhi = 1101
2025-07-01 17:49:07.742
2025-07-01 17:49:07.742 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.742 r"""
2025-07-01 17:49:07.742 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.742 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.742 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.742 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.742
2025-07-01 17:49:07.742 Example:
2025-07-01 17:49:07.742
2025-07-01 17:49:07.742 >>> d = Differ()
2025-07-01 17:49:07.742 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.743 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.743 >>> print(''.join(results), end="")
2025-07-01 17:49:07.743 - abcDefghiJkl
2025-07-01 17:49:07.743 + abcdefGhijkl
2025-07-01 17:49:07.743 """
2025-07-01 17:49:07.743
2025-07-01 17:49:07.743 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.743 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.743 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.743 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.743 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.743
2025-07-01 17:49:07.743 # search for the pair that matches best without being identical
2025-07-01 17:49:07.743 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.743 # on junk -- unless we have to)
2025-07-01 17:49:07.743 for j in range(blo, bhi):
2025-07-01 17:49:07.743 bj = b[j]
2025-07-01 17:49:07.743 cruncher.set_seq2(bj)
2025-07-01 17:49:07.744 for i in range(alo, ahi):
2025-07-01 17:49:07.744 ai = a[i]
2025-07-01 17:49:07.744 if ai == bj:
2025-07-01 17:49:07.744 if eqi is None:
2025-07-01 17:49:07.744 eqi, eqj = i, j
2025-07-01 17:49:07.744 continue
2025-07-01 17:49:07.744 cruncher.set_seq1(ai)
2025-07-01 17:49:07.744 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.744 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.744 # compares by a factor of 3.
2025-07-01 17:49:07.744 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.744 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.744 # of the computation is cached by cruncher
2025-07-01 17:49:07.744 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.744 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.744 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.744 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.744 if best_ratio < cutoff:
2025-07-01 17:49:07.744 # no non-identical "pretty close" pair
2025-07-01 17:49:07.744 if eqi is None:
2025-07-01 17:49:07.744 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.745 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.745 return
2025-07-01 17:49:07.745 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.745 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.745 else:
2025-07-01 17:49:07.745 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.745 eqi = None
2025-07-01 17:49:07.745
2025-07-01 17:49:07.745 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.745 # identical
2025-07-01 17:49:07.745
2025-07-01 17:49:07.745 # pump out diffs from before the synch point
2025-07-01 17:49:07.745 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.745
2025-07-01 17:49:07.745 # do intraline marking on the synch pair
2025-07-01 17:49:07.745 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.745 if eqi is None:
2025-07-01 17:49:07.745 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.745 atags = btags = ""
2025-07-01 17:49:07.745 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.745 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.745 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.746 if tag == 'replace':
2025-07-01 17:49:07.746 atags += '^' * la
2025-07-01 17:49:07.746 btags += '^' * lb
2025-07-01 17:49:07.746 elif tag == 'delete':
2025-07-01 17:49:07.746 atags += '-' * la
2025-07-01 17:49:07.746 elif tag == 'insert':
2025-07-01 17:49:07.746 btags += '+' * lb
2025-07-01 17:49:07.746 elif tag == 'equal':
2025-07-01 17:49:07.746 atags += ' ' * la
2025-07-01 17:49:07.746 btags += ' ' * lb
2025-07-01 17:49:07.746 else:
2025-07-01 17:49:07.746 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.746 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.746 else:
2025-07-01 17:49:07.746 # the synch pair is identical
2025-07-01 17:49:07.746 yield ' ' + aelt
2025-07-01 17:49:07.746
2025-07-01 17:49:07.746 # pump out diffs from after the synch point
2025-07-01 17:49:07.746 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.746
2025-07-01 17:49:07.746 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.746 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.747
2025-07-01 17:49:07.747 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.747 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.747 alo = 305, ahi = 1101
2025-07-01 17:49:07.747 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.747 blo = 305, bhi = 1101
2025-07-01 17:49:07.747
2025-07-01 17:49:07.747 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.747 g = []
2025-07-01 17:49:07.747 if alo < ahi:
2025-07-01 17:49:07.747 if blo < bhi:
2025-07-01 17:49:07.747 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.747 else:
2025-07-01 17:49:07.747 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.747 elif blo < bhi:
2025-07-01 17:49:07.747 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.747
2025-07-01 17:49:07.747 > yield from g
2025-07-01 17:49:07.747
2025-07-01 17:49:07.747 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.747 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.748
2025-07-01 17:49:07.748 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.748 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.748 alo = 305, ahi = 1101
2025-07-01 17:49:07.748 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.748 blo = 305, bhi = 1101
2025-07-01 17:49:07.748
2025-07-01 17:49:07.748 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.748 r"""
2025-07-01 17:49:07.748 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.748 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.748 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.748 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.748
2025-07-01 17:49:07.748 Example:
2025-07-01 17:49:07.748
2025-07-01 17:49:07.748 >>> d = Differ()
2025-07-01 17:49:07.748 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.748 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.748 >>> print(''.join(results), end="")
2025-07-01 17:49:07.749 - abcDefghiJkl
2025-07-01 17:49:07.749 + abcdefGhijkl
2025-07-01 17:49:07.749 """
2025-07-01 17:49:07.749
2025-07-01 17:49:07.749 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.749 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.749 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.749 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.749 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.749
2025-07-01 17:49:07.749 # search for the pair that matches best without being identical
2025-07-01 17:49:07.749 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.749 # on junk -- unless we have to)
2025-07-01 17:49:07.749 for j in range(blo, bhi):
2025-07-01 17:49:07.749 bj = b[j]
2025-07-01 17:49:07.749 cruncher.set_seq2(bj)
2025-07-01 17:49:07.749 for i in range(alo, ahi):
2025-07-01 17:49:07.749 ai = a[i]
2025-07-01 17:49:07.749 if ai == bj:
2025-07-01 17:49:07.750 if eqi is None:
2025-07-01 17:49:07.753 eqi, eqj = i, j
2025-07-01 17:49:07.753 continue
2025-07-01 17:49:07.753 cruncher.set_seq1(ai)
2025-07-01 17:49:07.753 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.753 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.753 # compares by a factor of 3.
2025-07-01 17:49:07.753 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.753 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.753 # of the computation is cached by cruncher
2025-07-01 17:49:07.753 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.753 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.753 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.753 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.753 if best_ratio < cutoff:
2025-07-01 17:49:07.753 # no non-identical "pretty close" pair
2025-07-01 17:49:07.753 if eqi is None:
2025-07-01 17:49:07.753 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.753 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.754 return
2025-07-01 17:49:07.754 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.754 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.754 else:
2025-07-01 17:49:07.754 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.754 eqi = None
2025-07-01 17:49:07.754
2025-07-01 17:49:07.754 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.754 # identical
2025-07-01 17:49:07.754
2025-07-01 17:49:07.754 # pump out diffs from before the synch point
2025-07-01 17:49:07.754 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.754
2025-07-01 17:49:07.754 # do intraline marking on the synch pair
2025-07-01 17:49:07.754 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.754 if eqi is None:
2025-07-01 17:49:07.754 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.754 atags = btags = ""
2025-07-01 17:49:07.754 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.754 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.754 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.755 if tag == 'replace':
2025-07-01 17:49:07.755 atags += '^' * la
2025-07-01 17:49:07.755 btags += '^' * lb
2025-07-01 17:49:07.755 elif tag == 'delete':
2025-07-01 17:49:07.755 atags += '-' * la
2025-07-01 17:49:07.755 elif tag == 'insert':
2025-07-01 17:49:07.755 btags += '+' * lb
2025-07-01 17:49:07.755 elif tag == 'equal':
2025-07-01 17:49:07.755 atags += ' ' * la
2025-07-01 17:49:07.755 btags += ' ' * lb
2025-07-01 17:49:07.755 else:
2025-07-01 17:49:07.755 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.755 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.755 else:
2025-07-01 17:49:07.755 # the synch pair is identical
2025-07-01 17:49:07.755 yield ' ' + aelt
2025-07-01 17:49:07.755
2025-07-01 17:49:07.755 # pump out diffs from after the synch point
2025-07-01 17:49:07.755 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.755
2025-07-01 17:49:07.755 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.756 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.756
2025-07-01 17:49:07.756 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.756 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.756 alo = 306, ahi = 1101
2025-07-01 17:49:07.756 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.756 blo = 306, bhi = 1101
2025-07-01 17:49:07.756
2025-07-01 17:49:07.756 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.756 g = []
2025-07-01 17:49:07.756 if alo < ahi:
2025-07-01 17:49:07.756 if blo < bhi:
2025-07-01 17:49:07.756 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.756 else:
2025-07-01 17:49:07.756 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.756 elif blo < bhi:
2025-07-01 17:49:07.756 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.756
2025-07-01 17:49:07.756 > yield from g
2025-07-01 17:49:07.756
2025-07-01 17:49:07.757 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.757 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.757
2025-07-01 17:49:07.757 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.757 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.757 alo = 306, ahi = 1101
2025-07-01 17:49:07.757 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.757 blo = 306, bhi = 1101
2025-07-01 17:49:07.757
2025-07-01 17:49:07.757 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.757 r"""
2025-07-01 17:49:07.757 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.757 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.757 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.757 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.757
2025-07-01 17:49:07.757 Example:
2025-07-01 17:49:07.757
2025-07-01 17:49:07.757 >>> d = Differ()
2025-07-01 17:49:07.757 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.758 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.758 >>> print(''.join(results), end="")
2025-07-01 17:49:07.758 - abcDefghiJkl
2025-07-01 17:49:07.758 + abcdefGhijkl
2025-07-01 17:49:07.758 """
2025-07-01 17:49:07.758
2025-07-01 17:49:07.758 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.758 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.758 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.758 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.758 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.758
2025-07-01 17:49:07.758 # search for the pair that matches best without being identical
2025-07-01 17:49:07.758 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.758 # on junk -- unless we have to)
2025-07-01 17:49:07.758 for j in range(blo, bhi):
2025-07-01 17:49:07.758 bj = b[j]
2025-07-01 17:49:07.758 cruncher.set_seq2(bj)
2025-07-01 17:49:07.758 for i in range(alo, ahi):
2025-07-01 17:49:07.758 ai = a[i]
2025-07-01 17:49:07.759 if ai == bj:
2025-07-01 17:49:07.759 if eqi is None:
2025-07-01 17:49:07.759 eqi, eqj = i, j
2025-07-01 17:49:07.759 continue
2025-07-01 17:49:07.759 cruncher.set_seq1(ai)
2025-07-01 17:49:07.759 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.759 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.759 # compares by a factor of 3.
2025-07-01 17:49:07.759 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.759 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.759 # of the computation is cached by cruncher
2025-07-01 17:49:07.759 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.759 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.759 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.759 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.759 if best_ratio < cutoff:
2025-07-01 17:49:07.759 # no non-identical "pretty close" pair
2025-07-01 17:49:07.759 if eqi is None:
2025-07-01 17:49:07.759 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.759 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.759 return
2025-07-01 17:49:07.760 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.760 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.760 else:
2025-07-01 17:49:07.760 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.760 eqi = None
2025-07-01 17:49:07.760
2025-07-01 17:49:07.760 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.760 # identical
2025-07-01 17:49:07.760
2025-07-01 17:49:07.760 # pump out diffs from before the synch point
2025-07-01 17:49:07.760 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.760
2025-07-01 17:49:07.760 # do intraline marking on the synch pair
2025-07-01 17:49:07.760 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.760 if eqi is None:
2025-07-01 17:49:07.760 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.760 atags = btags = ""
2025-07-01 17:49:07.760 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.760 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.760 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.760 if tag == 'replace':
2025-07-01 17:49:07.760 atags += '^' * la
2025-07-01 17:49:07.761 btags += '^' * lb
2025-07-01 17:49:07.761 elif tag == 'delete':
2025-07-01 17:49:07.761 atags += '-' * la
2025-07-01 17:49:07.761 elif tag == 'insert':
2025-07-01 17:49:07.761 btags += '+' * lb
2025-07-01 17:49:07.761 elif tag == 'equal':
2025-07-01 17:49:07.761 atags += ' ' * la
2025-07-01 17:49:07.761 btags += ' ' * lb
2025-07-01 17:49:07.761 else:
2025-07-01 17:49:07.761 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.761 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.761 else:
2025-07-01 17:49:07.761 # the synch pair is identical
2025-07-01 17:49:07.761 yield ' ' + aelt
2025-07-01 17:49:07.761
2025-07-01 17:49:07.761 # pump out diffs from after the synch point
2025-07-01 17:49:07.761 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.761
2025-07-01 17:49:07.761 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.761 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.762
2025-07-01 17:49:07.762 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.762 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.762 alo = 307, ahi = 1101
2025-07-01 17:49:07.762 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.762 blo = 307, bhi = 1101
2025-07-01 17:49:07.762
2025-07-01 17:49:07.762 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.762 g = []
2025-07-01 17:49:07.762 if alo < ahi:
2025-07-01 17:49:07.762 if blo < bhi:
2025-07-01 17:49:07.762 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.762 else:
2025-07-01 17:49:07.762 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.762 elif blo < bhi:
2025-07-01 17:49:07.762 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.762
2025-07-01 17:49:07.762 > yield from g
2025-07-01 17:49:07.762
2025-07-01 17:49:07.762 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.763 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.763
2025-07-01 17:49:07.763 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.763 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.763 alo = 307, ahi = 1101
2025-07-01 17:49:07.763 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.763 blo = 307, bhi = 1101
2025-07-01 17:49:07.763
2025-07-01 17:49:07.763 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.763 r"""
2025-07-01 17:49:07.763 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.763 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.763 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.763 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.763
2025-07-01 17:49:07.763 Example:
2025-07-01 17:49:07.763
2025-07-01 17:49:07.763 >>> d = Differ()
2025-07-01 17:49:07.763 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.763 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.764 >>> print(''.join(results), end="")
2025-07-01 17:49:07.764 - abcDefghiJkl
2025-07-01 17:49:07.764 + abcdefGhijkl
2025-07-01 17:49:07.764 """
2025-07-01 17:49:07.764
2025-07-01 17:49:07.764 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.764 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.764 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.764 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.764 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.764
2025-07-01 17:49:07.764 # search for the pair that matches best without being identical
2025-07-01 17:49:07.764 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.764 # on junk -- unless we have to)
2025-07-01 17:49:07.764 for j in range(blo, bhi):
2025-07-01 17:49:07.764 bj = b[j]
2025-07-01 17:49:07.764 cruncher.set_seq2(bj)
2025-07-01 17:49:07.764 for i in range(alo, ahi):
2025-07-01 17:49:07.764 ai = a[i]
2025-07-01 17:49:07.764 if ai == bj:
2025-07-01 17:49:07.765 if eqi is None:
2025-07-01 17:49:07.770 eqi, eqj = i, j
2025-07-01 17:49:07.770 continue
2025-07-01 17:49:07.770 cruncher.set_seq1(ai)
2025-07-01 17:49:07.770 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.770 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.770 # compares by a factor of 3.
2025-07-01 17:49:07.770 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.770 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.770 # of the computation is cached by cruncher
2025-07-01 17:49:07.770 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.770 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.770 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.770 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.770 if best_ratio < cutoff:
2025-07-01 17:49:07.770 # no non-identical "pretty close" pair
2025-07-01 17:49:07.770 if eqi is None:
2025-07-01 17:49:07.770 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.770 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.770 return
2025-07-01 17:49:07.771 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.771 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.771 else:
2025-07-01 17:49:07.771 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.771 eqi = None
2025-07-01 17:49:07.771
2025-07-01 17:49:07.771 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.771 # identical
2025-07-01 17:49:07.771
2025-07-01 17:49:07.771 # pump out diffs from before the synch point
2025-07-01 17:49:07.771 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.771
2025-07-01 17:49:07.771 # do intraline marking on the synch pair
2025-07-01 17:49:07.771 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.771 if eqi is None:
2025-07-01 17:49:07.771 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.771 atags = btags = ""
2025-07-01 17:49:07.771 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.771 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.771 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.771 if tag == 'replace':
2025-07-01 17:49:07.772 atags += '^' * la
2025-07-01 17:49:07.772 btags += '^' * lb
2025-07-01 17:49:07.772 elif tag == 'delete':
2025-07-01 17:49:07.772 atags += '-' * la
2025-07-01 17:49:07.772 elif tag == 'insert':
2025-07-01 17:49:07.772 btags += '+' * lb
2025-07-01 17:49:07.772 elif tag == 'equal':
2025-07-01 17:49:07.772 atags += ' ' * la
2025-07-01 17:49:07.772 btags += ' ' * lb
2025-07-01 17:49:07.772 else:
2025-07-01 17:49:07.772 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.772 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.772 else:
2025-07-01 17:49:07.772 # the synch pair is identical
2025-07-01 17:49:07.772 yield ' ' + aelt
2025-07-01 17:49:07.772
2025-07-01 17:49:07.772 # pump out diffs from after the synch point
2025-07-01 17:49:07.772 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.772
2025-07-01 17:49:07.772 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.772 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.772
2025-07-01 17:49:07.773 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.773 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.773 alo = 308, ahi = 1101
2025-07-01 17:49:07.773 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.773 blo = 308, bhi = 1101
2025-07-01 17:49:07.773
2025-07-01 17:49:07.773 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.773 g = []
2025-07-01 17:49:07.773 if alo < ahi:
2025-07-01 17:49:07.773 if blo < bhi:
2025-07-01 17:49:07.773 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.773 else:
2025-07-01 17:49:07.773 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.773 elif blo < bhi:
2025-07-01 17:49:07.773 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.773
2025-07-01 17:49:07.773 > yield from g
2025-07-01 17:49:07.773
2025-07-01 17:49:07.773 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.773 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.773
2025-07-01 17:49:07.774 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.774 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.774 alo = 308, ahi = 1101
2025-07-01 17:49:07.774 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.774 blo = 308, bhi = 1101
2025-07-01 17:49:07.774
2025-07-01 17:49:07.774 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.774 r"""
2025-07-01 17:49:07.774 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.774 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.774 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.774 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.774
2025-07-01 17:49:07.774 Example:
2025-07-01 17:49:07.774
2025-07-01 17:49:07.774 >>> d = Differ()
2025-07-01 17:49:07.774 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.774 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.774 >>> print(''.join(results), end="")
2025-07-01 17:49:07.775 - abcDefghiJkl
2025-07-01 17:49:07.775 + abcdefGhijkl
2025-07-01 17:49:07.775 """
2025-07-01 17:49:07.775
2025-07-01 17:49:07.775 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.775 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.775 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.775 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.775 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.775
2025-07-01 17:49:07.775 # search for the pair that matches best without being identical
2025-07-01 17:49:07.776 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.776 # on junk -- unless we have to)
2025-07-01 17:49:07.776 for j in range(blo, bhi):
2025-07-01 17:49:07.776 bj = b[j]
2025-07-01 17:49:07.776 cruncher.set_seq2(bj)
2025-07-01 17:49:07.776 for i in range(alo, ahi):
2025-07-01 17:49:07.776 ai = a[i]
2025-07-01 17:49:07.776 if ai == bj:
2025-07-01 17:49:07.776 if eqi is None:
2025-07-01 17:49:07.776 eqi, eqj = i, j
2025-07-01 17:49:07.776 continue
2025-07-01 17:49:07.776 cruncher.set_seq1(ai)
2025-07-01 17:49:07.776 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.776 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.776 # compares by a factor of 3.
2025-07-01 17:49:07.776 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.776 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.776 # of the computation is cached by cruncher
2025-07-01 17:49:07.776 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.776 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.776 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.777 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.777 if best_ratio < cutoff:
2025-07-01 17:49:07.777 # no non-identical "pretty close" pair
2025-07-01 17:49:07.777 if eqi is None:
2025-07-01 17:49:07.777 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.777 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.777 return
2025-07-01 17:49:07.777 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.777 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.777 else:
2025-07-01 17:49:07.777 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.777 eqi = None
2025-07-01 17:49:07.777
2025-07-01 17:49:07.777 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.777 # identical
2025-07-01 17:49:07.777
2025-07-01 17:49:07.777 # pump out diffs from before the synch point
2025-07-01 17:49:07.777 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.777
2025-07-01 17:49:07.777 # do intraline marking on the synch pair
2025-07-01 17:49:07.777 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.778 if eqi is None:
2025-07-01 17:49:07.778 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.778 atags = btags = ""
2025-07-01 17:49:07.778 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.778 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.778 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.778 if tag == 'replace':
2025-07-01 17:49:07.778 atags += '^' * la
2025-07-01 17:49:07.778 btags += '^' * lb
2025-07-01 17:49:07.778 elif tag == 'delete':
2025-07-01 17:49:07.778 atags += '-' * la
2025-07-01 17:49:07.778 elif tag == 'insert':
2025-07-01 17:49:07.778 btags += '+' * lb
2025-07-01 17:49:07.778 elif tag == 'equal':
2025-07-01 17:49:07.778 atags += ' ' * la
2025-07-01 17:49:07.778 btags += ' ' * lb
2025-07-01 17:49:07.778 else:
2025-07-01 17:49:07.778 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.778 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.778 else:
2025-07-01 17:49:07.778 # the synch pair is identical
2025-07-01 17:49:07.779 yield ' ' + aelt
2025-07-01 17:49:07.779
2025-07-01 17:49:07.779 # pump out diffs from after the synch point
2025-07-01 17:49:07.779 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.779
2025-07-01 17:49:07.779 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.779 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.779
2025-07-01 17:49:07.779 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.779 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.779 alo = 309, ahi = 1101
2025-07-01 17:49:07.779 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.779 blo = 309, bhi = 1101
2025-07-01 17:49:07.779
2025-07-01 17:49:07.779 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.779 g = []
2025-07-01 17:49:07.779 if alo < ahi:
2025-07-01 17:49:07.779 if blo < bhi:
2025-07-01 17:49:07.779 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.779 else:
2025-07-01 17:49:07.779 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.779 elif blo < bhi:
2025-07-01 17:49:07.780 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.780
2025-07-01 17:49:07.780 > yield from g
2025-07-01 17:49:07.780
2025-07-01 17:49:07.780 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.780 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.780
2025-07-01 17:49:07.780 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.780 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.780 alo = 309, ahi = 1101
2025-07-01 17:49:07.780 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.780 blo = 309, bhi = 1101
2025-07-01 17:49:07.780
2025-07-01 17:49:07.780 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.780 r"""
2025-07-01 17:49:07.780 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.780 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.780 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.780 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.780
2025-07-01 17:49:07.780 Example:
2025-07-01 17:49:07.781
2025-07-01 17:49:07.784 >>> d = Differ()
2025-07-01 17:49:07.784 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.784 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.784 >>> print(''.join(results), end="")
2025-07-01 17:49:07.784 - abcDefghiJkl
2025-07-01 17:49:07.784 + abcdefGhijkl
2025-07-01 17:49:07.784 """
2025-07-01 17:49:07.784
2025-07-01 17:49:07.784 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.784 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.784 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.784 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.784 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.784
2025-07-01 17:49:07.784 # search for the pair that matches best without being identical
2025-07-01 17:49:07.784 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.784 # on junk -- unless we have to)
2025-07-01 17:49:07.784 for j in range(blo, bhi):
2025-07-01 17:49:07.785 bj = b[j]
2025-07-01 17:49:07.785 cruncher.set_seq2(bj)
2025-07-01 17:49:07.785 for i in range(alo, ahi):
2025-07-01 17:49:07.785 ai = a[i]
2025-07-01 17:49:07.785 if ai == bj:
2025-07-01 17:49:07.785 if eqi is None:
2025-07-01 17:49:07.785 eqi, eqj = i, j
2025-07-01 17:49:07.785 continue
2025-07-01 17:49:07.785 cruncher.set_seq1(ai)
2025-07-01 17:49:07.785 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.785 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.785 # compares by a factor of 3.
2025-07-01 17:49:07.785 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.785 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.785 # of the computation is cached by cruncher
2025-07-01 17:49:07.785 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.785 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.785 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.785 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.785 if best_ratio < cutoff:
2025-07-01 17:49:07.785 # no non-identical "pretty close" pair
2025-07-01 17:49:07.786 if eqi is None:
2025-07-01 17:49:07.786 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.786 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.786 return
2025-07-01 17:49:07.786 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.786 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.786 else:
2025-07-01 17:49:07.786 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.786 eqi = None
2025-07-01 17:49:07.786
2025-07-01 17:49:07.786 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.786 # identical
2025-07-01 17:49:07.786
2025-07-01 17:49:07.786 # pump out diffs from before the synch point
2025-07-01 17:49:07.786 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.786
2025-07-01 17:49:07.786 # do intraline marking on the synch pair
2025-07-01 17:49:07.787 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.787 if eqi is None:
2025-07-01 17:49:07.787 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.787 atags = btags = ""
2025-07-01 17:49:07.787 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.787 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.787 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.787 if tag == 'replace':
2025-07-01 17:49:07.787 atags += '^' * la
2025-07-01 17:49:07.787 btags += '^' * lb
2025-07-01 17:49:07.787 elif tag == 'delete':
2025-07-01 17:49:07.787 atags += '-' * la
2025-07-01 17:49:07.787 elif tag == 'insert':
2025-07-01 17:49:07.787 btags += '+' * lb
2025-07-01 17:49:07.787 elif tag == 'equal':
2025-07-01 17:49:07.787 atags += ' ' * la
2025-07-01 17:49:07.787 btags += ' ' * lb
2025-07-01 17:49:07.787 else:
2025-07-01 17:49:07.787 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.787 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.788 else:
2025-07-01 17:49:07.788 # the synch pair is identical
2025-07-01 17:49:07.788 yield ' ' + aelt
2025-07-01 17:49:07.788
2025-07-01 17:49:07.788 # pump out diffs from after the synch point
2025-07-01 17:49:07.788 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.788
2025-07-01 17:49:07.788 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.788 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.788
2025-07-01 17:49:07.788 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.788 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.788 alo = 312, ahi = 1101
2025-07-01 17:49:07.788 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.788 blo = 312, bhi = 1101
2025-07-01 17:49:07.788
2025-07-01 17:49:07.788 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.788 g = []
2025-07-01 17:49:07.788 if alo < ahi:
2025-07-01 17:49:07.788 if blo < bhi:
2025-07-01 17:49:07.789 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.789 else:
2025-07-01 17:49:07.789 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.789 elif blo < bhi:
2025-07-01 17:49:07.789 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.789
2025-07-01 17:49:07.789 > yield from g
2025-07-01 17:49:07.789
2025-07-01 17:49:07.789 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.789 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.789
2025-07-01 17:49:07.789 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.789 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.789 alo = 312, ahi = 1101
2025-07-01 17:49:07.789 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.789 blo = 312, bhi = 1101
2025-07-01 17:49:07.789
2025-07-01 17:49:07.789 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.789 r"""
2025-07-01 17:49:07.789 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.790 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.790 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.790 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.790
2025-07-01 17:49:07.790 Example:
2025-07-01 17:49:07.790
2025-07-01 17:49:07.790 >>> d = Differ()
2025-07-01 17:49:07.790 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.790 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.790 >>> print(''.join(results), end="")
2025-07-01 17:49:07.790 - abcDefghiJkl
2025-07-01 17:49:07.790 + abcdefGhijkl
2025-07-01 17:49:07.790 """
2025-07-01 17:49:07.790
2025-07-01 17:49:07.790 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.790 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.791 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.791 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.791 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.791
2025-07-01 17:49:07.791 # search for the pair that matches best without being identical
2025-07-01 17:49:07.791 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.791 # on junk -- unless we have to)
2025-07-01 17:49:07.791 for j in range(blo, bhi):
2025-07-01 17:49:07.791 bj = b[j]
2025-07-01 17:49:07.791 cruncher.set_seq2(bj)
2025-07-01 17:49:07.791 for i in range(alo, ahi):
2025-07-01 17:49:07.791 ai = a[i]
2025-07-01 17:49:07.791 if ai == bj:
2025-07-01 17:49:07.791 if eqi is None:
2025-07-01 17:49:07.791 eqi, eqj = i, j
2025-07-01 17:49:07.791 continue
2025-07-01 17:49:07.791 cruncher.set_seq1(ai)
2025-07-01 17:49:07.791 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.791 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.791 # compares by a factor of 3.
2025-07-01 17:49:07.792 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.792 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.792 # of the computation is cached by cruncher
2025-07-01 17:49:07.792 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.792 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.792 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.792 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.792 if best_ratio < cutoff:
2025-07-01 17:49:07.792 # no non-identical "pretty close" pair
2025-07-01 17:49:07.792 if eqi is None:
2025-07-01 17:49:07.792 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.792 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.792 return
2025-07-01 17:49:07.792 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.792 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.792 else:
2025-07-01 17:49:07.792 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.792 eqi = None
2025-07-01 17:49:07.792
2025-07-01 17:49:07.793 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.793 # identical
2025-07-01 17:49:07.793
2025-07-01 17:49:07.793 # pump out diffs from before the synch point
2025-07-01 17:49:07.793 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.793
2025-07-01 17:49:07.793 # do intraline marking on the synch pair
2025-07-01 17:49:07.793 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.793 if eqi is None:
2025-07-01 17:49:07.793 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.793 atags = btags = ""
2025-07-01 17:49:07.793 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.793 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.793 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.793 if tag == 'replace':
2025-07-01 17:49:07.793 atags += '^' * la
2025-07-01 17:49:07.793 btags += '^' * lb
2025-07-01 17:49:07.793 elif tag == 'delete':
2025-07-01 17:49:07.793 atags += '-' * la
2025-07-01 17:49:07.793 elif tag == 'insert':
2025-07-01 17:49:07.793 btags += '+' * lb
2025-07-01 17:49:07.794 elif tag == 'equal':
2025-07-01 17:49:07.794 atags += ' ' * la
2025-07-01 17:49:07.794 btags += ' ' * lb
2025-07-01 17:49:07.794 else:
2025-07-01 17:49:07.794 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.794 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.794 else:
2025-07-01 17:49:07.794 # the synch pair is identical
2025-07-01 17:49:07.794 yield ' ' + aelt
2025-07-01 17:49:07.794
2025-07-01 17:49:07.794 # pump out diffs from after the synch point
2025-07-01 17:49:07.794 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.794
2025-07-01 17:49:07.794 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.794 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.794
2025-07-01 17:49:07.794 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.794 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.794 alo = 313, ahi = 1101
2025-07-01 17:49:07.794 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.795 blo = 313, bhi = 1101
2025-07-01 17:49:07.795
2025-07-01 17:49:07.795 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.795 g = []
2025-07-01 17:49:07.795 if alo < ahi:
2025-07-01 17:49:07.795 if blo < bhi:
2025-07-01 17:49:07.795 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.795 else:
2025-07-01 17:49:07.795 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.795 elif blo < bhi:
2025-07-01 17:49:07.795 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.795
2025-07-01 17:49:07.795 > yield from g
2025-07-01 17:49:07.795
2025-07-01 17:49:07.795 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.795 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.795
2025-07-01 17:49:07.795 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.795 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.795 alo = 313, ahi = 1101
2025-07-01 17:49:07.796 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.796 blo = 313, bhi = 1101
2025-07-01 17:49:07.796
2025-07-01 17:49:07.796 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.796 r"""
2025-07-01 17:49:07.796 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.796 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.796 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.796 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.796
2025-07-01 17:49:07.796 Example:
2025-07-01 17:49:07.796
2025-07-01 17:49:07.796 >>> d = Differ()
2025-07-01 17:49:07.796 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.796 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.796 >>> print(''.join(results), end="")
2025-07-01 17:49:07.796 - abcDefghiJkl
2025-07-01 17:49:07.796 + abcdefGhijkl
2025-07-01 17:49:07.797 """
2025-07-01 17:49:07.802
2025-07-01 17:49:07.802 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.802 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.802 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.802 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.802 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.802
2025-07-01 17:49:07.802 # search for the pair that matches best without being identical
2025-07-01 17:49:07.802 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.802 # on junk -- unless we have to)
2025-07-01 17:49:07.802 for j in range(blo, bhi):
2025-07-01 17:49:07.802 bj = b[j]
2025-07-01 17:49:07.802 cruncher.set_seq2(bj)
2025-07-01 17:49:07.802 for i in range(alo, ahi):
2025-07-01 17:49:07.802 ai = a[i]
2025-07-01 17:49:07.802 if ai == bj:
2025-07-01 17:49:07.802 if eqi is None:
2025-07-01 17:49:07.802 eqi, eqj = i, j
2025-07-01 17:49:07.803 continue
2025-07-01 17:49:07.803 cruncher.set_seq1(ai)
2025-07-01 17:49:07.803 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.803 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.803 # compares by a factor of 3.
2025-07-01 17:49:07.803 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.803 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.803 # of the computation is cached by cruncher
2025-07-01 17:49:07.803 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.803 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.803 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.803 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.803 if best_ratio < cutoff:
2025-07-01 17:49:07.803 # no non-identical "pretty close" pair
2025-07-01 17:49:07.803 if eqi is None:
2025-07-01 17:49:07.803 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.803 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.803 return
2025-07-01 17:49:07.803 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.803 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.804 else:
2025-07-01 17:49:07.804 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.804 eqi = None
2025-07-01 17:49:07.804
2025-07-01 17:49:07.804 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.804 # identical
2025-07-01 17:49:07.804
2025-07-01 17:49:07.804 # pump out diffs from before the synch point
2025-07-01 17:49:07.804 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.804
2025-07-01 17:49:07.804 # do intraline marking on the synch pair
2025-07-01 17:49:07.804 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.804 if eqi is None:
2025-07-01 17:49:07.804 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.804 atags = btags = ""
2025-07-01 17:49:07.804 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.804 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.804 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.804 if tag == 'replace':
2025-07-01 17:49:07.804 atags += '^' * la
2025-07-01 17:49:07.804 btags += '^' * lb
2025-07-01 17:49:07.805 elif tag == 'delete':
2025-07-01 17:49:07.805 atags += '-' * la
2025-07-01 17:49:07.805 elif tag == 'insert':
2025-07-01 17:49:07.805 btags += '+' * lb
2025-07-01 17:49:07.805 elif tag == 'equal':
2025-07-01 17:49:07.805 atags += ' ' * la
2025-07-01 17:49:07.805 btags += ' ' * lb
2025-07-01 17:49:07.805 else:
2025-07-01 17:49:07.805 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.805 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.805 else:
2025-07-01 17:49:07.805 # the synch pair is identical
2025-07-01 17:49:07.805 yield ' ' + aelt
2025-07-01 17:49:07.805
2025-07-01 17:49:07.805 # pump out diffs from after the synch point
2025-07-01 17:49:07.805 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.805
2025-07-01 17:49:07.805 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.805 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.805
2025-07-01 17:49:07.806 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.806 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.806 alo = 314, ahi = 1101
2025-07-01 17:49:07.806 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.806 blo = 314, bhi = 1101
2025-07-01 17:49:07.806
2025-07-01 17:49:07.806 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.806 g = []
2025-07-01 17:49:07.806 if alo < ahi:
2025-07-01 17:49:07.806 if blo < bhi:
2025-07-01 17:49:07.806 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.806 else:
2025-07-01 17:49:07.806 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.806 elif blo < bhi:
2025-07-01 17:49:07.806 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.806
2025-07-01 17:49:07.806 > yield from g
2025-07-01 17:49:07.806
2025-07-01 17:49:07.806 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.806 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.807
2025-07-01 17:49:07.807 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.807 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.807 alo = 314, ahi = 1101
2025-07-01 17:49:07.807 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.807 blo = 314, bhi = 1101
2025-07-01 17:49:07.807
2025-07-01 17:49:07.807 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.807 r"""
2025-07-01 17:49:07.807 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.807 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.807 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.807 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.807
2025-07-01 17:49:07.807 Example:
2025-07-01 17:49:07.807
2025-07-01 17:49:07.807 >>> d = Differ()
2025-07-01 17:49:07.807 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.807 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.807 >>> print(''.join(results), end="")
2025-07-01 17:49:07.807 - abcDefghiJkl
2025-07-01 17:49:07.808 + abcdefGhijkl
2025-07-01 17:49:07.808 """
2025-07-01 17:49:07.808
2025-07-01 17:49:07.808 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.808 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.808 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.808 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.808 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.808
2025-07-01 17:49:07.808 # search for the pair that matches best without being identical
2025-07-01 17:49:07.808 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.808 # on junk -- unless we have to)
2025-07-01 17:49:07.808 for j in range(blo, bhi):
2025-07-01 17:49:07.808 bj = b[j]
2025-07-01 17:49:07.808 cruncher.set_seq2(bj)
2025-07-01 17:49:07.808 for i in range(alo, ahi):
2025-07-01 17:49:07.808 ai = a[i]
2025-07-01 17:49:07.809 if ai == bj:
2025-07-01 17:49:07.809 if eqi is None:
2025-07-01 17:49:07.809 eqi, eqj = i, j
2025-07-01 17:49:07.809 continue
2025-07-01 17:49:07.809 cruncher.set_seq1(ai)
2025-07-01 17:49:07.809 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.809 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.809 # compares by a factor of 3.
2025-07-01 17:49:07.809 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.809 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.809 # of the computation is cached by cruncher
2025-07-01 17:49:07.809 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.809 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.809 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.809 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.809 if best_ratio < cutoff:
2025-07-01 17:49:07.809 # no non-identical "pretty close" pair
2025-07-01 17:49:07.809 if eqi is None:
2025-07-01 17:49:07.809 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.809 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.810 return
2025-07-01 17:49:07.810 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.810 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.810 else:
2025-07-01 17:49:07.810 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.810 eqi = None
2025-07-01 17:49:07.810
2025-07-01 17:49:07.810 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.810 # identical
2025-07-01 17:49:07.810
2025-07-01 17:49:07.810 # pump out diffs from before the synch point
2025-07-01 17:49:07.810 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.810
2025-07-01 17:49:07.810 # do intraline marking on the synch pair
2025-07-01 17:49:07.810 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.810 if eqi is None:
2025-07-01 17:49:07.810 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.810 atags = btags = ""
2025-07-01 17:49:07.810 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.810 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.810 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.811 if tag == 'replace':
2025-07-01 17:49:07.811 atags += '^' * la
2025-07-01 17:49:07.811 btags += '^' * lb
2025-07-01 17:49:07.811 elif tag == 'delete':
2025-07-01 17:49:07.811 atags += '-' * la
2025-07-01 17:49:07.811 elif tag == 'insert':
2025-07-01 17:49:07.811 btags += '+' * lb
2025-07-01 17:49:07.811 elif tag == 'equal':
2025-07-01 17:49:07.811 atags += ' ' * la
2025-07-01 17:49:07.811 btags += ' ' * lb
2025-07-01 17:49:07.811 else:
2025-07-01 17:49:07.811 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.811 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.811 else:
2025-07-01 17:49:07.811 # the synch pair is identical
2025-07-01 17:49:07.811 yield ' ' + aelt
2025-07-01 17:49:07.811
2025-07-01 17:49:07.811 # pump out diffs from after the synch point
2025-07-01 17:49:07.811 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.811
2025-07-01 17:49:07.811 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.812 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.812
2025-07-01 17:49:07.812 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.812 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.812 alo = 315, ahi = 1101
2025-07-01 17:49:07.812 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.812 blo = 315, bhi = 1101
2025-07-01 17:49:07.812
2025-07-01 17:49:07.812 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.812 g = []
2025-07-01 17:49:07.812 if alo < ahi:
2025-07-01 17:49:07.812 if blo < bhi:
2025-07-01 17:49:07.812 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.812 else:
2025-07-01 17:49:07.812 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.812 elif blo < bhi:
2025-07-01 17:49:07.812 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.812
2025-07-01 17:49:07.812 > yield from g
2025-07-01 17:49:07.812
2025-07-01 17:49:07.813 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.816 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.816
2025-07-01 17:49:07.816 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.816 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.816 alo = 315, ahi = 1101
2025-07-01 17:49:07.816 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.816 blo = 315, bhi = 1101
2025-07-01 17:49:07.816
2025-07-01 17:49:07.816 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.816 r"""
2025-07-01 17:49:07.816 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.816 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.816 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.816 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.816
2025-07-01 17:49:07.816 Example:
2025-07-01 17:49:07.816
2025-07-01 17:49:07.816 >>> d = Differ()
2025-07-01 17:49:07.816 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.817 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.817 >>> print(''.join(results), end="")
2025-07-01 17:49:07.817 - abcDefghiJkl
2025-07-01 17:49:07.817 + abcdefGhijkl
2025-07-01 17:49:07.817 """
2025-07-01 17:49:07.817
2025-07-01 17:49:07.817 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.817 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.817 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.817 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.817 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.817
2025-07-01 17:49:07.817 # search for the pair that matches best without being identical
2025-07-01 17:49:07.817 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.817 # on junk -- unless we have to)
2025-07-01 17:49:07.817 for j in range(blo, bhi):
2025-07-01 17:49:07.817 bj = b[j]
2025-07-01 17:49:07.817 cruncher.set_seq2(bj)
2025-07-01 17:49:07.817 for i in range(alo, ahi):
2025-07-01 17:49:07.818 ai = a[i]
2025-07-01 17:49:07.818 if ai == bj:
2025-07-01 17:49:07.818 if eqi is None:
2025-07-01 17:49:07.818 eqi, eqj = i, j
2025-07-01 17:49:07.818 continue
2025-07-01 17:49:07.818 cruncher.set_seq1(ai)
2025-07-01 17:49:07.818 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.818 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.818 # compares by a factor of 3.
2025-07-01 17:49:07.818 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.818 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.818 # of the computation is cached by cruncher
2025-07-01 17:49:07.818 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.818 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.818 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.818 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.818 if best_ratio < cutoff:
2025-07-01 17:49:07.818 # no non-identical "pretty close" pair
2025-07-01 17:49:07.818 if eqi is None:
2025-07-01 17:49:07.818 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.818 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.819 return
2025-07-01 17:49:07.819 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.819 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.819 else:
2025-07-01 17:49:07.819 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.819 eqi = None
2025-07-01 17:49:07.819
2025-07-01 17:49:07.819 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.819 # identical
2025-07-01 17:49:07.819
2025-07-01 17:49:07.819 # pump out diffs from before the synch point
2025-07-01 17:49:07.819 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.819
2025-07-01 17:49:07.819 # do intraline marking on the synch pair
2025-07-01 17:49:07.819 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.819 if eqi is None:
2025-07-01 17:49:07.819 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.819 atags = btags = ""
2025-07-01 17:49:07.819 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.819 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.819 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.819 if tag == 'replace':
2025-07-01 17:49:07.819 atags += '^' * la
2025-07-01 17:49:07.819 btags += '^' * lb
2025-07-01 17:49:07.819 elif tag == 'delete':
2025-07-01 17:49:07.819 atags += '-' * la
2025-07-01 17:49:07.819 elif tag == 'insert':
2025-07-01 17:49:07.820 btags += '+' * lb
2025-07-01 17:49:07.820 elif tag == 'equal':
2025-07-01 17:49:07.820 atags += ' ' * la
2025-07-01 17:49:07.820 btags += ' ' * lb
2025-07-01 17:49:07.820 else:
2025-07-01 17:49:07.820 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.820 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.820 else:
2025-07-01 17:49:07.820 # the synch pair is identical
2025-07-01 17:49:07.820 yield ' ' + aelt
2025-07-01 17:49:07.820
2025-07-01 17:49:07.820 # pump out diffs from after the synch point
2025-07-01 17:49:07.820 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.820
2025-07-01 17:49:07.820 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.820 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.820
2025-07-01 17:49:07.820 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.820 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.821 alo = 316, ahi = 1101
2025-07-01 17:49:07.821 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.821 blo = 316, bhi = 1101
2025-07-01 17:49:07.821
2025-07-01 17:49:07.821 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.821 g = []
2025-07-01 17:49:07.821 if alo < ahi:
2025-07-01 17:49:07.821 if blo < bhi:
2025-07-01 17:49:07.821 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.821 else:
2025-07-01 17:49:07.821 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.821 elif blo < bhi:
2025-07-01 17:49:07.821 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.821
2025-07-01 17:49:07.821 > yield from g
2025-07-01 17:49:07.821
2025-07-01 17:49:07.821 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.821 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.821
2025-07-01 17:49:07.821 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.821 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.822 alo = 316, ahi = 1101
2025-07-01 17:49:07.822 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.822 blo = 316, bhi = 1101
2025-07-01 17:49:07.822
2025-07-01 17:49:07.822 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.822 r"""
2025-07-01 17:49:07.822 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.822 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.822 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.822 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.822
2025-07-01 17:49:07.822 Example:
2025-07-01 17:49:07.822
2025-07-01 17:49:07.822 >>> d = Differ()
2025-07-01 17:49:07.822 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.822 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.822 >>> print(''.join(results), end="")
2025-07-01 17:49:07.822 - abcDefghiJkl
2025-07-01 17:49:07.822 + abcdefGhijkl
2025-07-01 17:49:07.823 """
2025-07-01 17:49:07.823
2025-07-01 17:49:07.823 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.823 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.823 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.823 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.823 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.823
2025-07-01 17:49:07.823 # search for the pair that matches best without being identical
2025-07-01 17:49:07.823 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.823 # on junk -- unless we have to)
2025-07-01 17:49:07.823 for j in range(blo, bhi):
2025-07-01 17:49:07.823 bj = b[j]
2025-07-01 17:49:07.823 cruncher.set_seq2(bj)
2025-07-01 17:49:07.823 for i in range(alo, ahi):
2025-07-01 17:49:07.823 ai = a[i]
2025-07-01 17:49:07.823 if ai == bj:
2025-07-01 17:49:07.823 if eqi is None:
2025-07-01 17:49:07.823 eqi, eqj = i, j
2025-07-01 17:49:07.823 continue
2025-07-01 17:49:07.823 cruncher.set_seq1(ai)
2025-07-01 17:49:07.824 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.824 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.824 # compares by a factor of 3.
2025-07-01 17:49:07.824 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.824 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.824 # of the computation is cached by cruncher
2025-07-01 17:49:07.824 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.824 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.824 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.824 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.824 if best_ratio < cutoff:
2025-07-01 17:49:07.824 # no non-identical "pretty close" pair
2025-07-01 17:49:07.824 if eqi is None:
2025-07-01 17:49:07.824 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.824 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.824 return
2025-07-01 17:49:07.824 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.824 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.824 else:
2025-07-01 17:49:07.824 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.824 eqi = None
2025-07-01 17:49:07.825
2025-07-01 17:49:07.825 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.825 # identical
2025-07-01 17:49:07.825
2025-07-01 17:49:07.825 # pump out diffs from before the synch point
2025-07-01 17:49:07.825 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.825
2025-07-01 17:49:07.825 # do intraline marking on the synch pair
2025-07-01 17:49:07.825 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.825 if eqi is None:
2025-07-01 17:49:07.825 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.825 atags = btags = ""
2025-07-01 17:49:07.825 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.825 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.825 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.825 if tag == 'replace':
2025-07-01 17:49:07.825 atags += '^' * la
2025-07-01 17:49:07.825 btags += '^' * lb
2025-07-01 17:49:07.825 elif tag == 'delete':
2025-07-01 17:49:07.825 atags += '-' * la
2025-07-01 17:49:07.825 elif tag == 'insert':
2025-07-01 17:49:07.825 btags += '+' * lb
2025-07-01 17:49:07.826 elif tag == 'equal':
2025-07-01 17:49:07.826 atags += ' ' * la
2025-07-01 17:49:07.826 btags += ' ' * lb
2025-07-01 17:49:07.826 else:
2025-07-01 17:49:07.826 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.826 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.826 else:
2025-07-01 17:49:07.826 # the synch pair is identical
2025-07-01 17:49:07.826 yield ' ' + aelt
2025-07-01 17:49:07.826
2025-07-01 17:49:07.826 # pump out diffs from after the synch point
2025-07-01 17:49:07.826 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.826
2025-07-01 17:49:07.826 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.826 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.826
2025-07-01 17:49:07.826 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.826 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.826 alo = 317, ahi = 1101
2025-07-01 17:49:07.826 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.826 blo = 317, bhi = 1101
2025-07-01 17:49:07.827
2025-07-01 17:49:07.827 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.827 g = []
2025-07-01 17:49:07.827 if alo < ahi:
2025-07-01 17:49:07.827 if blo < bhi:
2025-07-01 17:49:07.827 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.827 else:
2025-07-01 17:49:07.827 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.827 elif blo < bhi:
2025-07-01 17:49:07.827 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.827
2025-07-01 17:49:07.827 > yield from g
2025-07-01 17:49:07.827
2025-07-01 17:49:07.827 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.827 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.827
2025-07-01 17:49:07.827 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.827 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.827 alo = 317, ahi = 1101
2025-07-01 17:49:07.827 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.828 blo = 317, bhi = 1101
2025-07-01 17:49:07.833
2025-07-01 17:49:07.834 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.834 r"""
2025-07-01 17:49:07.834 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.834 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.834 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.834 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.834
2025-07-01 17:49:07.834 Example:
2025-07-01 17:49:07.834
2025-07-01 17:49:07.834 >>> d = Differ()
2025-07-01 17:49:07.834 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.834 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.834 >>> print(''.join(results), end="")
2025-07-01 17:49:07.834 - abcDefghiJkl
2025-07-01 17:49:07.834 + abcdefGhijkl
2025-07-01 17:49:07.834 """
2025-07-01 17:49:07.834
2025-07-01 17:49:07.835 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.835 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.835 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.835 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.835 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.835
2025-07-01 17:49:07.835 # search for the pair that matches best without being identical
2025-07-01 17:49:07.835 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.835 # on junk -- unless we have to)
2025-07-01 17:49:07.835 for j in range(blo, bhi):
2025-07-01 17:49:07.835 bj = b[j]
2025-07-01 17:49:07.835 cruncher.set_seq2(bj)
2025-07-01 17:49:07.835 for i in range(alo, ahi):
2025-07-01 17:49:07.835 ai = a[i]
2025-07-01 17:49:07.835 if ai == bj:
2025-07-01 17:49:07.835 if eqi is None:
2025-07-01 17:49:07.835 eqi, eqj = i, j
2025-07-01 17:49:07.835 continue
2025-07-01 17:49:07.835 cruncher.set_seq1(ai)
2025-07-01 17:49:07.835 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.835 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.836 # compares by a factor of 3.
2025-07-01 17:49:07.836 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.836 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.836 # of the computation is cached by cruncher
2025-07-01 17:49:07.836 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.836 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.836 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.836 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.836 if best_ratio < cutoff:
2025-07-01 17:49:07.836 # no non-identical "pretty close" pair
2025-07-01 17:49:07.836 if eqi is None:
2025-07-01 17:49:07.836 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.836 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.836 return
2025-07-01 17:49:07.836 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.836 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.836 else:
2025-07-01 17:49:07.836 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.836 eqi = None
2025-07-01 17:49:07.836
2025-07-01 17:49:07.837 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.837 # identical
2025-07-01 17:49:07.837
2025-07-01 17:49:07.837 # pump out diffs from before the synch point
2025-07-01 17:49:07.837 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.837
2025-07-01 17:49:07.837 # do intraline marking on the synch pair
2025-07-01 17:49:07.837 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.837 if eqi is None:
2025-07-01 17:49:07.837 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.837 atags = btags = ""
2025-07-01 17:49:07.837 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.837 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.837 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.837 if tag == 'replace':
2025-07-01 17:49:07.837 atags += '^' * la
2025-07-01 17:49:07.837 btags += '^' * lb
2025-07-01 17:49:07.837 elif tag == 'delete':
2025-07-01 17:49:07.837 atags += '-' * la
2025-07-01 17:49:07.837 elif tag == 'insert':
2025-07-01 17:49:07.837 btags += '+' * lb
2025-07-01 17:49:07.838 elif tag == 'equal':
2025-07-01 17:49:07.838 atags += ' ' * la
2025-07-01 17:49:07.838 btags += ' ' * lb
2025-07-01 17:49:07.838 else:
2025-07-01 17:49:07.838 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.838 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.838 else:
2025-07-01 17:49:07.838 # the synch pair is identical
2025-07-01 17:49:07.838 yield ' ' + aelt
2025-07-01 17:49:07.838
2025-07-01 17:49:07.838 # pump out diffs from after the synch point
2025-07-01 17:49:07.838 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.838
2025-07-01 17:49:07.838 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.838 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.838
2025-07-01 17:49:07.838 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.838 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.838 alo = 318, ahi = 1101
2025-07-01 17:49:07.838 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.838 blo = 318, bhi = 1101
2025-07-01 17:49:07.839
2025-07-01 17:49:07.839 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.839 g = []
2025-07-01 17:49:07.839 if alo < ahi:
2025-07-01 17:49:07.839 if blo < bhi:
2025-07-01 17:49:07.839 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.839 else:
2025-07-01 17:49:07.839 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.839 elif blo < bhi:
2025-07-01 17:49:07.839 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.839
2025-07-01 17:49:07.839 > yield from g
2025-07-01 17:49:07.839
2025-07-01 17:49:07.839 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.839 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.839
2025-07-01 17:49:07.839 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.839 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.839 alo = 318, ahi = 1101
2025-07-01 17:49:07.839 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.839 blo = 318, bhi = 1101
2025-07-01 17:49:07.840
2025-07-01 17:49:07.840 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.840 r"""
2025-07-01 17:49:07.840 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.840 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.840 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.840 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.840
2025-07-01 17:49:07.840 Example:
2025-07-01 17:49:07.840
2025-07-01 17:49:07.840 >>> d = Differ()
2025-07-01 17:49:07.840 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.840 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.840 >>> print(''.join(results), end="")
2025-07-01 17:49:07.840 - abcDefghiJkl
2025-07-01 17:49:07.840 + abcdefGhijkl
2025-07-01 17:49:07.840 """
2025-07-01 17:49:07.840
2025-07-01 17:49:07.840 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.840 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.841 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.841 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.841 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.841
2025-07-01 17:49:07.841 # search for the pair that matches best without being identical
2025-07-01 17:49:07.841 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.841 # on junk -- unless we have to)
2025-07-01 17:49:07.841 for j in range(blo, bhi):
2025-07-01 17:49:07.841 bj = b[j]
2025-07-01 17:49:07.841 cruncher.set_seq2(bj)
2025-07-01 17:49:07.841 for i in range(alo, ahi):
2025-07-01 17:49:07.841 ai = a[i]
2025-07-01 17:49:07.841 if ai == bj:
2025-07-01 17:49:07.841 if eqi is None:
2025-07-01 17:49:07.841 eqi, eqj = i, j
2025-07-01 17:49:07.841 continue
2025-07-01 17:49:07.841 cruncher.set_seq1(ai)
2025-07-01 17:49:07.841 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.841 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.841 # compares by a factor of 3.
2025-07-01 17:49:07.841 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.842 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.842 # of the computation is cached by cruncher
2025-07-01 17:49:07.842 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.842 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.842 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.842 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.842 if best_ratio < cutoff:
2025-07-01 17:49:07.842 # no non-identical "pretty close" pair
2025-07-01 17:49:07.842 if eqi is None:
2025-07-01 17:49:07.842 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.842 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.842 return
2025-07-01 17:49:07.842 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.842 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.842 else:
2025-07-01 17:49:07.842 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.842 eqi = None
2025-07-01 17:49:07.842
2025-07-01 17:49:07.842 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.843 # identical
2025-07-01 17:49:07.843
2025-07-01 17:49:07.843 # pump out diffs from before the synch point
2025-07-01 17:49:07.843 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.843
2025-07-01 17:49:07.843 # do intraline marking on the synch pair
2025-07-01 17:49:07.843 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.843 if eqi is None:
2025-07-01 17:49:07.843 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.843 atags = btags = ""
2025-07-01 17:49:07.843 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.843 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.843 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.843 if tag == 'replace':
2025-07-01 17:49:07.843 atags += '^' * la
2025-07-01 17:49:07.843 btags += '^' * lb
2025-07-01 17:49:07.843 elif tag == 'delete':
2025-07-01 17:49:07.843 atags += '-' * la
2025-07-01 17:49:07.843 elif tag == 'insert':
2025-07-01 17:49:07.843 btags += '+' * lb
2025-07-01 17:49:07.844 elif tag == 'equal':
2025-07-01 17:49:07.846 atags += ' ' * la
2025-07-01 17:49:07.846 btags += ' ' * lb
2025-07-01 17:49:07.847 else:
2025-07-01 17:49:07.847 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.847 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.847 else:
2025-07-01 17:49:07.847 # the synch pair is identical
2025-07-01 17:49:07.847 yield ' ' + aelt
2025-07-01 17:49:07.847
2025-07-01 17:49:07.847 # pump out diffs from after the synch point
2025-07-01 17:49:07.847 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.847
2025-07-01 17:49:07.847 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.847 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.847
2025-07-01 17:49:07.847 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.847 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.847 alo = 319, ahi = 1101
2025-07-01 17:49:07.847 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.847 blo = 319, bhi = 1101
2025-07-01 17:49:07.847
2025-07-01 17:49:07.848 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.848 g = []
2025-07-01 17:49:07.848 if alo < ahi:
2025-07-01 17:49:07.848 if blo < bhi:
2025-07-01 17:49:07.848 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.848 else:
2025-07-01 17:49:07.848 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.848 elif blo < bhi:
2025-07-01 17:49:07.848 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.848
2025-07-01 17:49:07.848 > yield from g
2025-07-01 17:49:07.848
2025-07-01 17:49:07.848 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.848 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.848
2025-07-01 17:49:07.848 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.848 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.848 alo = 319, ahi = 1101
2025-07-01 17:49:07.848 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.848 blo = 319, bhi = 1101
2025-07-01 17:49:07.848
2025-07-01 17:49:07.849 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.849 r"""
2025-07-01 17:49:07.849 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.849 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.849 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.849 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.849
2025-07-01 17:49:07.849 Example:
2025-07-01 17:49:07.849
2025-07-01 17:49:07.849 >>> d = Differ()
2025-07-01 17:49:07.849 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.849 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.849 >>> print(''.join(results), end="")
2025-07-01 17:49:07.849 - abcDefghiJkl
2025-07-01 17:49:07.849 + abcdefGhijkl
2025-07-01 17:49:07.849 """
2025-07-01 17:49:07.849
2025-07-01 17:49:07.849 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.849 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.849 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.850 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.850 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.850
2025-07-01 17:49:07.850 # search for the pair that matches best without being identical
2025-07-01 17:49:07.850 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.850 # on junk -- unless we have to)
2025-07-01 17:49:07.850 for j in range(blo, bhi):
2025-07-01 17:49:07.850 bj = b[j]
2025-07-01 17:49:07.850 cruncher.set_seq2(bj)
2025-07-01 17:49:07.850 for i in range(alo, ahi):
2025-07-01 17:49:07.850 ai = a[i]
2025-07-01 17:49:07.850 if ai == bj:
2025-07-01 17:49:07.850 if eqi is None:
2025-07-01 17:49:07.850 eqi, eqj = i, j
2025-07-01 17:49:07.850 continue
2025-07-01 17:49:07.850 cruncher.set_seq1(ai)
2025-07-01 17:49:07.851 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.851 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.851 # compares by a factor of 3.
2025-07-01 17:49:07.851 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.851 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.851 # of the computation is cached by cruncher
2025-07-01 17:49:07.851 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.851 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.851 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.851 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.851 if best_ratio < cutoff:
2025-07-01 17:49:07.851 # no non-identical "pretty close" pair
2025-07-01 17:49:07.851 if eqi is None:
2025-07-01 17:49:07.851 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.851 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.851 return
2025-07-01 17:49:07.851 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.851 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.851 else:
2025-07-01 17:49:07.851 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.851 eqi = None
2025-07-01 17:49:07.852
2025-07-01 17:49:07.852 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.852 # identical
2025-07-01 17:49:07.852
2025-07-01 17:49:07.852 # pump out diffs from before the synch point
2025-07-01 17:49:07.852 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.852
2025-07-01 17:49:07.852 # do intraline marking on the synch pair
2025-07-01 17:49:07.852 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.852 if eqi is None:
2025-07-01 17:49:07.852 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.852 atags = btags = ""
2025-07-01 17:49:07.852 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.852 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.852 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.852 if tag == 'replace':
2025-07-01 17:49:07.852 atags += '^' * la
2025-07-01 17:49:07.852 btags += '^' * lb
2025-07-01 17:49:07.852 elif tag == 'delete':
2025-07-01 17:49:07.852 atags += '-' * la
2025-07-01 17:49:07.852 elif tag == 'insert':
2025-07-01 17:49:07.853 btags += '+' * lb
2025-07-01 17:49:07.853 elif tag == 'equal':
2025-07-01 17:49:07.853 atags += ' ' * la
2025-07-01 17:49:07.853 btags += ' ' * lb
2025-07-01 17:49:07.853 else:
2025-07-01 17:49:07.853 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.853 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.853 else:
2025-07-01 17:49:07.853 # the synch pair is identical
2025-07-01 17:49:07.853 yield ' ' + aelt
2025-07-01 17:49:07.853
2025-07-01 17:49:07.853 # pump out diffs from after the synch point
2025-07-01 17:49:07.853 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.853
2025-07-01 17:49:07.853 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.853 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.853
2025-07-01 17:49:07.853 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.853 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.853 alo = 320, ahi = 1101
2025-07-01 17:49:07.854 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.854 blo = 320, bhi = 1101
2025-07-01 17:49:07.854
2025-07-01 17:49:07.854 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.854 g = []
2025-07-01 17:49:07.854 if alo < ahi:
2025-07-01 17:49:07.854 if blo < bhi:
2025-07-01 17:49:07.854 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.854 else:
2025-07-01 17:49:07.854 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.854 elif blo < bhi:
2025-07-01 17:49:07.854 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.854
2025-07-01 17:49:07.854 > yield from g
2025-07-01 17:49:07.854
2025-07-01 17:49:07.854 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.854 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.854
2025-07-01 17:49:07.854 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.855 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.855 alo = 320, ahi = 1101
2025-07-01 17:49:07.855 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.855 blo = 320, bhi = 1101
2025-07-01 17:49:07.855
2025-07-01 17:49:07.855 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.855 r"""
2025-07-01 17:49:07.855 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.855 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.855 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.855 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.855
2025-07-01 17:49:07.855 Example:
2025-07-01 17:49:07.855
2025-07-01 17:49:07.855 >>> d = Differ()
2025-07-01 17:49:07.855 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.855 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.855 >>> print(''.join(results), end="")
2025-07-01 17:49:07.855 - abcDefghiJkl
2025-07-01 17:49:07.856 + abcdefGhijkl
2025-07-01 17:49:07.856 """
2025-07-01 17:49:07.856
2025-07-01 17:49:07.856 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.856 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.856 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.856 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.856 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.856
2025-07-01 17:49:07.856 # search for the pair that matches best without being identical
2025-07-01 17:49:07.856 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.856 # on junk -- unless we have to)
2025-07-01 17:49:07.856 for j in range(blo, bhi):
2025-07-01 17:49:07.856 bj = b[j]
2025-07-01 17:49:07.856 cruncher.set_seq2(bj)
2025-07-01 17:49:07.856 for i in range(alo, ahi):
2025-07-01 17:49:07.856 ai = a[i]
2025-07-01 17:49:07.856 if ai == bj:
2025-07-01 17:49:07.856 if eqi is None:
2025-07-01 17:49:07.856 eqi, eqj = i, j
2025-07-01 17:49:07.857 continue
2025-07-01 17:49:07.857 cruncher.set_seq1(ai)
2025-07-01 17:49:07.857 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.857 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.857 # compares by a factor of 3.
2025-07-01 17:49:07.857 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.857 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.857 # of the computation is cached by cruncher
2025-07-01 17:49:07.857 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.857 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.857 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.857 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.857 if best_ratio < cutoff:
2025-07-01 17:49:07.857 # no non-identical "pretty close" pair
2025-07-01 17:49:07.857 if eqi is None:
2025-07-01 17:49:07.857 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.857 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.857 return
2025-07-01 17:49:07.857 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.857 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.857 else:
2025-07-01 17:49:07.858 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.858 eqi = None
2025-07-01 17:49:07.858
2025-07-01 17:49:07.858 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.858 # identical
2025-07-01 17:49:07.858
2025-07-01 17:49:07.858 # pump out diffs from before the synch point
2025-07-01 17:49:07.858 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.858
2025-07-01 17:49:07.858 # do intraline marking on the synch pair
2025-07-01 17:49:07.858 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.858 if eqi is None:
2025-07-01 17:49:07.858 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.858 atags = btags = ""
2025-07-01 17:49:07.858 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.858 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.858 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.858 if tag == 'replace':
2025-07-01 17:49:07.858 atags += '^' * la
2025-07-01 17:49:07.858 btags += '^' * lb
2025-07-01 17:49:07.858 elif tag == 'delete':
2025-07-01 17:49:07.859 atags += '-' * la
2025-07-01 17:49:07.859 elif tag == 'insert':
2025-07-01 17:49:07.859 btags += '+' * lb
2025-07-01 17:49:07.859 elif tag == 'equal':
2025-07-01 17:49:07.859 atags += ' ' * la
2025-07-01 17:49:07.859 btags += ' ' * lb
2025-07-01 17:49:07.859 else:
2025-07-01 17:49:07.859 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.859 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.859 else:
2025-07-01 17:49:07.859 # the synch pair is identical
2025-07-01 17:49:07.859 yield ' ' + aelt
2025-07-01 17:49:07.859
2025-07-01 17:49:07.859 # pump out diffs from after the synch point
2025-07-01 17:49:07.859 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.859
2025-07-01 17:49:07.859 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.859 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.859
2025-07-01 17:49:07.859 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.859 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.859 alo = 321, ahi = 1101
2025-07-01 17:49:07.864 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.865 blo = 321, bhi = 1101
2025-07-01 17:49:07.865
2025-07-01 17:49:07.865 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.865 g = []
2025-07-01 17:49:07.865 if alo < ahi:
2025-07-01 17:49:07.865 if blo < bhi:
2025-07-01 17:49:07.865 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.865 else:
2025-07-01 17:49:07.865 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.865 elif blo < bhi:
2025-07-01 17:49:07.865 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.865
2025-07-01 17:49:07.865 > yield from g
2025-07-01 17:49:07.865
2025-07-01 17:49:07.865 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.865 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.865
2025-07-01 17:49:07.865 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.865 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.865 alo = 321, ahi = 1101
2025-07-01 17:49:07.866 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.866 blo = 321, bhi = 1101
2025-07-01 17:49:07.866
2025-07-01 17:49:07.866 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.866 r"""
2025-07-01 17:49:07.866 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.866 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.866 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.866 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.866
2025-07-01 17:49:07.866 Example:
2025-07-01 17:49:07.866
2025-07-01 17:49:07.866 >>> d = Differ()
2025-07-01 17:49:07.866 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.866 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.866 >>> print(''.join(results), end="")
2025-07-01 17:49:07.866 - abcDefghiJkl
2025-07-01 17:49:07.866 + abcdefGhijkl
2025-07-01 17:49:07.866 """
2025-07-01 17:49:07.867
2025-07-01 17:49:07.867 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.867 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.867 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.867 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.867 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.867
2025-07-01 17:49:07.867 # search for the pair that matches best without being identical
2025-07-01 17:49:07.867 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.867 # on junk -- unless we have to)
2025-07-01 17:49:07.867 for j in range(blo, bhi):
2025-07-01 17:49:07.867 bj = b[j]
2025-07-01 17:49:07.867 cruncher.set_seq2(bj)
2025-07-01 17:49:07.867 for i in range(alo, ahi):
2025-07-01 17:49:07.867 ai = a[i]
2025-07-01 17:49:07.867 if ai == bj:
2025-07-01 17:49:07.867 if eqi is None:
2025-07-01 17:49:07.867 eqi, eqj = i, j
2025-07-01 17:49:07.868 continue
2025-07-01 17:49:07.868 cruncher.set_seq1(ai)
2025-07-01 17:49:07.868 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.868 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.868 # compares by a factor of 3.
2025-07-01 17:49:07.868 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.868 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.868 # of the computation is cached by cruncher
2025-07-01 17:49:07.868 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.868 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.868 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.868 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.868 if best_ratio < cutoff:
2025-07-01 17:49:07.868 # no non-identical "pretty close" pair
2025-07-01 17:49:07.868 if eqi is None:
2025-07-01 17:49:07.868 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.868 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.868 return
2025-07-01 17:49:07.868 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.868 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.868 else:
2025-07-01 17:49:07.869 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.869 eqi = None
2025-07-01 17:49:07.869
2025-07-01 17:49:07.869 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.869 # identical
2025-07-01 17:49:07.869
2025-07-01 17:49:07.869 # pump out diffs from before the synch point
2025-07-01 17:49:07.869 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.869
2025-07-01 17:49:07.869 # do intraline marking on the synch pair
2025-07-01 17:49:07.869 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.869 if eqi is None:
2025-07-01 17:49:07.869 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.869 atags = btags = ""
2025-07-01 17:49:07.869 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.869 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.869 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.869 if tag == 'replace':
2025-07-01 17:49:07.869 atags += '^' * la
2025-07-01 17:49:07.869 btags += '^' * lb
2025-07-01 17:49:07.870 elif tag == 'delete':
2025-07-01 17:49:07.870 atags += '-' * la
2025-07-01 17:49:07.870 elif tag == 'insert':
2025-07-01 17:49:07.870 btags += '+' * lb
2025-07-01 17:49:07.870 elif tag == 'equal':
2025-07-01 17:49:07.870 atags += ' ' * la
2025-07-01 17:49:07.870 btags += ' ' * lb
2025-07-01 17:49:07.870 else:
2025-07-01 17:49:07.870 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.870 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.870 else:
2025-07-01 17:49:07.870 # the synch pair is identical
2025-07-01 17:49:07.870 yield ' ' + aelt
2025-07-01 17:49:07.870
2025-07-01 17:49:07.870 # pump out diffs from after the synch point
2025-07-01 17:49:07.870 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.870
2025-07-01 17:49:07.870 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.870 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.870
2025-07-01 17:49:07.870 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.871 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.871 alo = 322, ahi = 1101
2025-07-01 17:49:07.871 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.871 blo = 322, bhi = 1101
2025-07-01 17:49:07.871
2025-07-01 17:49:07.871 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.871 g = []
2025-07-01 17:49:07.871 if alo < ahi:
2025-07-01 17:49:07.871 if blo < bhi:
2025-07-01 17:49:07.871 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.871 else:
2025-07-01 17:49:07.871 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.871 elif blo < bhi:
2025-07-01 17:49:07.871 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.871
2025-07-01 17:49:07.871 > yield from g
2025-07-01 17:49:07.871
2025-07-01 17:49:07.871 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.871 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.871
2025-07-01 17:49:07.871 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.872 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.872 alo = 322, ahi = 1101
2025-07-01 17:49:07.872 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.872 blo = 322, bhi = 1101
2025-07-01 17:49:07.872
2025-07-01 17:49:07.872 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.872 r"""
2025-07-01 17:49:07.872 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.872 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.872 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.872 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.872
2025-07-01 17:49:07.872 Example:
2025-07-01 17:49:07.872
2025-07-01 17:49:07.872 >>> d = Differ()
2025-07-01 17:49:07.872 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.872 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.872 >>> print(''.join(results), end="")
2025-07-01 17:49:07.872 - abcDefghiJkl
2025-07-01 17:49:07.873 + abcdefGhijkl
2025-07-01 17:49:07.873 """
2025-07-01 17:49:07.873
2025-07-01 17:49:07.873 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.873 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.873 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.873 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.873 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.873
2025-07-01 17:49:07.873 # search for the pair that matches best without being identical
2025-07-01 17:49:07.873 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.873 # on junk -- unless we have to)
2025-07-01 17:49:07.873 for j in range(blo, bhi):
2025-07-01 17:49:07.873 bj = b[j]
2025-07-01 17:49:07.873 cruncher.set_seq2(bj)
2025-07-01 17:49:07.873 for i in range(alo, ahi):
2025-07-01 17:49:07.873 ai = a[i]
2025-07-01 17:49:07.873 if ai == bj:
2025-07-01 17:49:07.873 if eqi is None:
2025-07-01 17:49:07.873 eqi, eqj = i, j
2025-07-01 17:49:07.874 continue
2025-07-01 17:49:07.874 cruncher.set_seq1(ai)
2025-07-01 17:49:07.874 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.874 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.874 # compares by a factor of 3.
2025-07-01 17:49:07.874 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.874 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.874 # of the computation is cached by cruncher
2025-07-01 17:49:07.874 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.874 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.874 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.874 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.874 if best_ratio < cutoff:
2025-07-01 17:49:07.874 # no non-identical "pretty close" pair
2025-07-01 17:49:07.874 if eqi is None:
2025-07-01 17:49:07.874 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.874 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.874 return
2025-07-01 17:49:07.874 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.874 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.874 else:
2025-07-01 17:49:07.875 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.877 eqi = None
2025-07-01 17:49:07.877
2025-07-01 17:49:07.878 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.878 # identical
2025-07-01 17:49:07.878
2025-07-01 17:49:07.878 # pump out diffs from before the synch point
2025-07-01 17:49:07.878 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.878
2025-07-01 17:49:07.878 # do intraline marking on the synch pair
2025-07-01 17:49:07.878 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.878 if eqi is None:
2025-07-01 17:49:07.878 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.878 atags = btags = ""
2025-07-01 17:49:07.878 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.878 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.878 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.878 if tag == 'replace':
2025-07-01 17:49:07.878 atags += '^' * la
2025-07-01 17:49:07.878 btags += '^' * lb
2025-07-01 17:49:07.878 elif tag == 'delete':
2025-07-01 17:49:07.879 atags += '-' * la
2025-07-01 17:49:07.879 elif tag == 'insert':
2025-07-01 17:49:07.879 btags += '+' * lb
2025-07-01 17:49:07.879 elif tag == 'equal':
2025-07-01 17:49:07.879 atags += ' ' * la
2025-07-01 17:49:07.879 btags += ' ' * lb
2025-07-01 17:49:07.879 else:
2025-07-01 17:49:07.879 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.879 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.879 else:
2025-07-01 17:49:07.879 # the synch pair is identical
2025-07-01 17:49:07.879 yield ' ' + aelt
2025-07-01 17:49:07.879
2025-07-01 17:49:07.879 # pump out diffs from after the synch point
2025-07-01 17:49:07.879 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.879
2025-07-01 17:49:07.879 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.879 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.879
2025-07-01 17:49:07.879 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.879 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.879 alo = 323, ahi = 1101
2025-07-01 17:49:07.879 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.879 blo = 323, bhi = 1101
2025-07-01 17:49:07.879
2025-07-01 17:49:07.879 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.879 g = []
2025-07-01 17:49:07.879 if alo < ahi:
2025-07-01 17:49:07.879 if blo < bhi:
2025-07-01 17:49:07.880 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.880 else:
2025-07-01 17:49:07.880 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.880 elif blo < bhi:
2025-07-01 17:49:07.880 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.880
2025-07-01 17:49:07.880 > yield from g
2025-07-01 17:49:07.880
2025-07-01 17:49:07.880 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.880 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.880
2025-07-01 17:49:07.880 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.880 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.880 alo = 323, ahi = 1101
2025-07-01 17:49:07.880 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.880 blo = 323, bhi = 1101
2025-07-01 17:49:07.880
2025-07-01 17:49:07.880 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.881 r"""
2025-07-01 17:49:07.881 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.881 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.881 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.881 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.881
2025-07-01 17:49:07.881 Example:
2025-07-01 17:49:07.881
2025-07-01 17:49:07.881 >>> d = Differ()
2025-07-01 17:49:07.881 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.881 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.881 >>> print(''.join(results), end="")
2025-07-01 17:49:07.881 - abcDefghiJkl
2025-07-01 17:49:07.881 + abcdefGhijkl
2025-07-01 17:49:07.881 """
2025-07-01 17:49:07.881
2025-07-01 17:49:07.881 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.881 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.882 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.882 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.882 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.882
2025-07-01 17:49:07.882 # search for the pair that matches best without being identical
2025-07-01 17:49:07.882 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.882 # on junk -- unless we have to)
2025-07-01 17:49:07.882 for j in range(blo, bhi):
2025-07-01 17:49:07.882 bj = b[j]
2025-07-01 17:49:07.882 cruncher.set_seq2(bj)
2025-07-01 17:49:07.882 for i in range(alo, ahi):
2025-07-01 17:49:07.882 ai = a[i]
2025-07-01 17:49:07.882 if ai == bj:
2025-07-01 17:49:07.882 if eqi is None:
2025-07-01 17:49:07.882 eqi, eqj = i, j
2025-07-01 17:49:07.882 continue
2025-07-01 17:49:07.882 cruncher.set_seq1(ai)
2025-07-01 17:49:07.882 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.882 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.882 # compares by a factor of 3.
2025-07-01 17:49:07.882 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.883 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.883 # of the computation is cached by cruncher
2025-07-01 17:49:07.883 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.883 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.883 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.883 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.883 if best_ratio < cutoff:
2025-07-01 17:49:07.883 # no non-identical "pretty close" pair
2025-07-01 17:49:07.883 if eqi is None:
2025-07-01 17:49:07.883 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.883 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.883 return
2025-07-01 17:49:07.883 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.883 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.883 else:
2025-07-01 17:49:07.883 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.883 eqi = None
2025-07-01 17:49:07.883
2025-07-01 17:49:07.883 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.883 # identical
2025-07-01 17:49:07.883
2025-07-01 17:49:07.883 # pump out diffs from before the synch point
2025-07-01 17:49:07.884 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.884
2025-07-01 17:49:07.884 # do intraline marking on the synch pair
2025-07-01 17:49:07.884 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.884 if eqi is None:
2025-07-01 17:49:07.884 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.884 atags = btags = ""
2025-07-01 17:49:07.884 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.884 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.884 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.884 if tag == 'replace':
2025-07-01 17:49:07.884 atags += '^' * la
2025-07-01 17:49:07.884 btags += '^' * lb
2025-07-01 17:49:07.884 elif tag == 'delete':
2025-07-01 17:49:07.884 atags += '-' * la
2025-07-01 17:49:07.884 elif tag == 'insert':
2025-07-01 17:49:07.884 btags += '+' * lb
2025-07-01 17:49:07.884 elif tag == 'equal':
2025-07-01 17:49:07.884 atags += ' ' * la
2025-07-01 17:49:07.884 btags += ' ' * lb
2025-07-01 17:49:07.884 else:
2025-07-01 17:49:07.885 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.885 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.885 else:
2025-07-01 17:49:07.885 # the synch pair is identical
2025-07-01 17:49:07.885 yield ' ' + aelt
2025-07-01 17:49:07.885
2025-07-01 17:49:07.885 # pump out diffs from after the synch point
2025-07-01 17:49:07.885 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.885
2025-07-01 17:49:07.885 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.885 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.885
2025-07-01 17:49:07.885 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.885 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.885 alo = 324, ahi = 1101
2025-07-01 17:49:07.885 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.885 blo = 324, bhi = 1101
2025-07-01 17:49:07.885
2025-07-01 17:49:07.885 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.885 g = []
2025-07-01 17:49:07.885 if alo < ahi:
2025-07-01 17:49:07.885 if blo < bhi:
2025-07-01 17:49:07.886 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.886 else:
2025-07-01 17:49:07.886 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.886 elif blo < bhi:
2025-07-01 17:49:07.886 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.886
2025-07-01 17:49:07.886 > yield from g
2025-07-01 17:49:07.886
2025-07-01 17:49:07.886 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.886 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.886
2025-07-01 17:49:07.886 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.886 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.886 alo = 324, ahi = 1101
2025-07-01 17:49:07.886 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.886 blo = 324, bhi = 1101
2025-07-01 17:49:07.886
2025-07-01 17:49:07.886 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.887 r"""
2025-07-01 17:49:07.887 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.887 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.887 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.887 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.887
2025-07-01 17:49:07.887 Example:
2025-07-01 17:49:07.887
2025-07-01 17:49:07.887 >>> d = Differ()
2025-07-01 17:49:07.887 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.887 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.887 >>> print(''.join(results), end="")
2025-07-01 17:49:07.887 - abcDefghiJkl
2025-07-01 17:49:07.887 + abcdefGhijkl
2025-07-01 17:49:07.887 """
2025-07-01 17:49:07.887
2025-07-01 17:49:07.887 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.887 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.888 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.888 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.888 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.888
2025-07-01 17:49:07.888 # search for the pair that matches best without being identical
2025-07-01 17:49:07.888 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.888 # on junk -- unless we have to)
2025-07-01 17:49:07.888 for j in range(blo, bhi):
2025-07-01 17:49:07.888 bj = b[j]
2025-07-01 17:49:07.888 cruncher.set_seq2(bj)
2025-07-01 17:49:07.888 for i in range(alo, ahi):
2025-07-01 17:49:07.888 ai = a[i]
2025-07-01 17:49:07.888 if ai == bj:
2025-07-01 17:49:07.888 if eqi is None:
2025-07-01 17:49:07.888 eqi, eqj = i, j
2025-07-01 17:49:07.888 continue
2025-07-01 17:49:07.888 cruncher.set_seq1(ai)
2025-07-01 17:49:07.888 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.888 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.888 # compares by a factor of 3.
2025-07-01 17:49:07.888 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.889 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.889 # of the computation is cached by cruncher
2025-07-01 17:49:07.889 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.889 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.889 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.889 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.889 if best_ratio < cutoff:
2025-07-01 17:49:07.889 # no non-identical "pretty close" pair
2025-07-01 17:49:07.889 if eqi is None:
2025-07-01 17:49:07.889 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.889 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.889 return
2025-07-01 17:49:07.889 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.889 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.889 else:
2025-07-01 17:49:07.889 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.889 eqi = None
2025-07-01 17:49:07.889
2025-07-01 17:49:07.889 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.889 # identical
2025-07-01 17:49:07.889
2025-07-01 17:49:07.889 # pump out diffs from before the synch point
2025-07-01 17:49:07.895 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.895
2025-07-01 17:49:07.895 # do intraline marking on the synch pair
2025-07-01 17:49:07.895 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.896 if eqi is None:
2025-07-01 17:49:07.896 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.896 atags = btags = ""
2025-07-01 17:49:07.896 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.896 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.896 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.896 if tag == 'replace':
2025-07-01 17:49:07.896 atags += '^' * la
2025-07-01 17:49:07.896 btags += '^' * lb
2025-07-01 17:49:07.896 elif tag == 'delete':
2025-07-01 17:49:07.896 atags += '-' * la
2025-07-01 17:49:07.896 elif tag == 'insert':
2025-07-01 17:49:07.896 btags += '+' * lb
2025-07-01 17:49:07.896 elif tag == 'equal':
2025-07-01 17:49:07.896 atags += ' ' * la
2025-07-01 17:49:07.896 btags += ' ' * lb
2025-07-01 17:49:07.896 else:
2025-07-01 17:49:07.896 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.896 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.896 else:
2025-07-01 17:49:07.897 # the synch pair is identical
2025-07-01 17:49:07.897 yield ' ' + aelt
2025-07-01 17:49:07.897
2025-07-01 17:49:07.897 # pump out diffs from after the synch point
2025-07-01 17:49:07.897 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.897
2025-07-01 17:49:07.897 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.897 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.897
2025-07-01 17:49:07.897 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.897 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.897 alo = 325, ahi = 1101
2025-07-01 17:49:07.897 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.897 blo = 325, bhi = 1101
2025-07-01 17:49:07.897
2025-07-01 17:49:07.897 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.897 g = []
2025-07-01 17:49:07.897 if alo < ahi:
2025-07-01 17:49:07.897 if blo < bhi:
2025-07-01 17:49:07.897 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.897 else:
2025-07-01 17:49:07.898 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.898 elif blo < bhi:
2025-07-01 17:49:07.898 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.898
2025-07-01 17:49:07.898 > yield from g
2025-07-01 17:49:07.898
2025-07-01 17:49:07.898 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.898 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.898
2025-07-01 17:49:07.898 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.898 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.898 alo = 325, ahi = 1101
2025-07-01 17:49:07.898 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.898 blo = 325, bhi = 1101
2025-07-01 17:49:07.898
2025-07-01 17:49:07.898 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.898 r"""
2025-07-01 17:49:07.898 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.898 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.898 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.898 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.899
2025-07-01 17:49:07.899 Example:
2025-07-01 17:49:07.899
2025-07-01 17:49:07.899 >>> d = Differ()
2025-07-01 17:49:07.899 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.899 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.899 >>> print(''.join(results), end="")
2025-07-01 17:49:07.899 - abcDefghiJkl
2025-07-01 17:49:07.899 + abcdefGhijkl
2025-07-01 17:49:07.899 """
2025-07-01 17:49:07.899
2025-07-01 17:49:07.899 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.899 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.899 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.899 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.899 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.899
2025-07-01 17:49:07.899 # search for the pair that matches best without being identical
2025-07-01 17:49:07.899 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.899 # on junk -- unless we have to)
2025-07-01 17:49:07.900 for j in range(blo, bhi):
2025-07-01 17:49:07.900 bj = b[j]
2025-07-01 17:49:07.900 cruncher.set_seq2(bj)
2025-07-01 17:49:07.900 for i in range(alo, ahi):
2025-07-01 17:49:07.900 ai = a[i]
2025-07-01 17:49:07.900 if ai == bj:
2025-07-01 17:49:07.900 if eqi is None:
2025-07-01 17:49:07.900 eqi, eqj = i, j
2025-07-01 17:49:07.900 continue
2025-07-01 17:49:07.900 cruncher.set_seq1(ai)
2025-07-01 17:49:07.900 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.900 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.900 # compares by a factor of 3.
2025-07-01 17:49:07.900 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.900 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.900 # of the computation is cached by cruncher
2025-07-01 17:49:07.900 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.900 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.900 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.901 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.901 if best_ratio < cutoff:
2025-07-01 17:49:07.901 # no non-identical "pretty close" pair
2025-07-01 17:49:07.901 if eqi is None:
2025-07-01 17:49:07.901 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.901 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.901 return
2025-07-01 17:49:07.901 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.901 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.901 else:
2025-07-01 17:49:07.901 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.901 eqi = None
2025-07-01 17:49:07.901
2025-07-01 17:49:07.901 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.901 # identical
2025-07-01 17:49:07.901
2025-07-01 17:49:07.901 # pump out diffs from before the synch point
2025-07-01 17:49:07.901 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.901
2025-07-01 17:49:07.901 # do intraline marking on the synch pair
2025-07-01 17:49:07.902 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.902 if eqi is None:
2025-07-01 17:49:07.902 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.902 atags = btags = ""
2025-07-01 17:49:07.902 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.902 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.902 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.902 if tag == 'replace':
2025-07-01 17:49:07.902 atags += '^' * la
2025-07-01 17:49:07.902 btags += '^' * lb
2025-07-01 17:49:07.902 elif tag == 'delete':
2025-07-01 17:49:07.902 atags += '-' * la
2025-07-01 17:49:07.902 elif tag == 'insert':
2025-07-01 17:49:07.902 btags += '+' * lb
2025-07-01 17:49:07.902 elif tag == 'equal':
2025-07-01 17:49:07.902 atags += ' ' * la
2025-07-01 17:49:07.902 btags += ' ' * lb
2025-07-01 17:49:07.902 else:
2025-07-01 17:49:07.902 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.902 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.903 else:
2025-07-01 17:49:07.903 # the synch pair is identical
2025-07-01 17:49:07.903 yield ' ' + aelt
2025-07-01 17:49:07.903
2025-07-01 17:49:07.903 # pump out diffs from after the synch point
2025-07-01 17:49:07.903 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.903
2025-07-01 17:49:07.903 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.903 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.903
2025-07-01 17:49:07.903 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.903 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.903 alo = 326, ahi = 1101
2025-07-01 17:49:07.903 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.903 blo = 326, bhi = 1101
2025-07-01 17:49:07.903
2025-07-01 17:49:07.903 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.903 g = []
2025-07-01 17:49:07.903 if alo < ahi:
2025-07-01 17:49:07.903 if blo < bhi:
2025-07-01 17:49:07.903 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.904 else:
2025-07-01 17:49:07.904 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.904 elif blo < bhi:
2025-07-01 17:49:07.904 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.904
2025-07-01 17:49:07.904 > yield from g
2025-07-01 17:49:07.904
2025-07-01 17:49:07.904 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.904 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.904
2025-07-01 17:49:07.904 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.904 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.904 alo = 326, ahi = 1101
2025-07-01 17:49:07.904 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.904 blo = 326, bhi = 1101
2025-07-01 17:49:07.904
2025-07-01 17:49:07.904 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.904 r"""
2025-07-01 17:49:07.904 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.904 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.905 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.905 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.905
2025-07-01 17:49:07.905 Example:
2025-07-01 17:49:07.905
2025-07-01 17:49:07.905 >>> d = Differ()
2025-07-01 17:49:07.905 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.905 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.905 >>> print(''.join(results), end="")
2025-07-01 17:49:07.905 - abcDefghiJkl
2025-07-01 17:49:07.905 + abcdefGhijkl
2025-07-01 17:49:07.905 """
2025-07-01 17:49:07.905
2025-07-01 17:49:07.905 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.905 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.905 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.905 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.905 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.905
2025-07-01 17:49:07.908 # search for the pair that matches best without being identical
2025-07-01 17:49:07.909 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.909 # on junk -- unless we have to)
2025-07-01 17:49:07.909 for j in range(blo, bhi):
2025-07-01 17:49:07.909 bj = b[j]
2025-07-01 17:49:07.909 cruncher.set_seq2(bj)
2025-07-01 17:49:07.909 for i in range(alo, ahi):
2025-07-01 17:49:07.909 ai = a[i]
2025-07-01 17:49:07.909 if ai == bj:
2025-07-01 17:49:07.909 if eqi is None:
2025-07-01 17:49:07.909 eqi, eqj = i, j
2025-07-01 17:49:07.909 continue
2025-07-01 17:49:07.909 cruncher.set_seq1(ai)
2025-07-01 17:49:07.909 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.909 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.909 # compares by a factor of 3.
2025-07-01 17:49:07.909 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.909 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.909 # of the computation is cached by cruncher
2025-07-01 17:49:07.909 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.909 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.910 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.910 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.910 if best_ratio < cutoff:
2025-07-01 17:49:07.910 # no non-identical "pretty close" pair
2025-07-01 17:49:07.910 if eqi is None:
2025-07-01 17:49:07.910 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.910 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.910 return
2025-07-01 17:49:07.910 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.910 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.910 else:
2025-07-01 17:49:07.910 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.910 eqi = None
2025-07-01 17:49:07.910
2025-07-01 17:49:07.910 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.910 # identical
2025-07-01 17:49:07.910
2025-07-01 17:49:07.910 # pump out diffs from before the synch point
2025-07-01 17:49:07.910 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.910
2025-07-01 17:49:07.910 # do intraline marking on the synch pair
2025-07-01 17:49:07.911 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.911 if eqi is None:
2025-07-01 17:49:07.911 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.911 atags = btags = ""
2025-07-01 17:49:07.911 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.911 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.911 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.911 if tag == 'replace':
2025-07-01 17:49:07.911 atags += '^' * la
2025-07-01 17:49:07.911 btags += '^' * lb
2025-07-01 17:49:07.911 elif tag == 'delete':
2025-07-01 17:49:07.911 atags += '-' * la
2025-07-01 17:49:07.911 elif tag == 'insert':
2025-07-01 17:49:07.911 btags += '+' * lb
2025-07-01 17:49:07.911 elif tag == 'equal':
2025-07-01 17:49:07.911 atags += ' ' * la
2025-07-01 17:49:07.911 btags += ' ' * lb
2025-07-01 17:49:07.911 else:
2025-07-01 17:49:07.911 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.911 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.911 else:
2025-07-01 17:49:07.912 # the synch pair is identical
2025-07-01 17:49:07.912 yield ' ' + aelt
2025-07-01 17:49:07.912
2025-07-01 17:49:07.912 # pump out diffs from after the synch point
2025-07-01 17:49:07.912 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.912
2025-07-01 17:49:07.912 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.912 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.912
2025-07-01 17:49:07.912 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.912 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.912 alo = 327, ahi = 1101
2025-07-01 17:49:07.912 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.912 blo = 327, bhi = 1101
2025-07-01 17:49:07.912
2025-07-01 17:49:07.912 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.912 g = []
2025-07-01 17:49:07.912 if alo < ahi:
2025-07-01 17:49:07.913 if blo < bhi:
2025-07-01 17:49:07.913 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.913 else:
2025-07-01 17:49:07.913 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.913 elif blo < bhi:
2025-07-01 17:49:07.913 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.913
2025-07-01 17:49:07.913 > yield from g
2025-07-01 17:49:07.913
2025-07-01 17:49:07.913 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.913 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.913
2025-07-01 17:49:07.913 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.913 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.913 alo = 327, ahi = 1101
2025-07-01 17:49:07.913 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.913 blo = 327, bhi = 1101
2025-07-01 17:49:07.913
2025-07-01 17:49:07.913 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.913 r"""
2025-07-01 17:49:07.914 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.914 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.914 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.914 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.914
2025-07-01 17:49:07.914 Example:
2025-07-01 17:49:07.914
2025-07-01 17:49:07.914 >>> d = Differ()
2025-07-01 17:49:07.914 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.914 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.914 >>> print(''.join(results), end="")
2025-07-01 17:49:07.914 - abcDefghiJkl
2025-07-01 17:49:07.914 + abcdefGhijkl
2025-07-01 17:49:07.914 """
2025-07-01 17:49:07.914
2025-07-01 17:49:07.914 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.914 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.914 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.914 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.915 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.915
2025-07-01 17:49:07.915 # search for the pair that matches best without being identical
2025-07-01 17:49:07.915 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.915 # on junk -- unless we have to)
2025-07-01 17:49:07.915 for j in range(blo, bhi):
2025-07-01 17:49:07.915 bj = b[j]
2025-07-01 17:49:07.915 cruncher.set_seq2(bj)
2025-07-01 17:49:07.915 for i in range(alo, ahi):
2025-07-01 17:49:07.915 ai = a[i]
2025-07-01 17:49:07.915 if ai == bj:
2025-07-01 17:49:07.915 if eqi is None:
2025-07-01 17:49:07.915 eqi, eqj = i, j
2025-07-01 17:49:07.915 continue
2025-07-01 17:49:07.915 cruncher.set_seq1(ai)
2025-07-01 17:49:07.915 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.915 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.915 # compares by a factor of 3.
2025-07-01 17:49:07.915 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.915 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.915 # of the computation is cached by cruncher
2025-07-01 17:49:07.916 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.916 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.916 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.916 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.916 if best_ratio < cutoff:
2025-07-01 17:49:07.916 # no non-identical "pretty close" pair
2025-07-01 17:49:07.916 if eqi is None:
2025-07-01 17:49:07.916 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.916 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.916 return
2025-07-01 17:49:07.916 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.916 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.916 else:
2025-07-01 17:49:07.916 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.916 eqi = None
2025-07-01 17:49:07.916
2025-07-01 17:49:07.916 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.916 # identical
2025-07-01 17:49:07.916
2025-07-01 17:49:07.916 # pump out diffs from before the synch point
2025-07-01 17:49:07.917 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.917
2025-07-01 17:49:07.917 # do intraline marking on the synch pair
2025-07-01 17:49:07.917 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.917 if eqi is None:
2025-07-01 17:49:07.917 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.917 atags = btags = ""
2025-07-01 17:49:07.917 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.917 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.917 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.917 if tag == 'replace':
2025-07-01 17:49:07.917 atags += '^' * la
2025-07-01 17:49:07.917 btags += '^' * lb
2025-07-01 17:49:07.917 elif tag == 'delete':
2025-07-01 17:49:07.917 atags += '-' * la
2025-07-01 17:49:07.917 elif tag == 'insert':
2025-07-01 17:49:07.917 btags += '+' * lb
2025-07-01 17:49:07.917 elif tag == 'equal':
2025-07-01 17:49:07.917 atags += ' ' * la
2025-07-01 17:49:07.917 btags += ' ' * lb
2025-07-01 17:49:07.917 else:
2025-07-01 17:49:07.918 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.918 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.918 else:
2025-07-01 17:49:07.918 # the synch pair is identical
2025-07-01 17:49:07.918 yield ' ' + aelt
2025-07-01 17:49:07.918
2025-07-01 17:49:07.918 # pump out diffs from after the synch point
2025-07-01 17:49:07.918 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.918
2025-07-01 17:49:07.918 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.918 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.918
2025-07-01 17:49:07.918 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.918 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.918 alo = 328, ahi = 1101
2025-07-01 17:49:07.918 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.918 blo = 328, bhi = 1101
2025-07-01 17:49:07.918
2025-07-01 17:49:07.918 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.918 g = []
2025-07-01 17:49:07.919 if alo < ahi:
2025-07-01 17:49:07.919 if blo < bhi:
2025-07-01 17:49:07.919 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.919 else:
2025-07-01 17:49:07.919 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.919 elif blo < bhi:
2025-07-01 17:49:07.919 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.919
2025-07-01 17:49:07.919 > yield from g
2025-07-01 17:49:07.919
2025-07-01 17:49:07.919 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.919 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.919
2025-07-01 17:49:07.919 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.919 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.919 alo = 328, ahi = 1101
2025-07-01 17:49:07.919 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.919 blo = 328, bhi = 1101
2025-07-01 17:49:07.919
2025-07-01 17:49:07.919 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.919 r"""
2025-07-01 17:49:07.920 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.920 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.920 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.920 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.920
2025-07-01 17:49:07.920 Example:
2025-07-01 17:49:07.920
2025-07-01 17:49:07.920 >>> d = Differ()
2025-07-01 17:49:07.920 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.920 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.920 >>> print(''.join(results), end="")
2025-07-01 17:49:07.920 - abcDefghiJkl
2025-07-01 17:49:07.920 + abcdefGhijkl
2025-07-01 17:49:07.920 """
2025-07-01 17:49:07.920
2025-07-01 17:49:07.920 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.920 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.921 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.921 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.921 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.921
2025-07-01 17:49:07.921 # search for the pair that matches best without being identical
2025-07-01 17:49:07.921 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.921 # on junk -- unless we have to)
2025-07-01 17:49:07.921 for j in range(blo, bhi):
2025-07-01 17:49:07.921 bj = b[j]
2025-07-01 17:49:07.921 cruncher.set_seq2(bj)
2025-07-01 17:49:07.921 for i in range(alo, ahi):
2025-07-01 17:49:07.921 ai = a[i]
2025-07-01 17:49:07.921 if ai == bj:
2025-07-01 17:49:07.921 if eqi is None:
2025-07-01 17:49:07.921 eqi, eqj = i, j
2025-07-01 17:49:07.921 continue
2025-07-01 17:49:07.926 cruncher.set_seq1(ai)
2025-07-01 17:49:07.926 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.926 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.927 # compares by a factor of 3.
2025-07-01 17:49:07.927 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.927 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.927 # of the computation is cached by cruncher
2025-07-01 17:49:07.927 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.927 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.927 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.927 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.927 if best_ratio < cutoff:
2025-07-01 17:49:07.927 # no non-identical "pretty close" pair
2025-07-01 17:49:07.927 if eqi is None:
2025-07-01 17:49:07.927 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.927 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.927 return
2025-07-01 17:49:07.927 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.927 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.928 else:
2025-07-01 17:49:07.928 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.928 eqi = None
2025-07-01 17:49:07.928
2025-07-01 17:49:07.928 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.928 # identical
2025-07-01 17:49:07.928
2025-07-01 17:49:07.928 # pump out diffs from before the synch point
2025-07-01 17:49:07.928 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.928
2025-07-01 17:49:07.928 # do intraline marking on the synch pair
2025-07-01 17:49:07.928 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.928 if eqi is None:
2025-07-01 17:49:07.928 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.928 atags = btags = ""
2025-07-01 17:49:07.928 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.928 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.929 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.929 if tag == 'replace':
2025-07-01 17:49:07.929 atags += '^' * la
2025-07-01 17:49:07.929 btags += '^' * lb
2025-07-01 17:49:07.929 elif tag == 'delete':
2025-07-01 17:49:07.929 atags += '-' * la
2025-07-01 17:49:07.929 elif tag == 'insert':
2025-07-01 17:49:07.929 btags += '+' * lb
2025-07-01 17:49:07.929 elif tag == 'equal':
2025-07-01 17:49:07.929 atags += ' ' * la
2025-07-01 17:49:07.929 btags += ' ' * lb
2025-07-01 17:49:07.929 else:
2025-07-01 17:49:07.929 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.929 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.929 else:
2025-07-01 17:49:07.929 # the synch pair is identical
2025-07-01 17:49:07.930 yield ' ' + aelt
2025-07-01 17:49:07.930
2025-07-01 17:49:07.930 # pump out diffs from after the synch point
2025-07-01 17:49:07.930 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.930
2025-07-01 17:49:07.930 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.930 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.930
2025-07-01 17:49:07.930 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.930 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.930 alo = 329, ahi = 1101
2025-07-01 17:49:07.930 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.930 blo = 329, bhi = 1101
2025-07-01 17:49:07.930
2025-07-01 17:49:07.930 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.930 g = []
2025-07-01 17:49:07.931 if alo < ahi:
2025-07-01 17:49:07.931 if blo < bhi:
2025-07-01 17:49:07.931 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.931 else:
2025-07-01 17:49:07.931 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.931 elif blo < bhi:
2025-07-01 17:49:07.931 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.931
2025-07-01 17:49:07.931 > yield from g
2025-07-01 17:49:07.931
2025-07-01 17:49:07.931 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.931 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.931
2025-07-01 17:49:07.931 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.931 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.931 alo = 329, ahi = 1101
2025-07-01 17:49:07.931 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.932 blo = 329, bhi = 1101
2025-07-01 17:49:07.932
2025-07-01 17:49:07.932 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.932 r"""
2025-07-01 17:49:07.932 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.932 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.932 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.932 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.932
2025-07-01 17:49:07.932 Example:
2025-07-01 17:49:07.932
2025-07-01 17:49:07.932 >>> d = Differ()
2025-07-01 17:49:07.932 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.932 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.932 >>> print(''.join(results), end="")
2025-07-01 17:49:07.932 - abcDefghiJkl
2025-07-01 17:49:07.933 + abcdefGhijkl
2025-07-01 17:49:07.933 """
2025-07-01 17:49:07.933
2025-07-01 17:49:07.933 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.933 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.933 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.933 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.933 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.933
2025-07-01 17:49:07.933 # search for the pair that matches best without being identical
2025-07-01 17:49:07.933 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.933 # on junk -- unless we have to)
2025-07-01 17:49:07.933 for j in range(blo, bhi):
2025-07-01 17:49:07.933 bj = b[j]
2025-07-01 17:49:07.933 cruncher.set_seq2(bj)
2025-07-01 17:49:07.934 for i in range(alo, ahi):
2025-07-01 17:49:07.934 ai = a[i]
2025-07-01 17:49:07.934 if ai == bj:
2025-07-01 17:49:07.934 if eqi is None:
2025-07-01 17:49:07.934 eqi, eqj = i, j
2025-07-01 17:49:07.934 continue
2025-07-01 17:49:07.934 cruncher.set_seq1(ai)
2025-07-01 17:49:07.934 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.934 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.934 # compares by a factor of 3.
2025-07-01 17:49:07.934 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.934 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.934 # of the computation is cached by cruncher
2025-07-01 17:49:07.934 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.934 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.934 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.935 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.935 if best_ratio < cutoff:
2025-07-01 17:49:07.935 # no non-identical "pretty close" pair
2025-07-01 17:49:07.935 if eqi is None:
2025-07-01 17:49:07.935 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.935 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.935 return
2025-07-01 17:49:07.935 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.935 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.935 else:
2025-07-01 17:49:07.935 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.935 eqi = None
2025-07-01 17:49:07.935
2025-07-01 17:49:07.935 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.935 # identical
2025-07-01 17:49:07.935
2025-07-01 17:49:07.936 # pump out diffs from before the synch point
2025-07-01 17:49:07.936 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.936
2025-07-01 17:49:07.936 # do intraline marking on the synch pair
2025-07-01 17:49:07.936 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.936 if eqi is None:
2025-07-01 17:49:07.936 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.936 atags = btags = ""
2025-07-01 17:49:07.936 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.936 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.936 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.936 if tag == 'replace':
2025-07-01 17:49:07.936 atags += '^' * la
2025-07-01 17:49:07.936 btags += '^' * lb
2025-07-01 17:49:07.936 elif tag == 'delete':
2025-07-01 17:49:07.936 atags += '-' * la
2025-07-01 17:49:07.937 elif tag == 'insert':
2025-07-01 17:49:07.937 btags += '+' * lb
2025-07-01 17:49:07.937 elif tag == 'equal':
2025-07-01 17:49:07.937 atags += ' ' * la
2025-07-01 17:49:07.937 btags += ' ' * lb
2025-07-01 17:49:07.937 else:
2025-07-01 17:49:07.937 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.937 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.937 else:
2025-07-01 17:49:07.937 # the synch pair is identical
2025-07-01 17:49:07.937 yield ' ' + aelt
2025-07-01 17:49:07.937
2025-07-01 17:49:07.937 # pump out diffs from after the synch point
2025-07-01 17:49:07.937 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.937
2025-07-01 17:49:07.937 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.937 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.941
2025-07-01 17:49:07.941 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.941 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.941 alo = 330, ahi = 1101
2025-07-01 17:49:07.941 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.941 blo = 330, bhi = 1101
2025-07-01 17:49:07.941
2025-07-01 17:49:07.941 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.941 g = []
2025-07-01 17:49:07.941 if alo < ahi:
2025-07-01 17:49:07.942 if blo < bhi:
2025-07-01 17:49:07.942 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.942 else:
2025-07-01 17:49:07.942 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.942 elif blo < bhi:
2025-07-01 17:49:07.942 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.942
2025-07-01 17:49:07.942 > yield from g
2025-07-01 17:49:07.942
2025-07-01 17:49:07.942 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.942 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.942
2025-07-01 17:49:07.942 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.942 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.942 alo = 330, ahi = 1101
2025-07-01 17:49:07.942 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.942 blo = 330, bhi = 1101
2025-07-01 17:49:07.943
2025-07-01 17:49:07.943 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.943 r"""
2025-07-01 17:49:07.943 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.943 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.943 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.943 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.943
2025-07-01 17:49:07.943 Example:
2025-07-01 17:49:07.943
2025-07-01 17:49:07.943 >>> d = Differ()
2025-07-01 17:49:07.943 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.943 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.943 >>> print(''.join(results), end="")
2025-07-01 17:49:07.943 - abcDefghiJkl
2025-07-01 17:49:07.943 + abcdefGhijkl
2025-07-01 17:49:07.944 """
2025-07-01 17:49:07.944
2025-07-01 17:49:07.944 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.944 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.944 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.944 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.944 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.944
2025-07-01 17:49:07.944 # search for the pair that matches best without being identical
2025-07-01 17:49:07.944 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.944 # on junk -- unless we have to)
2025-07-01 17:49:07.944 for j in range(blo, bhi):
2025-07-01 17:49:07.944 bj = b[j]
2025-07-01 17:49:07.944 cruncher.set_seq2(bj)
2025-07-01 17:49:07.944 for i in range(alo, ahi):
2025-07-01 17:49:07.945 ai = a[i]
2025-07-01 17:49:07.945 if ai == bj:
2025-07-01 17:49:07.945 if eqi is None:
2025-07-01 17:49:07.945 eqi, eqj = i, j
2025-07-01 17:49:07.945 continue
2025-07-01 17:49:07.945 cruncher.set_seq1(ai)
2025-07-01 17:49:07.945 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.945 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.945 # compares by a factor of 3.
2025-07-01 17:49:07.945 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.945 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.945 # of the computation is cached by cruncher
2025-07-01 17:49:07.945 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.946 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.946 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.946 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.946 if best_ratio < cutoff:
2025-07-01 17:49:07.946 # no non-identical "pretty close" pair
2025-07-01 17:49:07.946 if eqi is None:
2025-07-01 17:49:07.946 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.946 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.946 return
2025-07-01 17:49:07.946 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.946 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.946 else:
2025-07-01 17:49:07.946 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.946 eqi = None
2025-07-01 17:49:07.946
2025-07-01 17:49:07.946 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.947 # identical
2025-07-01 17:49:07.947
2025-07-01 17:49:07.947 # pump out diffs from before the synch point
2025-07-01 17:49:07.947 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.947
2025-07-01 17:49:07.947 # do intraline marking on the synch pair
2025-07-01 17:49:07.947 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.947 if eqi is None:
2025-07-01 17:49:07.947 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.947 atags = btags = ""
2025-07-01 17:49:07.947 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.947 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.947 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.947 if tag == 'replace':
2025-07-01 17:49:07.947 atags += '^' * la
2025-07-01 17:49:07.947 btags += '^' * lb
2025-07-01 17:49:07.947 elif tag == 'delete':
2025-07-01 17:49:07.948 atags += '-' * la
2025-07-01 17:49:07.948 elif tag == 'insert':
2025-07-01 17:49:07.948 btags += '+' * lb
2025-07-01 17:49:07.948 elif tag == 'equal':
2025-07-01 17:49:07.948 atags += ' ' * la
2025-07-01 17:49:07.948 btags += ' ' * lb
2025-07-01 17:49:07.948 else:
2025-07-01 17:49:07.948 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.948 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.948 else:
2025-07-01 17:49:07.948 # the synch pair is identical
2025-07-01 17:49:07.948 yield ' ' + aelt
2025-07-01 17:49:07.948
2025-07-01 17:49:07.948 # pump out diffs from after the synch point
2025-07-01 17:49:07.948 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.948
2025-07-01 17:49:07.949 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.949 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.949
2025-07-01 17:49:07.949 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.949 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.949 alo = 331, ahi = 1101
2025-07-01 17:49:07.949 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.949 blo = 331, bhi = 1101
2025-07-01 17:49:07.949
2025-07-01 17:49:07.949 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.949 g = []
2025-07-01 17:49:07.949 if alo < ahi:
2025-07-01 17:49:07.949 if blo < bhi:
2025-07-01 17:49:07.949 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.949 else:
2025-07-01 17:49:07.950 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.950 elif blo < bhi:
2025-07-01 17:49:07.950 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.950
2025-07-01 17:49:07.950 > yield from g
2025-07-01 17:49:07.950
2025-07-01 17:49:07.950 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.950 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.950
2025-07-01 17:49:07.950 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.950 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.950 alo = 331, ahi = 1101
2025-07-01 17:49:07.950 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.950 blo = 331, bhi = 1101
2025-07-01 17:49:07.950
2025-07-01 17:49:07.950 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.951 r"""
2025-07-01 17:49:07.951 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.951 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.951 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.951 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.951
2025-07-01 17:49:07.951 Example:
2025-07-01 17:49:07.951
2025-07-01 17:49:07.951 >>> d = Differ()
2025-07-01 17:49:07.951 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.951 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.951 >>> print(''.join(results), end="")
2025-07-01 17:49:07.951 - abcDefghiJkl
2025-07-01 17:49:07.951 + abcdefGhijkl
2025-07-01 17:49:07.951 """
2025-07-01 17:49:07.952
2025-07-01 17:49:07.952 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.952 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.952 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.952 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.952 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.952
2025-07-01 17:49:07.952 # search for the pair that matches best without being identical
2025-07-01 17:49:07.952 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.952 # on junk -- unless we have to)
2025-07-01 17:49:07.952 for j in range(blo, bhi):
2025-07-01 17:49:07.952 bj = b[j]
2025-07-01 17:49:07.952 cruncher.set_seq2(bj)
2025-07-01 17:49:07.952 for i in range(alo, ahi):
2025-07-01 17:49:07.952 ai = a[i]
2025-07-01 17:49:07.952 if ai == bj:
2025-07-01 17:49:07.953 if eqi is None:
2025-07-01 17:49:07.953 eqi, eqj = i, j
2025-07-01 17:49:07.953 continue
2025-07-01 17:49:07.953 cruncher.set_seq1(ai)
2025-07-01 17:49:07.953 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.953 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.953 # compares by a factor of 3.
2025-07-01 17:49:07.953 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.953 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.953 # of the computation is cached by cruncher
2025-07-01 17:49:07.953 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.953 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.953 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.953 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.953 if best_ratio < cutoff:
2025-07-01 17:49:07.953 # no non-identical "pretty close" pair
2025-07-01 17:49:07.954 if eqi is None:
2025-07-01 17:49:07.959 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.959 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.959 return
2025-07-01 17:49:07.959 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.959 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.959 else:
2025-07-01 17:49:07.959 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.959 eqi = None
2025-07-01 17:49:07.959
2025-07-01 17:49:07.959 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.959 # identical
2025-07-01 17:49:07.959
2025-07-01 17:49:07.959 # pump out diffs from before the synch point
2025-07-01 17:49:07.959 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.960
2025-07-01 17:49:07.960 # do intraline marking on the synch pair
2025-07-01 17:49:07.960 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.960 if eqi is None:
2025-07-01 17:49:07.960 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.960 atags = btags = ""
2025-07-01 17:49:07.960 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.960 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.960 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.960 if tag == 'replace':
2025-07-01 17:49:07.960 atags += '^' * la
2025-07-01 17:49:07.960 btags += '^' * lb
2025-07-01 17:49:07.960 elif tag == 'delete':
2025-07-01 17:49:07.960 atags += '-' * la
2025-07-01 17:49:07.960 elif tag == 'insert':
2025-07-01 17:49:07.960 btags += '+' * lb
2025-07-01 17:49:07.960 elif tag == 'equal':
2025-07-01 17:49:07.961 atags += ' ' * la
2025-07-01 17:49:07.961 btags += ' ' * lb
2025-07-01 17:49:07.961 else:
2025-07-01 17:49:07.961 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.961 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.961 else:
2025-07-01 17:49:07.961 # the synch pair is identical
2025-07-01 17:49:07.961 yield ' ' + aelt
2025-07-01 17:49:07.961
2025-07-01 17:49:07.961 # pump out diffs from after the synch point
2025-07-01 17:49:07.961 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.961
2025-07-01 17:49:07.961 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.961 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.961
2025-07-01 17:49:07.961 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.962 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.962 alo = 334, ahi = 1101
2025-07-01 17:49:07.962 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.962 blo = 334, bhi = 1101
2025-07-01 17:49:07.962
2025-07-01 17:49:07.962 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.962 g = []
2025-07-01 17:49:07.962 if alo < ahi:
2025-07-01 17:49:07.962 if blo < bhi:
2025-07-01 17:49:07.962 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.962 else:
2025-07-01 17:49:07.962 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.962 elif blo < bhi:
2025-07-01 17:49:07.962 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.962
2025-07-01 17:49:07.962 > yield from g
2025-07-01 17:49:07.962
2025-07-01 17:49:07.963 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.963 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.963
2025-07-01 17:49:07.963 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.963 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.963 alo = 334, ahi = 1101
2025-07-01 17:49:07.963 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.963 blo = 334, bhi = 1101
2025-07-01 17:49:07.963
2025-07-01 17:49:07.963 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.963 r"""
2025-07-01 17:49:07.963 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.963 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.963 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.963 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.963
2025-07-01 17:49:07.964 Example:
2025-07-01 17:49:07.964
2025-07-01 17:49:07.964 >>> d = Differ()
2025-07-01 17:49:07.964 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.964 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.964 >>> print(''.join(results), end="")
2025-07-01 17:49:07.964 - abcDefghiJkl
2025-07-01 17:49:07.964 + abcdefGhijkl
2025-07-01 17:49:07.964 """
2025-07-01 17:49:07.964
2025-07-01 17:49:07.964 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.964 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.964 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.964 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.965 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.965
2025-07-01 17:49:07.965 # search for the pair that matches best without being identical
2025-07-01 17:49:07.965 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.965 # on junk -- unless we have to)
2025-07-01 17:49:07.965 for j in range(blo, bhi):
2025-07-01 17:49:07.965 bj = b[j]
2025-07-01 17:49:07.965 cruncher.set_seq2(bj)
2025-07-01 17:49:07.965 for i in range(alo, ahi):
2025-07-01 17:49:07.965 ai = a[i]
2025-07-01 17:49:07.965 if ai == bj:
2025-07-01 17:49:07.965 if eqi is None:
2025-07-01 17:49:07.965 eqi, eqj = i, j
2025-07-01 17:49:07.965 continue
2025-07-01 17:49:07.965 cruncher.set_seq1(ai)
2025-07-01 17:49:07.965 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.966 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.966 # compares by a factor of 3.
2025-07-01 17:49:07.966 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.966 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.966 # of the computation is cached by cruncher
2025-07-01 17:49:07.966 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.966 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.966 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.966 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.966 if best_ratio < cutoff:
2025-07-01 17:49:07.966 # no non-identical "pretty close" pair
2025-07-01 17:49:07.966 if eqi is None:
2025-07-01 17:49:07.966 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.966 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.966 return
2025-07-01 17:49:07.966 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.967 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.967 else:
2025-07-01 17:49:07.967 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.967 eqi = None
2025-07-01 17:49:07.967
2025-07-01 17:49:07.967 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.967 # identical
2025-07-01 17:49:07.967
2025-07-01 17:49:07.967 # pump out diffs from before the synch point
2025-07-01 17:49:07.967 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.967
2025-07-01 17:49:07.967 # do intraline marking on the synch pair
2025-07-01 17:49:07.967 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.967 if eqi is None:
2025-07-01 17:49:07.967 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.967 atags = btags = ""
2025-07-01 17:49:07.967 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.968 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.968 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.968 if tag == 'replace':
2025-07-01 17:49:07.968 atags += '^' * la
2025-07-01 17:49:07.968 btags += '^' * lb
2025-07-01 17:49:07.968 elif tag == 'delete':
2025-07-01 17:49:07.968 atags += '-' * la
2025-07-01 17:49:07.968 elif tag == 'insert':
2025-07-01 17:49:07.968 btags += '+' * lb
2025-07-01 17:49:07.968 elif tag == 'equal':
2025-07-01 17:49:07.968 atags += ' ' * la
2025-07-01 17:49:07.968 btags += ' ' * lb
2025-07-01 17:49:07.968 else:
2025-07-01 17:49:07.968 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.968 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.968 else:
2025-07-01 17:49:07.969 # the synch pair is identical
2025-07-01 17:49:07.969 yield ' ' + aelt
2025-07-01 17:49:07.969
2025-07-01 17:49:07.969 # pump out diffs from after the synch point
2025-07-01 17:49:07.969 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.969
2025-07-01 17:49:07.969 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.969 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.969
2025-07-01 17:49:07.969 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.969 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.969 alo = 335, ahi = 1101
2025-07-01 17:49:07.969 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.969 blo = 335, bhi = 1101
2025-07-01 17:49:07.969
2025-07-01 17:49:07.969 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.969 g = []
2025-07-01 17:49:07.973 if alo < ahi:
2025-07-01 17:49:07.973 if blo < bhi:
2025-07-01 17:49:07.973 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.973 else:
2025-07-01 17:49:07.973 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.973 elif blo < bhi:
2025-07-01 17:49:07.973 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.973
2025-07-01 17:49:07.973 > yield from g
2025-07-01 17:49:07.973
2025-07-01 17:49:07.973 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.973 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.974
2025-07-01 17:49:07.974 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.974 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.974 alo = 335, ahi = 1101
2025-07-01 17:49:07.974 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.974 blo = 335, bhi = 1101
2025-07-01 17:49:07.974
2025-07-01 17:49:07.974 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.974 r"""
2025-07-01 17:49:07.974 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.974 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.974 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.974 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.974
2025-07-01 17:49:07.974 Example:
2025-07-01 17:49:07.975
2025-07-01 17:49:07.975 >>> d = Differ()
2025-07-01 17:49:07.975 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.975 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.975 >>> print(''.join(results), end="")
2025-07-01 17:49:07.975 - abcDefghiJkl
2025-07-01 17:49:07.975 + abcdefGhijkl
2025-07-01 17:49:07.975 """
2025-07-01 17:49:07.975
2025-07-01 17:49:07.975 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.975 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.975 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.975 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.975 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.975
2025-07-01 17:49:07.976 # search for the pair that matches best without being identical
2025-07-01 17:49:07.976 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.976 # on junk -- unless we have to)
2025-07-01 17:49:07.976 for j in range(blo, bhi):
2025-07-01 17:49:07.976 bj = b[j]
2025-07-01 17:49:07.976 cruncher.set_seq2(bj)
2025-07-01 17:49:07.976 for i in range(alo, ahi):
2025-07-01 17:49:07.976 ai = a[i]
2025-07-01 17:49:07.976 if ai == bj:
2025-07-01 17:49:07.976 if eqi is None:
2025-07-01 17:49:07.976 eqi, eqj = i, j
2025-07-01 17:49:07.976 continue
2025-07-01 17:49:07.976 cruncher.set_seq1(ai)
2025-07-01 17:49:07.976 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.976 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.976 # compares by a factor of 3.
2025-07-01 17:49:07.977 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.977 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.977 # of the computation is cached by cruncher
2025-07-01 17:49:07.977 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.977 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.977 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.977 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.977 if best_ratio < cutoff:
2025-07-01 17:49:07.977 # no non-identical "pretty close" pair
2025-07-01 17:49:07.977 if eqi is None:
2025-07-01 17:49:07.977 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.977 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.977 return
2025-07-01 17:49:07.977 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.977 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.977 else:
2025-07-01 17:49:07.978 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.978 eqi = None
2025-07-01 17:49:07.978
2025-07-01 17:49:07.978 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.978 # identical
2025-07-01 17:49:07.978
2025-07-01 17:49:07.978 # pump out diffs from before the synch point
2025-07-01 17:49:07.978 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.978
2025-07-01 17:49:07.978 # do intraline marking on the synch pair
2025-07-01 17:49:07.978 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.978 if eqi is None:
2025-07-01 17:49:07.978 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.978 atags = btags = ""
2025-07-01 17:49:07.978 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.978 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.979 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.979 if tag == 'replace':
2025-07-01 17:49:07.979 atags += '^' * la
2025-07-01 17:49:07.979 btags += '^' * lb
2025-07-01 17:49:07.979 elif tag == 'delete':
2025-07-01 17:49:07.979 atags += '-' * la
2025-07-01 17:49:07.979 elif tag == 'insert':
2025-07-01 17:49:07.979 btags += '+' * lb
2025-07-01 17:49:07.979 elif tag == 'equal':
2025-07-01 17:49:07.979 atags += ' ' * la
2025-07-01 17:49:07.979 btags += ' ' * lb
2025-07-01 17:49:07.979 else:
2025-07-01 17:49:07.979 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.979 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.979 else:
2025-07-01 17:49:07.979 # the synch pair is identical
2025-07-01 17:49:07.980 yield ' ' + aelt
2025-07-01 17:49:07.980
2025-07-01 17:49:07.980 # pump out diffs from after the synch point
2025-07-01 17:49:07.980 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.980
2025-07-01 17:49:07.980 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.980 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.980
2025-07-01 17:49:07.980 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.980 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.980 alo = 336, ahi = 1101
2025-07-01 17:49:07.980 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.980 blo = 336, bhi = 1101
2025-07-01 17:49:07.980
2025-07-01 17:49:07.980 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.981 g = []
2025-07-01 17:49:07.981 if alo < ahi:
2025-07-01 17:49:07.981 if blo < bhi:
2025-07-01 17:49:07.981 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.981 else:
2025-07-01 17:49:07.981 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.981 elif blo < bhi:
2025-07-01 17:49:07.981 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.981
2025-07-01 17:49:07.981 > yield from g
2025-07-01 17:49:07.981
2025-07-01 17:49:07.981 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.981 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.981
2025-07-01 17:49:07.981 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.981 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.982 alo = 336, ahi = 1101
2025-07-01 17:49:07.982 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.982 blo = 336, bhi = 1101
2025-07-01 17:49:07.982
2025-07-01 17:49:07.982 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.982 r"""
2025-07-01 17:49:07.982 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.982 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.982 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.982 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.982
2025-07-01 17:49:07.982 Example:
2025-07-01 17:49:07.982
2025-07-01 17:49:07.982 >>> d = Differ()
2025-07-01 17:49:07.982 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.982 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.983 >>> print(''.join(results), end="")
2025-07-01 17:49:07.983 - abcDefghiJkl
2025-07-01 17:49:07.983 + abcdefGhijkl
2025-07-01 17:49:07.983 """
2025-07-01 17:49:07.983
2025-07-01 17:49:07.983 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.983 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.983 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.983 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.983 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.983
2025-07-01 17:49:07.983 # search for the pair that matches best without being identical
2025-07-01 17:49:07.983 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.983 # on junk -- unless we have to)
2025-07-01 17:49:07.983 for j in range(blo, bhi):
2025-07-01 17:49:07.983 bj = b[j]
2025-07-01 17:49:07.984 cruncher.set_seq2(bj)
2025-07-01 17:49:07.984 for i in range(alo, ahi):
2025-07-01 17:49:07.984 ai = a[i]
2025-07-01 17:49:07.984 if ai == bj:
2025-07-01 17:49:07.984 if eqi is None:
2025-07-01 17:49:07.984 eqi, eqj = i, j
2025-07-01 17:49:07.984 continue
2025-07-01 17:49:07.984 cruncher.set_seq1(ai)
2025-07-01 17:49:07.984 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.984 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.984 # compares by a factor of 3.
2025-07-01 17:49:07.984 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.984 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.984 # of the computation is cached by cruncher
2025-07-01 17:49:07.984 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.984 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.984 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.985 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.985 if best_ratio < cutoff:
2025-07-01 17:49:07.985 # no non-identical "pretty close" pair
2025-07-01 17:49:07.985 if eqi is None:
2025-07-01 17:49:07.985 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.985 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.985 return
2025-07-01 17:49:07.985 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.985 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.985 else:
2025-07-01 17:49:07.985 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.985 eqi = None
2025-07-01 17:49:07.985
2025-07-01 17:49:07.985 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.985 # identical
2025-07-01 17:49:07.985
2025-07-01 17:49:07.986 # pump out diffs from before the synch point
2025-07-01 17:49:07.991 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.991
2025-07-01 17:49:07.991 # do intraline marking on the synch pair
2025-07-01 17:49:07.991 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.991 if eqi is None:
2025-07-01 17:49:07.991 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.991 atags = btags = ""
2025-07-01 17:49:07.991 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.991 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.991 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.991 if tag == 'replace':
2025-07-01 17:49:07.991 atags += '^' * la
2025-07-01 17:49:07.991 btags += '^' * lb
2025-07-01 17:49:07.991 elif tag == 'delete':
2025-07-01 17:49:07.991 atags += '-' * la
2025-07-01 17:49:07.992 elif tag == 'insert':
2025-07-01 17:49:07.992 btags += '+' * lb
2025-07-01 17:49:07.992 elif tag == 'equal':
2025-07-01 17:49:07.992 atags += ' ' * la
2025-07-01 17:49:07.992 btags += ' ' * lb
2025-07-01 17:49:07.992 else:
2025-07-01 17:49:07.992 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:07.992 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:07.992 else:
2025-07-01 17:49:07.992 # the synch pair is identical
2025-07-01 17:49:07.992 yield ' ' + aelt
2025-07-01 17:49:07.992
2025-07-01 17:49:07.992 # pump out diffs from after the synch point
2025-07-01 17:49:07.992 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:07.992
2025-07-01 17:49:07.992 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:07.992 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.993
2025-07-01 17:49:07.993 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.993 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.993 alo = 337, ahi = 1101
2025-07-01 17:49:07.993 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.993 blo = 337, bhi = 1101
2025-07-01 17:49:07.993
2025-07-01 17:49:07.993 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.993 g = []
2025-07-01 17:49:07.993 if alo < ahi:
2025-07-01 17:49:07.993 if blo < bhi:
2025-07-01 17:49:07.993 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.993 else:
2025-07-01 17:49:07.993 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:07.993 elif blo < bhi:
2025-07-01 17:49:07.993 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:07.993
2025-07-01 17:49:07.994 > yield from g
2025-07-01 17:49:07.994
2025-07-01 17:49:07.994 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:07.994 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:07.994
2025-07-01 17:49:07.994 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:07.994 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:07.994 alo = 337, ahi = 1101
2025-07-01 17:49:07.994 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:07.994 blo = 337, bhi = 1101
2025-07-01 17:49:07.994
2025-07-01 17:49:07.994 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:07.994 r"""
2025-07-01 17:49:07.994 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:07.994 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:07.995 synch point, and intraline difference marking is done on the
2025-07-01 17:49:07.995 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:07.995
2025-07-01 17:49:07.995 Example:
2025-07-01 17:49:07.995
2025-07-01 17:49:07.995 >>> d = Differ()
2025-07-01 17:49:07.995 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:07.995 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:07.995 >>> print(''.join(results), end="")
2025-07-01 17:49:07.995 - abcDefghiJkl
2025-07-01 17:49:07.995 + abcdefGhijkl
2025-07-01 17:49:07.995 """
2025-07-01 17:49:07.995
2025-07-01 17:49:07.995 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:07.996 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:07.996 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:07.996 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:07.996 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:07.996
2025-07-01 17:49:07.996 # search for the pair that matches best without being identical
2025-07-01 17:49:07.996 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:07.996 # on junk -- unless we have to)
2025-07-01 17:49:07.996 for j in range(blo, bhi):
2025-07-01 17:49:07.996 bj = b[j]
2025-07-01 17:49:07.996 cruncher.set_seq2(bj)
2025-07-01 17:49:07.996 for i in range(alo, ahi):
2025-07-01 17:49:07.996 ai = a[i]
2025-07-01 17:49:07.996 if ai == bj:
2025-07-01 17:49:07.996 if eqi is None:
2025-07-01 17:49:07.996 eqi, eqj = i, j
2025-07-01 17:49:07.996 continue
2025-07-01 17:49:07.997 cruncher.set_seq1(ai)
2025-07-01 17:49:07.997 # computing similarity is expensive, so use the quick
2025-07-01 17:49:07.997 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:07.997 # compares by a factor of 3.
2025-07-01 17:49:07.997 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:07.997 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:07.997 # of the computation is cached by cruncher
2025-07-01 17:49:07.997 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:07.997 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:07.997 cruncher.ratio() > best_ratio:
2025-07-01 17:49:07.997 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:07.997 if best_ratio < cutoff:
2025-07-01 17:49:07.997 # no non-identical "pretty close" pair
2025-07-01 17:49:07.997 if eqi is None:
2025-07-01 17:49:07.997 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:07.998 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:07.998 return
2025-07-01 17:49:07.998 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:07.998 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:07.998 else:
2025-07-01 17:49:07.998 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:07.998 eqi = None
2025-07-01 17:49:07.998
2025-07-01 17:49:07.998 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:07.998 # identical
2025-07-01 17:49:07.998
2025-07-01 17:49:07.998 # pump out diffs from before the synch point
2025-07-01 17:49:07.998 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:07.998
2025-07-01 17:49:07.998 # do intraline marking on the synch pair
2025-07-01 17:49:07.998 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:07.999 if eqi is None:
2025-07-01 17:49:07.999 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:07.999 atags = btags = ""
2025-07-01 17:49:07.999 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:07.999 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:07.999 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:07.999 if tag == 'replace':
2025-07-01 17:49:07.999 atags += '^' * la
2025-07-01 17:49:07.999 btags += '^' * lb
2025-07-01 17:49:07.999 elif tag == 'delete':
2025-07-01 17:49:07.999 atags += '-' * la
2025-07-01 17:49:07.999 elif tag == 'insert':
2025-07-01 17:49:07.999 btags += '+' * lb
2025-07-01 17:49:07.999 elif tag == 'equal':
2025-07-01 17:49:07.999 atags += ' ' * la
2025-07-01 17:49:07.999 btags += ' ' * lb
2025-07-01 17:49:07.999 else:
2025-07-01 17:49:07.999 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.000 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.000 else:
2025-07-01 17:49:08.000 # the synch pair is identical
2025-07-01 17:49:08.000 yield ' ' + aelt
2025-07-01 17:49:08.000
2025-07-01 17:49:08.000 # pump out diffs from after the synch point
2025-07-01 17:49:08.000 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.000
2025-07-01 17:49:08.000 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.000 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.000
2025-07-01 17:49:08.000 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.000 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.000 alo = 338, ahi = 1101
2025-07-01 17:49:08.000 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.000 blo = 338, bhi = 1101
2025-07-01 17:49:08.000
2025-07-01 17:49:08.001 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.006 g = []
2025-07-01 17:49:08.006 if alo < ahi:
2025-07-01 17:49:08.006 if blo < bhi:
2025-07-01 17:49:08.006 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.006 else:
2025-07-01 17:49:08.006 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.006 elif blo < bhi:
2025-07-01 17:49:08.006 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.006
2025-07-01 17:49:08.006 > yield from g
2025-07-01 17:49:08.006
2025-07-01 17:49:08.006 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.006 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.007
2025-07-01 17:49:08.007 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.007 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.007 alo = 338, ahi = 1101
2025-07-01 17:49:08.007 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.007 blo = 338, bhi = 1101
2025-07-01 17:49:08.007
2025-07-01 17:49:08.007 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.007 r"""
2025-07-01 17:49:08.007 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.007 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.007 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.007 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.007
2025-07-01 17:49:08.007 Example:
2025-07-01 17:49:08.007
2025-07-01 17:49:08.008 >>> d = Differ()
2025-07-01 17:49:08.008 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.008 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.008 >>> print(''.join(results), end="")
2025-07-01 17:49:08.008 - abcDefghiJkl
2025-07-01 17:49:08.008 + abcdefGhijkl
2025-07-01 17:49:08.008 """
2025-07-01 17:49:08.008
2025-07-01 17:49:08.008 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.008 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.008 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.008 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.008 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.008
2025-07-01 17:49:08.008 # search for the pair that matches best without being identical
2025-07-01 17:49:08.009 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.009 # on junk -- unless we have to)
2025-07-01 17:49:08.009 for j in range(blo, bhi):
2025-07-01 17:49:08.009 bj = b[j]
2025-07-01 17:49:08.009 cruncher.set_seq2(bj)
2025-07-01 17:49:08.009 for i in range(alo, ahi):
2025-07-01 17:49:08.009 ai = a[i]
2025-07-01 17:49:08.009 if ai == bj:
2025-07-01 17:49:08.009 if eqi is None:
2025-07-01 17:49:08.009 eqi, eqj = i, j
2025-07-01 17:49:08.009 continue
2025-07-01 17:49:08.009 cruncher.set_seq1(ai)
2025-07-01 17:49:08.009 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.009 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.010 # compares by a factor of 3.
2025-07-01 17:49:08.010 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.010 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.010 # of the computation is cached by cruncher
2025-07-01 17:49:08.010 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.010 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.010 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.010 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.010 if best_ratio < cutoff:
2025-07-01 17:49:08.010 # no non-identical "pretty close" pair
2025-07-01 17:49:08.010 if eqi is None:
2025-07-01 17:49:08.010 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.010 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.010 return
2025-07-01 17:49:08.010 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.011 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.011 else:
2025-07-01 17:49:08.011 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.011 eqi = None
2025-07-01 17:49:08.011
2025-07-01 17:49:08.011 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.011 # identical
2025-07-01 17:49:08.011
2025-07-01 17:49:08.011 # pump out diffs from before the synch point
2025-07-01 17:49:08.011 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.011
2025-07-01 17:49:08.011 # do intraline marking on the synch pair
2025-07-01 17:49:08.011 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.011 if eqi is None:
2025-07-01 17:49:08.011 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.011 atags = btags = ""
2025-07-01 17:49:08.012 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.012 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.012 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.012 if tag == 'replace':
2025-07-01 17:49:08.012 atags += '^' * la
2025-07-01 17:49:08.012 btags += '^' * lb
2025-07-01 17:49:08.012 elif tag == 'delete':
2025-07-01 17:49:08.012 atags += '-' * la
2025-07-01 17:49:08.012 elif tag == 'insert':
2025-07-01 17:49:08.012 btags += '+' * lb
2025-07-01 17:49:08.012 elif tag == 'equal':
2025-07-01 17:49:08.012 atags += ' ' * la
2025-07-01 17:49:08.012 btags += ' ' * lb
2025-07-01 17:49:08.012 else:
2025-07-01 17:49:08.012 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.012 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.013 else:
2025-07-01 17:49:08.013 # the synch pair is identical
2025-07-01 17:49:08.013 yield ' ' + aelt
2025-07-01 17:49:08.013
2025-07-01 17:49:08.013 # pump out diffs from after the synch point
2025-07-01 17:49:08.013 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.013
2025-07-01 17:49:08.013 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.013 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.013
2025-07-01 17:49:08.013 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.013 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.013 alo = 339, ahi = 1101
2025-07-01 17:49:08.013 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.013 blo = 339, bhi = 1101
2025-07-01 17:49:08.013
2025-07-01 17:49:08.013 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.014 g = []
2025-07-01 17:49:08.014 if alo < ahi:
2025-07-01 17:49:08.014 if blo < bhi:
2025-07-01 17:49:08.014 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.014 else:
2025-07-01 17:49:08.014 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.014 elif blo < bhi:
2025-07-01 17:49:08.014 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.014
2025-07-01 17:49:08.014 > yield from g
2025-07-01 17:49:08.014
2025-07-01 17:49:08.014 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.014 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.014
2025-07-01 17:49:08.014 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.014 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.014 alo = 339, ahi = 1101
2025-07-01 17:49:08.015 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.015 blo = 339, bhi = 1101
2025-07-01 17:49:08.015
2025-07-01 17:49:08.015 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.015 r"""
2025-07-01 17:49:08.015 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.015 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.015 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.015 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.015
2025-07-01 17:49:08.015 Example:
2025-07-01 17:49:08.015
2025-07-01 17:49:08.015 >>> d = Differ()
2025-07-01 17:49:08.015 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.015 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.015 >>> print(''.join(results), end="")
2025-07-01 17:49:08.016 - abcDefghiJkl
2025-07-01 17:49:08.021 + abcdefGhijkl
2025-07-01 17:49:08.021 """
2025-07-01 17:49:08.021
2025-07-01 17:49:08.021 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.021 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.021 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.021 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.021 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.021
2025-07-01 17:49:08.021 # search for the pair that matches best without being identical
2025-07-01 17:49:08.021 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.021 # on junk -- unless we have to)
2025-07-01 17:49:08.021 for j in range(blo, bhi):
2025-07-01 17:49:08.021 bj = b[j]
2025-07-01 17:49:08.021 cruncher.set_seq2(bj)
2025-07-01 17:49:08.022 for i in range(alo, ahi):
2025-07-01 17:49:08.022 ai = a[i]
2025-07-01 17:49:08.022 if ai == bj:
2025-07-01 17:49:08.022 if eqi is None:
2025-07-01 17:49:08.022 eqi, eqj = i, j
2025-07-01 17:49:08.022 continue
2025-07-01 17:49:08.022 cruncher.set_seq1(ai)
2025-07-01 17:49:08.022 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.022 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.022 # compares by a factor of 3.
2025-07-01 17:49:08.022 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.022 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.022 # of the computation is cached by cruncher
2025-07-01 17:49:08.022 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.022 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.022 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.023 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.023 if best_ratio < cutoff:
2025-07-01 17:49:08.023 # no non-identical "pretty close" pair
2025-07-01 17:49:08.023 if eqi is None:
2025-07-01 17:49:08.023 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.023 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.023 return
2025-07-01 17:49:08.023 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.023 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.023 else:
2025-07-01 17:49:08.023 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.023 eqi = None
2025-07-01 17:49:08.023
2025-07-01 17:49:08.023 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.023 # identical
2025-07-01 17:49:08.023
2025-07-01 17:49:08.023 # pump out diffs from before the synch point
2025-07-01 17:49:08.024 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.024
2025-07-01 17:49:08.024 # do intraline marking on the synch pair
2025-07-01 17:49:08.024 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.024 if eqi is None:
2025-07-01 17:49:08.024 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.024 atags = btags = ""
2025-07-01 17:49:08.024 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.024 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.024 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.024 if tag == 'replace':
2025-07-01 17:49:08.024 atags += '^' * la
2025-07-01 17:49:08.024 btags += '^' * lb
2025-07-01 17:49:08.024 elif tag == 'delete':
2025-07-01 17:49:08.024 atags += '-' * la
2025-07-01 17:49:08.024 elif tag == 'insert':
2025-07-01 17:49:08.024 btags += '+' * lb
2025-07-01 17:49:08.025 elif tag == 'equal':
2025-07-01 17:49:08.025 atags += ' ' * la
2025-07-01 17:49:08.025 btags += ' ' * lb
2025-07-01 17:49:08.025 else:
2025-07-01 17:49:08.025 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.025 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.025 else:
2025-07-01 17:49:08.025 # the synch pair is identical
2025-07-01 17:49:08.025 yield ' ' + aelt
2025-07-01 17:49:08.025
2025-07-01 17:49:08.025 # pump out diffs from after the synch point
2025-07-01 17:49:08.025 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.025
2025-07-01 17:49:08.025 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.025 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.025
2025-07-01 17:49:08.025 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.025 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.026 alo = 340, ahi = 1101
2025-07-01 17:49:08.026 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.026 blo = 340, bhi = 1101
2025-07-01 17:49:08.026
2025-07-01 17:49:08.026 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.026 g = []
2025-07-01 17:49:08.026 if alo < ahi:
2025-07-01 17:49:08.026 if blo < bhi:
2025-07-01 17:49:08.026 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.026 else:
2025-07-01 17:49:08.026 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.026 elif blo < bhi:
2025-07-01 17:49:08.026 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.026
2025-07-01 17:49:08.026 > yield from g
2025-07-01 17:49:08.027
2025-07-01 17:49:08.027 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.027 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.027
2025-07-01 17:49:08.027 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.027 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.027 alo = 340, ahi = 1101
2025-07-01 17:49:08.027 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.027 blo = 340, bhi = 1101
2025-07-01 17:49:08.027
2025-07-01 17:49:08.027 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.027 r"""
2025-07-01 17:49:08.027 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.027 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.027 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.027 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.028
2025-07-01 17:49:08.028 Example:
2025-07-01 17:49:08.028
2025-07-01 17:49:08.028 >>> d = Differ()
2025-07-01 17:49:08.028 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.028 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.028 >>> print(''.join(results), end="")
2025-07-01 17:49:08.028 - abcDefghiJkl
2025-07-01 17:49:08.028 + abcdefGhijkl
2025-07-01 17:49:08.028 """
2025-07-01 17:49:08.028
2025-07-01 17:49:08.028 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.028 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.028 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.028 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.029 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.029
2025-07-01 17:49:08.029 # search for the pair that matches best without being identical
2025-07-01 17:49:08.029 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.029 # on junk -- unless we have to)
2025-07-01 17:49:08.029 for j in range(blo, bhi):
2025-07-01 17:49:08.029 bj = b[j]
2025-07-01 17:49:08.029 cruncher.set_seq2(bj)
2025-07-01 17:49:08.029 for i in range(alo, ahi):
2025-07-01 17:49:08.029 ai = a[i]
2025-07-01 17:49:08.029 if ai == bj:
2025-07-01 17:49:08.029 if eqi is None:
2025-07-01 17:49:08.029 eqi, eqj = i, j
2025-07-01 17:49:08.029 continue
2025-07-01 17:49:08.029 cruncher.set_seq1(ai)
2025-07-01 17:49:08.029 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.030 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.030 # compares by a factor of 3.
2025-07-01 17:49:08.030 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.030 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.030 # of the computation is cached by cruncher
2025-07-01 17:49:08.030 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.030 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.030 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.030 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.030 if best_ratio < cutoff:
2025-07-01 17:49:08.030 # no non-identical "pretty close" pair
2025-07-01 17:49:08.030 if eqi is None:
2025-07-01 17:49:08.030 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.030 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.030 return
2025-07-01 17:49:08.030 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.030 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.031 else:
2025-07-01 17:49:08.031 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.031 eqi = None
2025-07-01 17:49:08.031
2025-07-01 17:49:08.031 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.031 # identical
2025-07-01 17:49:08.031
2025-07-01 17:49:08.031 # pump out diffs from before the synch point
2025-07-01 17:49:08.031 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.031
2025-07-01 17:49:08.031 # do intraline marking on the synch pair
2025-07-01 17:49:08.031 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.031 if eqi is None:
2025-07-01 17:49:08.031 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.031 atags = btags = ""
2025-07-01 17:49:08.031 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.031 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.032 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.037 if tag == 'replace':
2025-07-01 17:49:08.037 atags += '^' * la
2025-07-01 17:49:08.037 btags += '^' * lb
2025-07-01 17:49:08.037 elif tag == 'delete':
2025-07-01 17:49:08.037 atags += '-' * la
2025-07-01 17:49:08.037 elif tag == 'insert':
2025-07-01 17:49:08.037 btags += '+' * lb
2025-07-01 17:49:08.037 elif tag == 'equal':
2025-07-01 17:49:08.037 atags += ' ' * la
2025-07-01 17:49:08.037 btags += ' ' * lb
2025-07-01 17:49:08.037 else:
2025-07-01 17:49:08.037 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.037 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.038 else:
2025-07-01 17:49:08.038 # the synch pair is identical
2025-07-01 17:49:08.038 yield ' ' + aelt
2025-07-01 17:49:08.038
2025-07-01 17:49:08.038 # pump out diffs from after the synch point
2025-07-01 17:49:08.038 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.038
2025-07-01 17:49:08.038 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.038 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.038
2025-07-01 17:49:08.038 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.038 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.038 alo = 341, ahi = 1101
2025-07-01 17:49:08.038 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.038 blo = 341, bhi = 1101
2025-07-01 17:49:08.038
2025-07-01 17:49:08.038 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.038 g = []
2025-07-01 17:49:08.039 if alo < ahi:
2025-07-01 17:49:08.039 if blo < bhi:
2025-07-01 17:49:08.039 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.039 else:
2025-07-01 17:49:08.039 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.039 elif blo < bhi:
2025-07-01 17:49:08.039 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.039
2025-07-01 17:49:08.039 > yield from g
2025-07-01 17:49:08.039
2025-07-01 17:49:08.039 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.039 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.039
2025-07-01 17:49:08.039 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.039 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.039 alo = 341, ahi = 1101
2025-07-01 17:49:08.039 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.040 blo = 341, bhi = 1101
2025-07-01 17:49:08.040
2025-07-01 17:49:08.040 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.040 r"""
2025-07-01 17:49:08.040 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.040 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.040 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.040 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.040
2025-07-01 17:49:08.040 Example:
2025-07-01 17:49:08.040
2025-07-01 17:49:08.040 >>> d = Differ()
2025-07-01 17:49:08.040 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.040 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.040 >>> print(''.join(results), end="")
2025-07-01 17:49:08.040 - abcDefghiJkl
2025-07-01 17:49:08.041 + abcdefGhijkl
2025-07-01 17:49:08.041 """
2025-07-01 17:49:08.041
2025-07-01 17:49:08.041 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.041 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.041 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.041 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.041 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.041
2025-07-01 17:49:08.041 # search for the pair that matches best without being identical
2025-07-01 17:49:08.041 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.041 # on junk -- unless we have to)
2025-07-01 17:49:08.041 for j in range(blo, bhi):
2025-07-01 17:49:08.042 bj = b[j]
2025-07-01 17:49:08.042 cruncher.set_seq2(bj)
2025-07-01 17:49:08.042 for i in range(alo, ahi):
2025-07-01 17:49:08.042 ai = a[i]
2025-07-01 17:49:08.042 if ai == bj:
2025-07-01 17:49:08.042 if eqi is None:
2025-07-01 17:49:08.042 eqi, eqj = i, j
2025-07-01 17:49:08.042 continue
2025-07-01 17:49:08.042 cruncher.set_seq1(ai)
2025-07-01 17:49:08.042 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.042 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.042 # compares by a factor of 3.
2025-07-01 17:49:08.042 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.042 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.042 # of the computation is cached by cruncher
2025-07-01 17:49:08.042 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.043 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.043 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.043 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.043 if best_ratio < cutoff:
2025-07-01 17:49:08.043 # no non-identical "pretty close" pair
2025-07-01 17:49:08.043 if eqi is None:
2025-07-01 17:49:08.043 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.043 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.043 return
2025-07-01 17:49:08.043 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.043 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.043 else:
2025-07-01 17:49:08.043 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.043 eqi = None
2025-07-01 17:49:08.043
2025-07-01 17:49:08.043 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.043 # identical
2025-07-01 17:49:08.044
2025-07-01 17:49:08.044 # pump out diffs from before the synch point
2025-07-01 17:49:08.044 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.044
2025-07-01 17:49:08.044 # do intraline marking on the synch pair
2025-07-01 17:49:08.044 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.044 if eqi is None:
2025-07-01 17:49:08.044 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.044 atags = btags = ""
2025-07-01 17:49:08.044 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.044 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.044 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.044 if tag == 'replace':
2025-07-01 17:49:08.044 atags += '^' * la
2025-07-01 17:49:08.044 btags += '^' * lb
2025-07-01 17:49:08.044 elif tag == 'delete':
2025-07-01 17:49:08.044 atags += '-' * la
2025-07-01 17:49:08.045 elif tag == 'insert':
2025-07-01 17:49:08.045 btags += '+' * lb
2025-07-01 17:49:08.045 elif tag == 'equal':
2025-07-01 17:49:08.045 atags += ' ' * la
2025-07-01 17:49:08.045 btags += ' ' * lb
2025-07-01 17:49:08.045 else:
2025-07-01 17:49:08.045 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.045 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.045 else:
2025-07-01 17:49:08.045 # the synch pair is identical
2025-07-01 17:49:08.045 yield ' ' + aelt
2025-07-01 17:49:08.045
2025-07-01 17:49:08.045 # pump out diffs from after the synch point
2025-07-01 17:49:08.045 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.045
2025-07-01 17:49:08.045 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.045 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.046
2025-07-01 17:49:08.046 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.046 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.046 alo = 342, ahi = 1101
2025-07-01 17:49:08.046 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.046 blo = 342, bhi = 1101
2025-07-01 17:49:08.046
2025-07-01 17:49:08.046 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.046 g = []
2025-07-01 17:49:08.046 if alo < ahi:
2025-07-01 17:49:08.046 if blo < bhi:
2025-07-01 17:49:08.046 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.046 else:
2025-07-01 17:49:08.046 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.046 elif blo < bhi:
2025-07-01 17:49:08.046 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.046
2025-07-01 17:49:08.047 > yield from g
2025-07-01 17:49:08.047
2025-07-01 17:49:08.047 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.047 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.047
2025-07-01 17:49:08.047 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.047 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.047 alo = 342, ahi = 1101
2025-07-01 17:49:08.047 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.047 blo = 342, bhi = 1101
2025-07-01 17:49:08.047
2025-07-01 17:49:08.047 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.047 r"""
2025-07-01 17:49:08.047 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.047 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.048 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.053 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.053
2025-07-01 17:49:08.053 Example:
2025-07-01 17:49:08.053
2025-07-01 17:49:08.053 >>> d = Differ()
2025-07-01 17:49:08.053 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.053 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.053 >>> print(''.join(results), end="")
2025-07-01 17:49:08.053 - abcDefghiJkl
2025-07-01 17:49:08.053 + abcdefGhijkl
2025-07-01 17:49:08.053 """
2025-07-01 17:49:08.054
2025-07-01 17:49:08.054 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.054 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.054 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.054 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.054 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.054
2025-07-01 17:49:08.054 # search for the pair that matches best without being identical
2025-07-01 17:49:08.054 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.054 # on junk -- unless we have to)
2025-07-01 17:49:08.054 for j in range(blo, bhi):
2025-07-01 17:49:08.054 bj = b[j]
2025-07-01 17:49:08.054 cruncher.set_seq2(bj)
2025-07-01 17:49:08.054 for i in range(alo, ahi):
2025-07-01 17:49:08.054 ai = a[i]
2025-07-01 17:49:08.054 if ai == bj:
2025-07-01 17:49:08.054 if eqi is None:
2025-07-01 17:49:08.055 eqi, eqj = i, j
2025-07-01 17:49:08.055 continue
2025-07-01 17:49:08.055 cruncher.set_seq1(ai)
2025-07-01 17:49:08.055 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.055 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.055 # compares by a factor of 3.
2025-07-01 17:49:08.055 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.055 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.055 # of the computation is cached by cruncher
2025-07-01 17:49:08.055 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.055 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.055 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.055 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.055 if best_ratio < cutoff:
2025-07-01 17:49:08.055 # no non-identical "pretty close" pair
2025-07-01 17:49:08.055 if eqi is None:
2025-07-01 17:49:08.056 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.056 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.056 return
2025-07-01 17:49:08.056 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.056 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.056 else:
2025-07-01 17:49:08.056 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.056 eqi = None
2025-07-01 17:49:08.056
2025-07-01 17:49:08.056 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.056 # identical
2025-07-01 17:49:08.056
2025-07-01 17:49:08.056 # pump out diffs from before the synch point
2025-07-01 17:49:08.056 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.056
2025-07-01 17:49:08.056 # do intraline marking on the synch pair
2025-07-01 17:49:08.056 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.057 if eqi is None:
2025-07-01 17:49:08.057 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.057 atags = btags = ""
2025-07-01 17:49:08.057 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.057 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.057 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.057 if tag == 'replace':
2025-07-01 17:49:08.057 atags += '^' * la
2025-07-01 17:49:08.057 btags += '^' * lb
2025-07-01 17:49:08.057 elif tag == 'delete':
2025-07-01 17:49:08.057 atags += '-' * la
2025-07-01 17:49:08.057 elif tag == 'insert':
2025-07-01 17:49:08.057 btags += '+' * lb
2025-07-01 17:49:08.057 elif tag == 'equal':
2025-07-01 17:49:08.057 atags += ' ' * la
2025-07-01 17:49:08.058 btags += ' ' * lb
2025-07-01 17:49:08.058 else:
2025-07-01 17:49:08.058 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.058 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.058 else:
2025-07-01 17:49:08.058 # the synch pair is identical
2025-07-01 17:49:08.058 yield ' ' + aelt
2025-07-01 17:49:08.058
2025-07-01 17:49:08.058 # pump out diffs from after the synch point
2025-07-01 17:49:08.058 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.058
2025-07-01 17:49:08.058 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.058 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.058
2025-07-01 17:49:08.058 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.058 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.059 alo = 343, ahi = 1101
2025-07-01 17:49:08.059 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.059 blo = 343, bhi = 1101
2025-07-01 17:49:08.059
2025-07-01 17:49:08.059 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.059 g = []
2025-07-01 17:49:08.059 if alo < ahi:
2025-07-01 17:49:08.059 if blo < bhi:
2025-07-01 17:49:08.059 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.059 else:
2025-07-01 17:49:08.059 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.059 elif blo < bhi:
2025-07-01 17:49:08.059 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.059
2025-07-01 17:49:08.059 > yield from g
2025-07-01 17:49:08.059
2025-07-01 17:49:08.059 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.060 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.060
2025-07-01 17:49:08.060 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.060 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.060 alo = 343, ahi = 1101
2025-07-01 17:49:08.060 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.060 blo = 343, bhi = 1101
2025-07-01 17:49:08.060
2025-07-01 17:49:08.060 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.060 r"""
2025-07-01 17:49:08.060 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.060 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.060 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.060 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.060
2025-07-01 17:49:08.060 Example:
2025-07-01 17:49:08.061
2025-07-01 17:49:08.061 >>> d = Differ()
2025-07-01 17:49:08.061 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.061 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.061 >>> print(''.join(results), end="")
2025-07-01 17:49:08.061 - abcDefghiJkl
2025-07-01 17:49:08.061 + abcdefGhijkl
2025-07-01 17:49:08.061 """
2025-07-01 17:49:08.061
2025-07-01 17:49:08.061 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.061 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.061 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.061 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.061 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.061
2025-07-01 17:49:08.062 # search for the pair that matches best without being identical
2025-07-01 17:49:08.062 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.062 # on junk -- unless we have to)
2025-07-01 17:49:08.062 for j in range(blo, bhi):
2025-07-01 17:49:08.062 bj = b[j]
2025-07-01 17:49:08.062 cruncher.set_seq2(bj)
2025-07-01 17:49:08.062 for i in range(alo, ahi):
2025-07-01 17:49:08.062 ai = a[i]
2025-07-01 17:49:08.062 if ai == bj:
2025-07-01 17:49:08.062 if eqi is None:
2025-07-01 17:49:08.062 eqi, eqj = i, j
2025-07-01 17:49:08.062 continue
2025-07-01 17:49:08.062 cruncher.set_seq1(ai)
2025-07-01 17:49:08.062 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.062 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.062 # compares by a factor of 3.
2025-07-01 17:49:08.062 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.062 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.063 # of the computation is cached by cruncher
2025-07-01 17:49:08.063 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.063 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.063 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.063 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.063 if best_ratio < cutoff:
2025-07-01 17:49:08.063 # no non-identical "pretty close" pair
2025-07-01 17:49:08.063 if eqi is None:
2025-07-01 17:49:08.063 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.063 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.063 return
2025-07-01 17:49:08.063 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.063 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.063 else:
2025-07-01 17:49:08.063 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.063 eqi = None
2025-07-01 17:49:08.069
2025-07-01 17:49:08.069 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.069 # identical
2025-07-01 17:49:08.069
2025-07-01 17:49:08.069 # pump out diffs from before the synch point
2025-07-01 17:49:08.069 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.069
2025-07-01 17:49:08.069 # do intraline marking on the synch pair
2025-07-01 17:49:08.069 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.069 if eqi is None:
2025-07-01 17:49:08.069 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.069 atags = btags = ""
2025-07-01 17:49:08.069 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.069 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.070 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.070 if tag == 'replace':
2025-07-01 17:49:08.070 atags += '^' * la
2025-07-01 17:49:08.070 btags += '^' * lb
2025-07-01 17:49:08.070 elif tag == 'delete':
2025-07-01 17:49:08.070 atags += '-' * la
2025-07-01 17:49:08.070 elif tag == 'insert':
2025-07-01 17:49:08.070 btags += '+' * lb
2025-07-01 17:49:08.070 elif tag == 'equal':
2025-07-01 17:49:08.070 atags += ' ' * la
2025-07-01 17:49:08.070 btags += ' ' * lb
2025-07-01 17:49:08.070 else:
2025-07-01 17:49:08.070 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.070 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.070 else:
2025-07-01 17:49:08.070 # the synch pair is identical
2025-07-01 17:49:08.070 yield ' ' + aelt
2025-07-01 17:49:08.071
2025-07-01 17:49:08.071 # pump out diffs from after the synch point
2025-07-01 17:49:08.071 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.071
2025-07-01 17:49:08.071 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.071 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.071
2025-07-01 17:49:08.071 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.071 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.071 alo = 344, ahi = 1101
2025-07-01 17:49:08.071 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.071 blo = 344, bhi = 1101
2025-07-01 17:49:08.071
2025-07-01 17:49:08.071 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.071 g = []
2025-07-01 17:49:08.071 if alo < ahi:
2025-07-01 17:49:08.072 if blo < bhi:
2025-07-01 17:49:08.072 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.072 else:
2025-07-01 17:49:08.072 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.072 elif blo < bhi:
2025-07-01 17:49:08.072 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.072
2025-07-01 17:49:08.072 > yield from g
2025-07-01 17:49:08.072
2025-07-01 17:49:08.072 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.072 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.072
2025-07-01 17:49:08.072 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.072 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.072 alo = 344, ahi = 1101
2025-07-01 17:49:08.072 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.073 blo = 344, bhi = 1101
2025-07-01 17:49:08.073
2025-07-01 17:49:08.073 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.073 r"""
2025-07-01 17:49:08.073 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.073 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.073 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.073 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.073
2025-07-01 17:49:08.073 Example:
2025-07-01 17:49:08.073
2025-07-01 17:49:08.073 >>> d = Differ()
2025-07-01 17:49:08.073 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.073 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.073 >>> print(''.join(results), end="")
2025-07-01 17:49:08.074 - abcDefghiJkl
2025-07-01 17:49:08.074 + abcdefGhijkl
2025-07-01 17:49:08.074 """
2025-07-01 17:49:08.074
2025-07-01 17:49:08.074 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.074 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.074 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.074 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.074 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.074
2025-07-01 17:49:08.074 # search for the pair that matches best without being identical
2025-07-01 17:49:08.074 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.074 # on junk -- unless we have to)
2025-07-01 17:49:08.074 for j in range(blo, bhi):
2025-07-01 17:49:08.074 bj = b[j]
2025-07-01 17:49:08.075 cruncher.set_seq2(bj)
2025-07-01 17:49:08.075 for i in range(alo, ahi):
2025-07-01 17:49:08.075 ai = a[i]
2025-07-01 17:49:08.075 if ai == bj:
2025-07-01 17:49:08.075 if eqi is None:
2025-07-01 17:49:08.075 eqi, eqj = i, j
2025-07-01 17:49:08.075 continue
2025-07-01 17:49:08.075 cruncher.set_seq1(ai)
2025-07-01 17:49:08.075 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.075 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.075 # compares by a factor of 3.
2025-07-01 17:49:08.075 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.075 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.075 # of the computation is cached by cruncher
2025-07-01 17:49:08.075 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.075 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.076 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.076 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.076 if best_ratio < cutoff:
2025-07-01 17:49:08.076 # no non-identical "pretty close" pair
2025-07-01 17:49:08.076 if eqi is None:
2025-07-01 17:49:08.076 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.076 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.076 return
2025-07-01 17:49:08.076 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.076 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.076 else:
2025-07-01 17:49:08.076 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.076 eqi = None
2025-07-01 17:49:08.076
2025-07-01 17:49:08.076 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.076 # identical
2025-07-01 17:49:08.076
2025-07-01 17:49:08.077 # pump out diffs from before the synch point
2025-07-01 17:49:08.077 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.077
2025-07-01 17:49:08.077 # do intraline marking on the synch pair
2025-07-01 17:49:08.077 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.077 if eqi is None:
2025-07-01 17:49:08.077 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.077 atags = btags = ""
2025-07-01 17:49:08.077 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.077 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.077 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.077 if tag == 'replace':
2025-07-01 17:49:08.077 atags += '^' * la
2025-07-01 17:49:08.077 btags += '^' * lb
2025-07-01 17:49:08.077 elif tag == 'delete':
2025-07-01 17:49:08.077 atags += '-' * la
2025-07-01 17:49:08.077 elif tag == 'insert':
2025-07-01 17:49:08.078 btags += '+' * lb
2025-07-01 17:49:08.078 elif tag == 'equal':
2025-07-01 17:49:08.078 atags += ' ' * la
2025-07-01 17:49:08.078 btags += ' ' * lb
2025-07-01 17:49:08.078 else:
2025-07-01 17:49:08.078 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.078 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.078 else:
2025-07-01 17:49:08.078 # the synch pair is identical
2025-07-01 17:49:08.078 yield ' ' + aelt
2025-07-01 17:49:08.078
2025-07-01 17:49:08.078 # pump out diffs from after the synch point
2025-07-01 17:49:08.078 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.078
2025-07-01 17:49:08.078 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.078 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.079
2025-07-01 17:49:08.083 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.083 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.084 alo = 345, ahi = 1101
2025-07-01 17:49:08.084 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.084 blo = 345, bhi = 1101
2025-07-01 17:49:08.084
2025-07-01 17:49:08.084 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.084 g = []
2025-07-01 17:49:08.084 if alo < ahi:
2025-07-01 17:49:08.084 if blo < bhi:
2025-07-01 17:49:08.084 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.084 else:
2025-07-01 17:49:08.084 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.084 elif blo < bhi:
2025-07-01 17:49:08.084 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.084
2025-07-01 17:49:08.084 > yield from g
2025-07-01 17:49:08.084
2025-07-01 17:49:08.085 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.085 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.085
2025-07-01 17:49:08.085 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.085 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.085 alo = 345, ahi = 1101
2025-07-01 17:49:08.085 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.085 blo = 345, bhi = 1101
2025-07-01 17:49:08.085
2025-07-01 17:49:08.085 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.085 r"""
2025-07-01 17:49:08.085 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.085 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.085 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.085 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.085
2025-07-01 17:49:08.085 Example:
2025-07-01 17:49:08.085
2025-07-01 17:49:08.085 >>> d = Differ()
2025-07-01 17:49:08.085 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.086 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.086 >>> print(''.join(results), end="")
2025-07-01 17:49:08.086 - abcDefghiJkl
2025-07-01 17:49:08.086 + abcdefGhijkl
2025-07-01 17:49:08.086 """
2025-07-01 17:49:08.086
2025-07-01 17:49:08.086 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.086 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.086 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.086 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.086 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.086
2025-07-01 17:49:08.086 # search for the pair that matches best without being identical
2025-07-01 17:49:08.086 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.086 # on junk -- unless we have to)
2025-07-01 17:49:08.086 for j in range(blo, bhi):
2025-07-01 17:49:08.086 bj = b[j]
2025-07-01 17:49:08.086 cruncher.set_seq2(bj)
2025-07-01 17:49:08.086 for i in range(alo, ahi):
2025-07-01 17:49:08.087 ai = a[i]
2025-07-01 17:49:08.087 if ai == bj:
2025-07-01 17:49:08.087 if eqi is None:
2025-07-01 17:49:08.087 eqi, eqj = i, j
2025-07-01 17:49:08.087 continue
2025-07-01 17:49:08.087 cruncher.set_seq1(ai)
2025-07-01 17:49:08.087 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.087 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.087 # compares by a factor of 3.
2025-07-01 17:49:08.087 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.087 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.087 # of the computation is cached by cruncher
2025-07-01 17:49:08.087 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.087 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.087 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.087 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.087 if best_ratio < cutoff:
2025-07-01 17:49:08.087 # no non-identical "pretty close" pair
2025-07-01 17:49:08.088 if eqi is None:
2025-07-01 17:49:08.088 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.088 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.088 return
2025-07-01 17:49:08.088 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.088 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.088 else:
2025-07-01 17:49:08.088 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.088 eqi = None
2025-07-01 17:49:08.088
2025-07-01 17:49:08.088 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.088 # identical
2025-07-01 17:49:08.088
2025-07-01 17:49:08.088 # pump out diffs from before the synch point
2025-07-01 17:49:08.088 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.088
2025-07-01 17:49:08.088 # do intraline marking on the synch pair
2025-07-01 17:49:08.088 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.088 if eqi is None:
2025-07-01 17:49:08.088 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.089 atags = btags = ""
2025-07-01 17:49:08.089 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.089 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.089 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.089 if tag == 'replace':
2025-07-01 17:49:08.089 atags += '^' * la
2025-07-01 17:49:08.089 btags += '^' * lb
2025-07-01 17:49:08.089 elif tag == 'delete':
2025-07-01 17:49:08.089 atags += '-' * la
2025-07-01 17:49:08.089 elif tag == 'insert':
2025-07-01 17:49:08.089 btags += '+' * lb
2025-07-01 17:49:08.089 elif tag == 'equal':
2025-07-01 17:49:08.089 atags += ' ' * la
2025-07-01 17:49:08.089 btags += ' ' * lb
2025-07-01 17:49:08.089 else:
2025-07-01 17:49:08.089 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.089 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.089 else:
2025-07-01 17:49:08.089 # the synch pair is identical
2025-07-01 17:49:08.089 yield ' ' + aelt
2025-07-01 17:49:08.089
2025-07-01 17:49:08.090 # pump out diffs from after the synch point
2025-07-01 17:49:08.090 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.090
2025-07-01 17:49:08.090 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.090 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.090
2025-07-01 17:49:08.090 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.090 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.090 alo = 346, ahi = 1101
2025-07-01 17:49:08.090 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.090 blo = 346, bhi = 1101
2025-07-01 17:49:08.090
2025-07-01 17:49:08.090 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.090 g = []
2025-07-01 17:49:08.090 if alo < ahi:
2025-07-01 17:49:08.090 if blo < bhi:
2025-07-01 17:49:08.090 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.090 else:
2025-07-01 17:49:08.090 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.090 elif blo < bhi:
2025-07-01 17:49:08.090 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.091
2025-07-01 17:49:08.091 > yield from g
2025-07-01 17:49:08.091
2025-07-01 17:49:08.091 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.091 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.091
2025-07-01 17:49:08.091 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.091 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.091 alo = 346, ahi = 1101
2025-07-01 17:49:08.091 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.091 blo = 346, bhi = 1101
2025-07-01 17:49:08.091
2025-07-01 17:49:08.091 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.091 r"""
2025-07-01 17:49:08.091 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.091 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.091 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.091 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.091
2025-07-01 17:49:08.091 Example:
2025-07-01 17:49:08.092
2025-07-01 17:49:08.092 >>> d = Differ()
2025-07-01 17:49:08.092 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.092 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.092 >>> print(''.join(results), end="")
2025-07-01 17:49:08.092 - abcDefghiJkl
2025-07-01 17:49:08.092 + abcdefGhijkl
2025-07-01 17:49:08.092 """
2025-07-01 17:49:08.092
2025-07-01 17:49:08.092 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.092 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.092 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.092 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.092 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.092
2025-07-01 17:49:08.092 # search for the pair that matches best without being identical
2025-07-01 17:49:08.092 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.092 # on junk -- unless we have to)
2025-07-01 17:49:08.092 for j in range(blo, bhi):
2025-07-01 17:49:08.093 bj = b[j]
2025-07-01 17:49:08.093 cruncher.set_seq2(bj)
2025-07-01 17:49:08.093 for i in range(alo, ahi):
2025-07-01 17:49:08.093 ai = a[i]
2025-07-01 17:49:08.093 if ai == bj:
2025-07-01 17:49:08.093 if eqi is None:
2025-07-01 17:49:08.093 eqi, eqj = i, j
2025-07-01 17:49:08.093 continue
2025-07-01 17:49:08.093 cruncher.set_seq1(ai)
2025-07-01 17:49:08.093 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.093 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.093 # compares by a factor of 3.
2025-07-01 17:49:08.093 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.093 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.093 # of the computation is cached by cruncher
2025-07-01 17:49:08.093 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.093 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.093 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.093 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.093 if best_ratio < cutoff:
2025-07-01 17:49:08.094 # no non-identical "pretty close" pair
2025-07-01 17:49:08.096 if eqi is None:
2025-07-01 17:49:08.096 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.097 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.097 return
2025-07-01 17:49:08.097 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.097 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.097 else:
2025-07-01 17:49:08.097 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.097 eqi = None
2025-07-01 17:49:08.097
2025-07-01 17:49:08.097 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.097 # identical
2025-07-01 17:49:08.097
2025-07-01 17:49:08.097 # pump out diffs from before the synch point
2025-07-01 17:49:08.097 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.097
2025-07-01 17:49:08.097 # do intraline marking on the synch pair
2025-07-01 17:49:08.097 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.097 if eqi is None:
2025-07-01 17:49:08.097 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.097 atags = btags = ""
2025-07-01 17:49:08.097 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.098 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.098 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.098 if tag == 'replace':
2025-07-01 17:49:08.098 atags += '^' * la
2025-07-01 17:49:08.098 btags += '^' * lb
2025-07-01 17:49:08.098 elif tag == 'delete':
2025-07-01 17:49:08.098 atags += '-' * la
2025-07-01 17:49:08.098 elif tag == 'insert':
2025-07-01 17:49:08.098 btags += '+' * lb
2025-07-01 17:49:08.098 elif tag == 'equal':
2025-07-01 17:49:08.098 atags += ' ' * la
2025-07-01 17:49:08.098 btags += ' ' * lb
2025-07-01 17:49:08.098 else:
2025-07-01 17:49:08.098 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.098 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.098 else:
2025-07-01 17:49:08.098 # the synch pair is identical
2025-07-01 17:49:08.098 yield ' ' + aelt
2025-07-01 17:49:08.098
2025-07-01 17:49:08.098 # pump out diffs from after the synch point
2025-07-01 17:49:08.099 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.099
2025-07-01 17:49:08.099 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.099 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.099
2025-07-01 17:49:08.099 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.099 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.099 alo = 347, ahi = 1101
2025-07-01 17:49:08.099 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.099 blo = 347, bhi = 1101
2025-07-01 17:49:08.099
2025-07-01 17:49:08.099 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.099 g = []
2025-07-01 17:49:08.099 if alo < ahi:
2025-07-01 17:49:08.099 if blo < bhi:
2025-07-01 17:49:08.099 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.099 else:
2025-07-01 17:49:08.099 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.099 elif blo < bhi:
2025-07-01 17:49:08.100 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.100
2025-07-01 17:49:08.100 > yield from g
2025-07-01 17:49:08.100
2025-07-01 17:49:08.100 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.100 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.100
2025-07-01 17:49:08.100 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.100 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.100 alo = 347, ahi = 1101
2025-07-01 17:49:08.100 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.100 blo = 347, bhi = 1101
2025-07-01 17:49:08.100
2025-07-01 17:49:08.100 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.100 r"""
2025-07-01 17:49:08.100 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.100 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.100 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.100 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.101
2025-07-01 17:49:08.101 Example:
2025-07-01 17:49:08.101
2025-07-01 17:49:08.101 >>> d = Differ()
2025-07-01 17:49:08.101 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.101 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.101 >>> print(''.join(results), end="")
2025-07-01 17:49:08.101 - abcDefghiJkl
2025-07-01 17:49:08.101 + abcdefGhijkl
2025-07-01 17:49:08.101 """
2025-07-01 17:49:08.101
2025-07-01 17:49:08.101 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.101 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.101 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.101 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.101 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.101
2025-07-01 17:49:08.101 # search for the pair that matches best without being identical
2025-07-01 17:49:08.102 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.102 # on junk -- unless we have to)
2025-07-01 17:49:08.102 for j in range(blo, bhi):
2025-07-01 17:49:08.102 bj = b[j]
2025-07-01 17:49:08.102 cruncher.set_seq2(bj)
2025-07-01 17:49:08.102 for i in range(alo, ahi):
2025-07-01 17:49:08.102 ai = a[i]
2025-07-01 17:49:08.102 if ai == bj:
2025-07-01 17:49:08.102 if eqi is None:
2025-07-01 17:49:08.102 eqi, eqj = i, j
2025-07-01 17:49:08.102 continue
2025-07-01 17:49:08.102 cruncher.set_seq1(ai)
2025-07-01 17:49:08.102 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.102 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.102 # compares by a factor of 3.
2025-07-01 17:49:08.102 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.102 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.102 # of the computation is cached by cruncher
2025-07-01 17:49:08.102 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.102 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.103 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.103 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.103 if best_ratio < cutoff:
2025-07-01 17:49:08.103 # no non-identical "pretty close" pair
2025-07-01 17:49:08.103 if eqi is None:
2025-07-01 17:49:08.103 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.103 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.103 return
2025-07-01 17:49:08.103 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.103 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.103 else:
2025-07-01 17:49:08.103 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.103 eqi = None
2025-07-01 17:49:08.103
2025-07-01 17:49:08.103 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.103 # identical
2025-07-01 17:49:08.103
2025-07-01 17:49:08.103 # pump out diffs from before the synch point
2025-07-01 17:49:08.103 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.103
2025-07-01 17:49:08.103 # do intraline marking on the synch pair
2025-07-01 17:49:08.104 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.104 if eqi is None:
2025-07-01 17:49:08.104 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.104 atags = btags = ""
2025-07-01 17:49:08.104 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.104 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.104 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.104 if tag == 'replace':
2025-07-01 17:49:08.104 atags += '^' * la
2025-07-01 17:49:08.104 btags += '^' * lb
2025-07-01 17:49:08.104 elif tag == 'delete':
2025-07-01 17:49:08.104 atags += '-' * la
2025-07-01 17:49:08.104 elif tag == 'insert':
2025-07-01 17:49:08.104 btags += '+' * lb
2025-07-01 17:49:08.104 elif tag == 'equal':
2025-07-01 17:49:08.104 atags += ' ' * la
2025-07-01 17:49:08.104 btags += ' ' * lb
2025-07-01 17:49:08.104 else:
2025-07-01 17:49:08.104 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.104 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.105 else:
2025-07-01 17:49:08.105 # the synch pair is identical
2025-07-01 17:49:08.105 yield ' ' + aelt
2025-07-01 17:49:08.105
2025-07-01 17:49:08.105 # pump out diffs from after the synch point
2025-07-01 17:49:08.105 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.105
2025-07-01 17:49:08.105 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.105 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.105
2025-07-01 17:49:08.105 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.105 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.105 alo = 348, ahi = 1101
2025-07-01 17:49:08.105 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.105 blo = 348, bhi = 1101
2025-07-01 17:49:08.105
2025-07-01 17:49:08.105 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.105 g = []
2025-07-01 17:49:08.105 if alo < ahi:
2025-07-01 17:49:08.105 if blo < bhi:
2025-07-01 17:49:08.105 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.106 else:
2025-07-01 17:49:08.106 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.106 elif blo < bhi:
2025-07-01 17:49:08.106 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.106
2025-07-01 17:49:08.106 > yield from g
2025-07-01 17:49:08.106
2025-07-01 17:49:08.106 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.106 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.106
2025-07-01 17:49:08.106 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.106 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.106 alo = 348, ahi = 1101
2025-07-01 17:49:08.106 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.106 blo = 348, bhi = 1101
2025-07-01 17:49:08.106
2025-07-01 17:49:08.106 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.106 r"""
2025-07-01 17:49:08.106 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.106 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.107 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.107 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.107
2025-07-01 17:49:08.107 Example:
2025-07-01 17:49:08.107
2025-07-01 17:49:08.107 >>> d = Differ()
2025-07-01 17:49:08.107 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.107 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.107 >>> print(''.join(results), end="")
2025-07-01 17:49:08.107 - abcDefghiJkl
2025-07-01 17:49:08.107 + abcdefGhijkl
2025-07-01 17:49:08.107 """
2025-07-01 17:49:08.107
2025-07-01 17:49:08.107 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.107 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.107 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.108 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.108 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.108
2025-07-01 17:49:08.108 # search for the pair that matches best without being identical
2025-07-01 17:49:08.108 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.108 # on junk -- unless we have to)
2025-07-01 17:49:08.108 for j in range(blo, bhi):
2025-07-01 17:49:08.108 bj = b[j]
2025-07-01 17:49:08.108 cruncher.set_seq2(bj)
2025-07-01 17:49:08.108 for i in range(alo, ahi):
2025-07-01 17:49:08.108 ai = a[i]
2025-07-01 17:49:08.108 if ai == bj:
2025-07-01 17:49:08.108 if eqi is None:
2025-07-01 17:49:08.108 eqi, eqj = i, j
2025-07-01 17:49:08.108 continue
2025-07-01 17:49:08.108 cruncher.set_seq1(ai)
2025-07-01 17:49:08.108 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.108 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.108 # compares by a factor of 3.
2025-07-01 17:49:08.108 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.108 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.109 # of the computation is cached by cruncher
2025-07-01 17:49:08.109 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.109 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.109 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.109 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.109 if best_ratio < cutoff:
2025-07-01 17:49:08.109 # no non-identical "pretty close" pair
2025-07-01 17:49:08.109 if eqi is None:
2025-07-01 17:49:08.109 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.109 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.109 return
2025-07-01 17:49:08.109 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.109 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.109 else:
2025-07-01 17:49:08.109 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.109 eqi = None
2025-07-01 17:49:08.109
2025-07-01 17:49:08.110 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.114 # identical
2025-07-01 17:49:08.114
2025-07-01 17:49:08.115 # pump out diffs from before the synch point
2025-07-01 17:49:08.115 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.115
2025-07-01 17:49:08.115 # do intraline marking on the synch pair
2025-07-01 17:49:08.115 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.115 if eqi is None:
2025-07-01 17:49:08.115 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.115 atags = btags = ""
2025-07-01 17:49:08.115 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.115 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.115 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.115 if tag == 'replace':
2025-07-01 17:49:08.115 atags += '^' * la
2025-07-01 17:49:08.115 btags += '^' * lb
2025-07-01 17:49:08.115 elif tag == 'delete':
2025-07-01 17:49:08.115 atags += '-' * la
2025-07-01 17:49:08.115 elif tag == 'insert':
2025-07-01 17:49:08.115 btags += '+' * lb
2025-07-01 17:49:08.115 elif tag == 'equal':
2025-07-01 17:49:08.115 atags += ' ' * la
2025-07-01 17:49:08.115 btags += ' ' * lb
2025-07-01 17:49:08.116 else:
2025-07-01 17:49:08.116 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.116 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.116 else:
2025-07-01 17:49:08.116 # the synch pair is identical
2025-07-01 17:49:08.116 yield ' ' + aelt
2025-07-01 17:49:08.116
2025-07-01 17:49:08.116 # pump out diffs from after the synch point
2025-07-01 17:49:08.116 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.116
2025-07-01 17:49:08.116 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.116 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.116
2025-07-01 17:49:08.116 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.116 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.116 alo = 349, ahi = 1101
2025-07-01 17:49:08.116 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.116 blo = 349, bhi = 1101
2025-07-01 17:49:08.116
2025-07-01 17:49:08.116 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.117 g = []
2025-07-01 17:49:08.117 if alo < ahi:
2025-07-01 17:49:08.117 if blo < bhi:
2025-07-01 17:49:08.117 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.117 else:
2025-07-01 17:49:08.117 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.117 elif blo < bhi:
2025-07-01 17:49:08.117 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.117
2025-07-01 17:49:08.117 > yield from g
2025-07-01 17:49:08.117
2025-07-01 17:49:08.117 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.117 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.117
2025-07-01 17:49:08.117 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.117 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.117 alo = 349, ahi = 1101
2025-07-01 17:49:08.117 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.117 blo = 349, bhi = 1101
2025-07-01 17:49:08.118
2025-07-01 17:49:08.118 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.118 r"""
2025-07-01 17:49:08.118 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.118 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.118 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.118 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.118
2025-07-01 17:49:08.118 Example:
2025-07-01 17:49:08.118
2025-07-01 17:49:08.118 >>> d = Differ()
2025-07-01 17:49:08.118 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.118 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.118 >>> print(''.join(results), end="")
2025-07-01 17:49:08.118 - abcDefghiJkl
2025-07-01 17:49:08.118 + abcdefGhijkl
2025-07-01 17:49:08.118 """
2025-07-01 17:49:08.118
2025-07-01 17:49:08.118 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.119 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.119 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.119 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.119 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.119
2025-07-01 17:49:08.119 # search for the pair that matches best without being identical
2025-07-01 17:49:08.119 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.119 # on junk -- unless we have to)
2025-07-01 17:49:08.119 for j in range(blo, bhi):
2025-07-01 17:49:08.119 bj = b[j]
2025-07-01 17:49:08.119 cruncher.set_seq2(bj)
2025-07-01 17:49:08.119 for i in range(alo, ahi):
2025-07-01 17:49:08.119 ai = a[i]
2025-07-01 17:49:08.119 if ai == bj:
2025-07-01 17:49:08.119 if eqi is None:
2025-07-01 17:49:08.119 eqi, eqj = i, j
2025-07-01 17:49:08.119 continue
2025-07-01 17:49:08.119 cruncher.set_seq1(ai)
2025-07-01 17:49:08.119 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.119 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.120 # compares by a factor of 3.
2025-07-01 17:49:08.120 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.120 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.120 # of the computation is cached by cruncher
2025-07-01 17:49:08.120 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.120 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.120 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.120 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.120 if best_ratio < cutoff:
2025-07-01 17:49:08.120 # no non-identical "pretty close" pair
2025-07-01 17:49:08.120 if eqi is None:
2025-07-01 17:49:08.120 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.120 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.120 return
2025-07-01 17:49:08.120 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.120 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.120 else:
2025-07-01 17:49:08.120 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.120 eqi = None
2025-07-01 17:49:08.121
2025-07-01 17:49:08.121 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.121 # identical
2025-07-01 17:49:08.121
2025-07-01 17:49:08.121 # pump out diffs from before the synch point
2025-07-01 17:49:08.121 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.121
2025-07-01 17:49:08.121 # do intraline marking on the synch pair
2025-07-01 17:49:08.121 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.121 if eqi is None:
2025-07-01 17:49:08.121 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.121 atags = btags = ""
2025-07-01 17:49:08.121 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.121 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.121 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.121 if tag == 'replace':
2025-07-01 17:49:08.121 atags += '^' * la
2025-07-01 17:49:08.121 btags += '^' * lb
2025-07-01 17:49:08.121 elif tag == 'delete':
2025-07-01 17:49:08.121 atags += '-' * la
2025-07-01 17:49:08.122 elif tag == 'insert':
2025-07-01 17:49:08.122 btags += '+' * lb
2025-07-01 17:49:08.122 elif tag == 'equal':
2025-07-01 17:49:08.122 atags += ' ' * la
2025-07-01 17:49:08.122 btags += ' ' * lb
2025-07-01 17:49:08.122 else:
2025-07-01 17:49:08.122 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.122 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.122 else:
2025-07-01 17:49:08.122 # the synch pair is identical
2025-07-01 17:49:08.122 yield ' ' + aelt
2025-07-01 17:49:08.122
2025-07-01 17:49:08.122 # pump out diffs from after the synch point
2025-07-01 17:49:08.122 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.122
2025-07-01 17:49:08.122 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.122 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.122
2025-07-01 17:49:08.122 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.122 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.122 alo = 350, ahi = 1101
2025-07-01 17:49:08.123 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.123 blo = 350, bhi = 1101
2025-07-01 17:49:08.123
2025-07-01 17:49:08.123 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.123 g = []
2025-07-01 17:49:08.123 if alo < ahi:
2025-07-01 17:49:08.123 if blo < bhi:
2025-07-01 17:49:08.123 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.123 else:
2025-07-01 17:49:08.123 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.123 elif blo < bhi:
2025-07-01 17:49:08.123 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.123
2025-07-01 17:49:08.123 > yield from g
2025-07-01 17:49:08.123
2025-07-01 17:49:08.123 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.123 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.123
2025-07-01 17:49:08.123 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.124 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.124 alo = 350, ahi = 1101
2025-07-01 17:49:08.124 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.124 blo = 350, bhi = 1101
2025-07-01 17:49:08.124
2025-07-01 17:49:08.124 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.124 r"""
2025-07-01 17:49:08.124 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.124 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.124 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.124 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.124
2025-07-01 17:49:08.124 Example:
2025-07-01 17:49:08.124
2025-07-01 17:49:08.124 >>> d = Differ()
2025-07-01 17:49:08.124 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.124 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.124 >>> print(''.join(results), end="")
2025-07-01 17:49:08.124 - abcDefghiJkl
2025-07-01 17:49:08.124 + abcdefGhijkl
2025-07-01 17:49:08.127 """
2025-07-01 17:49:08.128
2025-07-01 17:49:08.128 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.128 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.128 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.128 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.128 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.128
2025-07-01 17:49:08.128 # search for the pair that matches best without being identical
2025-07-01 17:49:08.128 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.128 # on junk -- unless we have to)
2025-07-01 17:49:08.128 for j in range(blo, bhi):
2025-07-01 17:49:08.128 bj = b[j]
2025-07-01 17:49:08.128 cruncher.set_seq2(bj)
2025-07-01 17:49:08.128 for i in range(alo, ahi):
2025-07-01 17:49:08.128 ai = a[i]
2025-07-01 17:49:08.128 if ai == bj:
2025-07-01 17:49:08.128 if eqi is None:
2025-07-01 17:49:08.128 eqi, eqj = i, j
2025-07-01 17:49:08.128 continue
2025-07-01 17:49:08.128 cruncher.set_seq1(ai)
2025-07-01 17:49:08.129 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.129 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.129 # compares by a factor of 3.
2025-07-01 17:49:08.129 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.129 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.129 # of the computation is cached by cruncher
2025-07-01 17:49:08.129 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.129 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.129 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.129 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.129 if best_ratio < cutoff:
2025-07-01 17:49:08.129 # no non-identical "pretty close" pair
2025-07-01 17:49:08.129 if eqi is None:
2025-07-01 17:49:08.129 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.129 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.129 return
2025-07-01 17:49:08.129 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.129 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.129 else:
2025-07-01 17:49:08.130 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.130 eqi = None
2025-07-01 17:49:08.130
2025-07-01 17:49:08.130 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.130 # identical
2025-07-01 17:49:08.130
2025-07-01 17:49:08.130 # pump out diffs from before the synch point
2025-07-01 17:49:08.130 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.130
2025-07-01 17:49:08.130 # do intraline marking on the synch pair
2025-07-01 17:49:08.130 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.130 if eqi is None:
2025-07-01 17:49:08.130 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.130 atags = btags = ""
2025-07-01 17:49:08.130 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.130 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.130 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.130 if tag == 'replace':
2025-07-01 17:49:08.130 atags += '^' * la
2025-07-01 17:49:08.130 btags += '^' * lb
2025-07-01 17:49:08.131 elif tag == 'delete':
2025-07-01 17:49:08.131 atags += '-' * la
2025-07-01 17:49:08.131 elif tag == 'insert':
2025-07-01 17:49:08.131 btags += '+' * lb
2025-07-01 17:49:08.131 elif tag == 'equal':
2025-07-01 17:49:08.131 atags += ' ' * la
2025-07-01 17:49:08.131 btags += ' ' * lb
2025-07-01 17:49:08.131 else:
2025-07-01 17:49:08.131 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.131 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.131 else:
2025-07-01 17:49:08.131 # the synch pair is identical
2025-07-01 17:49:08.131 yield ' ' + aelt
2025-07-01 17:49:08.131
2025-07-01 17:49:08.131 # pump out diffs from after the synch point
2025-07-01 17:49:08.131 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.131
2025-07-01 17:49:08.131 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.131 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.131
2025-07-01 17:49:08.132 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.132 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.132 alo = 351, ahi = 1101
2025-07-01 17:49:08.132 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.132 blo = 351, bhi = 1101
2025-07-01 17:49:08.132
2025-07-01 17:49:08.132 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.132 g = []
2025-07-01 17:49:08.132 if alo < ahi:
2025-07-01 17:49:08.132 if blo < bhi:
2025-07-01 17:49:08.132 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.132 else:
2025-07-01 17:49:08.132 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.132 elif blo < bhi:
2025-07-01 17:49:08.132 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.132
2025-07-01 17:49:08.132 > yield from g
2025-07-01 17:49:08.132
2025-07-01 17:49:08.133 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.133 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.133
2025-07-01 17:49:08.133 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.133 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.133 alo = 351, ahi = 1101
2025-07-01 17:49:08.133 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.133 blo = 351, bhi = 1101
2025-07-01 17:49:08.133
2025-07-01 17:49:08.133 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.133 r"""
2025-07-01 17:49:08.133 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.133 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.133 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.133 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.133
2025-07-01 17:49:08.133 Example:
2025-07-01 17:49:08.133
2025-07-01 17:49:08.133 >>> d = Differ()
2025-07-01 17:49:08.133 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.134 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.134 >>> print(''.join(results), end="")
2025-07-01 17:49:08.134 - abcDefghiJkl
2025-07-01 17:49:08.134 + abcdefGhijkl
2025-07-01 17:49:08.134 """
2025-07-01 17:49:08.134
2025-07-01 17:49:08.134 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.134 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.134 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.134 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.134 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.134
2025-07-01 17:49:08.134 # search for the pair that matches best without being identical
2025-07-01 17:49:08.134 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.134 # on junk -- unless we have to)
2025-07-01 17:49:08.134 for j in range(blo, bhi):
2025-07-01 17:49:08.134 bj = b[j]
2025-07-01 17:49:08.134 cruncher.set_seq2(bj)
2025-07-01 17:49:08.135 for i in range(alo, ahi):
2025-07-01 17:49:08.135 ai = a[i]
2025-07-01 17:49:08.135 if ai == bj:
2025-07-01 17:49:08.135 if eqi is None:
2025-07-01 17:49:08.135 eqi, eqj = i, j
2025-07-01 17:49:08.135 continue
2025-07-01 17:49:08.135 cruncher.set_seq1(ai)
2025-07-01 17:49:08.135 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.135 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.135 # compares by a factor of 3.
2025-07-01 17:49:08.135 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.135 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.135 # of the computation is cached by cruncher
2025-07-01 17:49:08.135 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.135 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.135 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.135 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.135 if best_ratio < cutoff:
2025-07-01 17:49:08.135 # no non-identical "pretty close" pair
2025-07-01 17:49:08.135 if eqi is None:
2025-07-01 17:49:08.136 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.136 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.136 return
2025-07-01 17:49:08.136 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.136 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.136 else:
2025-07-01 17:49:08.136 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.136 eqi = None
2025-07-01 17:49:08.136
2025-07-01 17:49:08.136 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.136 # identical
2025-07-01 17:49:08.136
2025-07-01 17:49:08.136 # pump out diffs from before the synch point
2025-07-01 17:49:08.136 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.136
2025-07-01 17:49:08.136 # do intraline marking on the synch pair
2025-07-01 17:49:08.136 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.136 if eqi is None:
2025-07-01 17:49:08.136 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.136 atags = btags = ""
2025-07-01 17:49:08.137 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.137 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.137 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.137 if tag == 'replace':
2025-07-01 17:49:08.137 atags += '^' * la
2025-07-01 17:49:08.137 btags += '^' * lb
2025-07-01 17:49:08.137 elif tag == 'delete':
2025-07-01 17:49:08.137 atags += '-' * la
2025-07-01 17:49:08.137 elif tag == 'insert':
2025-07-01 17:49:08.137 btags += '+' * lb
2025-07-01 17:49:08.137 elif tag == 'equal':
2025-07-01 17:49:08.137 atags += ' ' * la
2025-07-01 17:49:08.137 btags += ' ' * lb
2025-07-01 17:49:08.137 else:
2025-07-01 17:49:08.137 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.137 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.137 else:
2025-07-01 17:49:08.137 # the synch pair is identical
2025-07-01 17:49:08.137 yield ' ' + aelt
2025-07-01 17:49:08.137
2025-07-01 17:49:08.137 # pump out diffs from after the synch point
2025-07-01 17:49:08.138 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.138
2025-07-01 17:49:08.138 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.138 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.138
2025-07-01 17:49:08.138 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.138 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.138 alo = 352, ahi = 1101
2025-07-01 17:49:08.138 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.138 blo = 352, bhi = 1101
2025-07-01 17:49:08.138
2025-07-01 17:49:08.138 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.138 g = []
2025-07-01 17:49:08.138 if alo < ahi:
2025-07-01 17:49:08.138 if blo < bhi:
2025-07-01 17:49:08.138 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.138 else:
2025-07-01 17:49:08.138 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.138 elif blo < bhi:
2025-07-01 17:49:08.138 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.139
2025-07-01 17:49:08.139 > yield from g
2025-07-01 17:49:08.139
2025-07-01 17:49:08.139 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.139 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.139
2025-07-01 17:49:08.139 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.139 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.139 alo = 352, ahi = 1101
2025-07-01 17:49:08.139 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.139 blo = 352, bhi = 1101
2025-07-01 17:49:08.139
2025-07-01 17:49:08.139 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.139 r"""
2025-07-01 17:49:08.139 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.139 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.139 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.139 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.139
2025-07-01 17:49:08.139 Example:
2025-07-01 17:49:08.139
2025-07-01 17:49:08.145 >>> d = Differ()
2025-07-01 17:49:08.145 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.145 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.145 >>> print(''.join(results), end="")
2025-07-01 17:49:08.145 - abcDefghiJkl
2025-07-01 17:49:08.145 + abcdefGhijkl
2025-07-01 17:49:08.145 """
2025-07-01 17:49:08.145
2025-07-01 17:49:08.145 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.145 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.145 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.145 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.145 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.145
2025-07-01 17:49:08.145 # search for the pair that matches best without being identical
2025-07-01 17:49:08.146 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.146 # on junk -- unless we have to)
2025-07-01 17:49:08.146 for j in range(blo, bhi):
2025-07-01 17:49:08.146 bj = b[j]
2025-07-01 17:49:08.146 cruncher.set_seq2(bj)
2025-07-01 17:49:08.146 for i in range(alo, ahi):
2025-07-01 17:49:08.146 ai = a[i]
2025-07-01 17:49:08.146 if ai == bj:
2025-07-01 17:49:08.146 if eqi is None:
2025-07-01 17:49:08.146 eqi, eqj = i, j
2025-07-01 17:49:08.146 continue
2025-07-01 17:49:08.146 cruncher.set_seq1(ai)
2025-07-01 17:49:08.146 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.146 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.146 # compares by a factor of 3.
2025-07-01 17:49:08.146 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.146 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.146 # of the computation is cached by cruncher
2025-07-01 17:49:08.147 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.147 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.147 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.147 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.147 if best_ratio < cutoff:
2025-07-01 17:49:08.147 # no non-identical "pretty close" pair
2025-07-01 17:49:08.147 if eqi is None:
2025-07-01 17:49:08.147 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.147 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.147 return
2025-07-01 17:49:08.147 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.147 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.147 else:
2025-07-01 17:49:08.147 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.147 eqi = None
2025-07-01 17:49:08.147
2025-07-01 17:49:08.147 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.147 # identical
2025-07-01 17:49:08.147
2025-07-01 17:49:08.147 # pump out diffs from before the synch point
2025-07-01 17:49:08.147 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.148
2025-07-01 17:49:08.148 # do intraline marking on the synch pair
2025-07-01 17:49:08.148 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.148 if eqi is None:
2025-07-01 17:49:08.148 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.148 atags = btags = ""
2025-07-01 17:49:08.148 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.148 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.148 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.148 if tag == 'replace':
2025-07-01 17:49:08.148 atags += '^' * la
2025-07-01 17:49:08.148 btags += '^' * lb
2025-07-01 17:49:08.148 elif tag == 'delete':
2025-07-01 17:49:08.148 atags += '-' * la
2025-07-01 17:49:08.148 elif tag == 'insert':
2025-07-01 17:49:08.148 btags += '+' * lb
2025-07-01 17:49:08.148 elif tag == 'equal':
2025-07-01 17:49:08.148 atags += ' ' * la
2025-07-01 17:49:08.148 btags += ' ' * lb
2025-07-01 17:49:08.149 else:
2025-07-01 17:49:08.149 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.149 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.149 else:
2025-07-01 17:49:08.149 # the synch pair is identical
2025-07-01 17:49:08.149 yield ' ' + aelt
2025-07-01 17:49:08.149
2025-07-01 17:49:08.149 # pump out diffs from after the synch point
2025-07-01 17:49:08.149 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.149
2025-07-01 17:49:08.149 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.149 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.149
2025-07-01 17:49:08.149 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.149 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.149 alo = 353, ahi = 1101
2025-07-01 17:49:08.149 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.149 blo = 353, bhi = 1101
2025-07-01 17:49:08.149
2025-07-01 17:49:08.149 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.149 g = []
2025-07-01 17:49:08.150 if alo < ahi:
2025-07-01 17:49:08.150 if blo < bhi:
2025-07-01 17:49:08.150 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.150 else:
2025-07-01 17:49:08.150 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.150 elif blo < bhi:
2025-07-01 17:49:08.150 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.150
2025-07-01 17:49:08.150 > yield from g
2025-07-01 17:49:08.150
2025-07-01 17:49:08.150 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.150 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.150
2025-07-01 17:49:08.150 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.150 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.150 alo = 353, ahi = 1101
2025-07-01 17:49:08.150 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.150 blo = 353, bhi = 1101
2025-07-01 17:49:08.150
2025-07-01 17:49:08.150 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.151 r"""
2025-07-01 17:49:08.151 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.151 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.151 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.151 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.151
2025-07-01 17:49:08.151 Example:
2025-07-01 17:49:08.151
2025-07-01 17:49:08.151 >>> d = Differ()
2025-07-01 17:49:08.151 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.151 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.151 >>> print(''.join(results), end="")
2025-07-01 17:49:08.151 - abcDefghiJkl
2025-07-01 17:49:08.151 + abcdefGhijkl
2025-07-01 17:49:08.151 """
2025-07-01 17:49:08.151
2025-07-01 17:49:08.151 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.151 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.151 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.152 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.152 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.152
2025-07-01 17:49:08.152 # search for the pair that matches best without being identical
2025-07-01 17:49:08.152 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.152 # on junk -- unless we have to)
2025-07-01 17:49:08.152 for j in range(blo, bhi):
2025-07-01 17:49:08.152 bj = b[j]
2025-07-01 17:49:08.152 cruncher.set_seq2(bj)
2025-07-01 17:49:08.152 for i in range(alo, ahi):
2025-07-01 17:49:08.152 ai = a[i]
2025-07-01 17:49:08.152 if ai == bj:
2025-07-01 17:49:08.152 if eqi is None:
2025-07-01 17:49:08.152 eqi, eqj = i, j
2025-07-01 17:49:08.152 continue
2025-07-01 17:49:08.152 cruncher.set_seq1(ai)
2025-07-01 17:49:08.152 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.152 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.152 # compares by a factor of 3.
2025-07-01 17:49:08.152 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.153 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.153 # of the computation is cached by cruncher
2025-07-01 17:49:08.153 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.153 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.153 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.153 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.153 if best_ratio < cutoff:
2025-07-01 17:49:08.153 # no non-identical "pretty close" pair
2025-07-01 17:49:08.153 if eqi is None:
2025-07-01 17:49:08.153 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.153 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.153 return
2025-07-01 17:49:08.153 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.153 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.153 else:
2025-07-01 17:49:08.153 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.153 eqi = None
2025-07-01 17:49:08.153
2025-07-01 17:49:08.153 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.153 # identical
2025-07-01 17:49:08.153
2025-07-01 17:49:08.154 # pump out diffs from before the synch point
2025-07-01 17:49:08.154 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.154
2025-07-01 17:49:08.154 # do intraline marking on the synch pair
2025-07-01 17:49:08.154 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.154 if eqi is None:
2025-07-01 17:49:08.154 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.154 atags = btags = ""
2025-07-01 17:49:08.154 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.154 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.154 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.154 if tag == 'replace':
2025-07-01 17:49:08.154 atags += '^' * la
2025-07-01 17:49:08.154 btags += '^' * lb
2025-07-01 17:49:08.154 elif tag == 'delete':
2025-07-01 17:49:08.154 atags += '-' * la
2025-07-01 17:49:08.154 elif tag == 'insert':
2025-07-01 17:49:08.154 btags += '+' * lb
2025-07-01 17:49:08.154 elif tag == 'equal':
2025-07-01 17:49:08.154 atags += ' ' * la
2025-07-01 17:49:08.155 btags += ' ' * lb
2025-07-01 17:49:08.158 else:
2025-07-01 17:49:08.158 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.158 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.158 else:
2025-07-01 17:49:08.158 # the synch pair is identical
2025-07-01 17:49:08.158 yield ' ' + aelt
2025-07-01 17:49:08.158
2025-07-01 17:49:08.158 # pump out diffs from after the synch point
2025-07-01 17:49:08.158 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.158
2025-07-01 17:49:08.158 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.158 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.158
2025-07-01 17:49:08.158 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.158 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.158 alo = 356, ahi = 1101
2025-07-01 17:49:08.158 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.158 blo = 356, bhi = 1101
2025-07-01 17:49:08.158
2025-07-01 17:49:08.158 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.159 g = []
2025-07-01 17:49:08.159 if alo < ahi:
2025-07-01 17:49:08.159 if blo < bhi:
2025-07-01 17:49:08.159 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.159 else:
2025-07-01 17:49:08.159 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.159 elif blo < bhi:
2025-07-01 17:49:08.159 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.159
2025-07-01 17:49:08.159 > yield from g
2025-07-01 17:49:08.159
2025-07-01 17:49:08.159 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.159 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.159
2025-07-01 17:49:08.159 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.159 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.159 alo = 356, ahi = 1101
2025-07-01 17:49:08.159 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.159 blo = 356, bhi = 1101
2025-07-01 17:49:08.159
2025-07-01 17:49:08.160 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.160 r"""
2025-07-01 17:49:08.160 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.160 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.160 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.160 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.160
2025-07-01 17:49:08.160 Example:
2025-07-01 17:49:08.160
2025-07-01 17:49:08.160 >>> d = Differ()
2025-07-01 17:49:08.160 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.160 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.160 >>> print(''.join(results), end="")
2025-07-01 17:49:08.160 - abcDefghiJkl
2025-07-01 17:49:08.160 + abcdefGhijkl
2025-07-01 17:49:08.160 """
2025-07-01 17:49:08.160
2025-07-01 17:49:08.160 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.160 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.161 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.161 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.161 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.161
2025-07-01 17:49:08.161 # search for the pair that matches best without being identical
2025-07-01 17:49:08.161 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.161 # on junk -- unless we have to)
2025-07-01 17:49:08.161 for j in range(blo, bhi):
2025-07-01 17:49:08.161 bj = b[j]
2025-07-01 17:49:08.161 cruncher.set_seq2(bj)
2025-07-01 17:49:08.161 for i in range(alo, ahi):
2025-07-01 17:49:08.161 ai = a[i]
2025-07-01 17:49:08.161 if ai == bj:
2025-07-01 17:49:08.161 if eqi is None:
2025-07-01 17:49:08.161 eqi, eqj = i, j
2025-07-01 17:49:08.161 continue
2025-07-01 17:49:08.161 cruncher.set_seq1(ai)
2025-07-01 17:49:08.161 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.161 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.162 # compares by a factor of 3.
2025-07-01 17:49:08.162 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.162 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.162 # of the computation is cached by cruncher
2025-07-01 17:49:08.162 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.162 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.162 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.162 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.162 if best_ratio < cutoff:
2025-07-01 17:49:08.162 # no non-identical "pretty close" pair
2025-07-01 17:49:08.162 if eqi is None:
2025-07-01 17:49:08.162 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.162 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.162 return
2025-07-01 17:49:08.162 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.162 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.162 else:
2025-07-01 17:49:08.162 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.162 eqi = None
2025-07-01 17:49:08.162
2025-07-01 17:49:08.163 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.163 # identical
2025-07-01 17:49:08.163
2025-07-01 17:49:08.163 # pump out diffs from before the synch point
2025-07-01 17:49:08.163 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.163
2025-07-01 17:49:08.163 # do intraline marking on the synch pair
2025-07-01 17:49:08.163 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.163 if eqi is None:
2025-07-01 17:49:08.163 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.163 atags = btags = ""
2025-07-01 17:49:08.163 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.163 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.163 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.163 if tag == 'replace':
2025-07-01 17:49:08.163 atags += '^' * la
2025-07-01 17:49:08.163 btags += '^' * lb
2025-07-01 17:49:08.163 elif tag == 'delete':
2025-07-01 17:49:08.163 atags += '-' * la
2025-07-01 17:49:08.163 elif tag == 'insert':
2025-07-01 17:49:08.163 btags += '+' * lb
2025-07-01 17:49:08.164 elif tag == 'equal':
2025-07-01 17:49:08.164 atags += ' ' * la
2025-07-01 17:49:08.164 btags += ' ' * lb
2025-07-01 17:49:08.164 else:
2025-07-01 17:49:08.164 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.164 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.164 else:
2025-07-01 17:49:08.164 # the synch pair is identical
2025-07-01 17:49:08.164 yield ' ' + aelt
2025-07-01 17:49:08.164
2025-07-01 17:49:08.164 # pump out diffs from after the synch point
2025-07-01 17:49:08.164 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.164
2025-07-01 17:49:08.164 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.164 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.164
2025-07-01 17:49:08.164 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.164 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.164 alo = 357, ahi = 1101
2025-07-01 17:49:08.164 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.164 blo = 357, bhi = 1101
2025-07-01 17:49:08.165
2025-07-01 17:49:08.165 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.165 g = []
2025-07-01 17:49:08.165 if alo < ahi:
2025-07-01 17:49:08.165 if blo < bhi:
2025-07-01 17:49:08.165 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.165 else:
2025-07-01 17:49:08.165 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.165 elif blo < bhi:
2025-07-01 17:49:08.165 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.165
2025-07-01 17:49:08.165 > yield from g
2025-07-01 17:49:08.165
2025-07-01 17:49:08.165 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.165 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.165
2025-07-01 17:49:08.165 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.165 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.165 alo = 357, ahi = 1101
2025-07-01 17:49:08.165 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.166 blo = 357, bhi = 1101
2025-07-01 17:49:08.166
2025-07-01 17:49:08.166 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.166 r"""
2025-07-01 17:49:08.166 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.166 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.166 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.166 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.166
2025-07-01 17:49:08.166 Example:
2025-07-01 17:49:08.166
2025-07-01 17:49:08.166 >>> d = Differ()
2025-07-01 17:49:08.166 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.166 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.166 >>> print(''.join(results), end="")
2025-07-01 17:49:08.166 - abcDefghiJkl
2025-07-01 17:49:08.166 + abcdefGhijkl
2025-07-01 17:49:08.166 """
2025-07-01 17:49:08.167
2025-07-01 17:49:08.167 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.167 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.167 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.167 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.167 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.167
2025-07-01 17:49:08.167 # search for the pair that matches best without being identical
2025-07-01 17:49:08.167 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.167 # on junk -- unless we have to)
2025-07-01 17:49:08.167 for j in range(blo, bhi):
2025-07-01 17:49:08.167 bj = b[j]
2025-07-01 17:49:08.167 cruncher.set_seq2(bj)
2025-07-01 17:49:08.167 for i in range(alo, ahi):
2025-07-01 17:49:08.167 ai = a[i]
2025-07-01 17:49:08.167 if ai == bj:
2025-07-01 17:49:08.167 if eqi is None:
2025-07-01 17:49:08.167 eqi, eqj = i, j
2025-07-01 17:49:08.167 continue
2025-07-01 17:49:08.167 cruncher.set_seq1(ai)
2025-07-01 17:49:08.167 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.168 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.168 # compares by a factor of 3.
2025-07-01 17:49:08.168 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.168 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.168 # of the computation is cached by cruncher
2025-07-01 17:49:08.168 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.168 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.168 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.168 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.168 if best_ratio < cutoff:
2025-07-01 17:49:08.168 # no non-identical "pretty close" pair
2025-07-01 17:49:08.168 if eqi is None:
2025-07-01 17:49:08.168 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.168 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.168 return
2025-07-01 17:49:08.168 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.168 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.168 else:
2025-07-01 17:49:08.168 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.168 eqi = None
2025-07-01 17:49:08.168
2025-07-01 17:49:08.169 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.169 # identical
2025-07-01 17:49:08.169
2025-07-01 17:49:08.169 # pump out diffs from before the synch point
2025-07-01 17:49:08.169 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.169
2025-07-01 17:49:08.169 # do intraline marking on the synch pair
2025-07-01 17:49:08.169 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.169 if eqi is None:
2025-07-01 17:49:08.169 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.169 atags = btags = ""
2025-07-01 17:49:08.169 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.169 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.169 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.169 if tag == 'replace':
2025-07-01 17:49:08.169 atags += '^' * la
2025-07-01 17:49:08.169 btags += '^' * lb
2025-07-01 17:49:08.169 elif tag == 'delete':
2025-07-01 17:49:08.169 atags += '-' * la
2025-07-01 17:49:08.169 elif tag == 'insert':
2025-07-01 17:49:08.169 btags += '+' * lb
2025-07-01 17:49:08.170 elif tag == 'equal':
2025-07-01 17:49:08.170 atags += ' ' * la
2025-07-01 17:49:08.170 btags += ' ' * lb
2025-07-01 17:49:08.170 else:
2025-07-01 17:49:08.170 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.170 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.170 else:
2025-07-01 17:49:08.170 # the synch pair is identical
2025-07-01 17:49:08.170 yield ' ' + aelt
2025-07-01 17:49:08.170
2025-07-01 17:49:08.170 # pump out diffs from after the synch point
2025-07-01 17:49:08.170 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.170
2025-07-01 17:49:08.170 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.170 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.170
2025-07-01 17:49:08.170 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.170 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.170 alo = 358, ahi = 1101
2025-07-01 17:49:08.170 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.171 blo = 358, bhi = 1101
2025-07-01 17:49:08.176
2025-07-01 17:49:08.176 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.176 g = []
2025-07-01 17:49:08.176 if alo < ahi:
2025-07-01 17:49:08.176 if blo < bhi:
2025-07-01 17:49:08.176 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.176 else:
2025-07-01 17:49:08.176 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.176 elif blo < bhi:
2025-07-01 17:49:08.176 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.176
2025-07-01 17:49:08.176 > yield from g
2025-07-01 17:49:08.176
2025-07-01 17:49:08.176 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.176 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.176
2025-07-01 17:49:08.176 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.176 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.176 alo = 358, ahi = 1101
2025-07-01 17:49:08.177 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.177 blo = 358, bhi = 1101
2025-07-01 17:49:08.177
2025-07-01 17:49:08.177 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.177 r"""
2025-07-01 17:49:08.177 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.177 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.177 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.177 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.177
2025-07-01 17:49:08.177 Example:
2025-07-01 17:49:08.177
2025-07-01 17:49:08.177 >>> d = Differ()
2025-07-01 17:49:08.177 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.177 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.177 >>> print(''.join(results), end="")
2025-07-01 17:49:08.177 - abcDefghiJkl
2025-07-01 17:49:08.177 + abcdefGhijkl
2025-07-01 17:49:08.177 """
2025-07-01 17:49:08.177
2025-07-01 17:49:08.178 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.178 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.178 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.178 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.178 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.178
2025-07-01 17:49:08.178 # search for the pair that matches best without being identical
2025-07-01 17:49:08.178 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.178 # on junk -- unless we have to)
2025-07-01 17:49:08.178 for j in range(blo, bhi):
2025-07-01 17:49:08.178 bj = b[j]
2025-07-01 17:49:08.178 cruncher.set_seq2(bj)
2025-07-01 17:49:08.178 for i in range(alo, ahi):
2025-07-01 17:49:08.178 ai = a[i]
2025-07-01 17:49:08.178 if ai == bj:
2025-07-01 17:49:08.178 if eqi is None:
2025-07-01 17:49:08.178 eqi, eqj = i, j
2025-07-01 17:49:08.178 continue
2025-07-01 17:49:08.178 cruncher.set_seq1(ai)
2025-07-01 17:49:08.178 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.178 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.179 # compares by a factor of 3.
2025-07-01 17:49:08.179 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.179 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.179 # of the computation is cached by cruncher
2025-07-01 17:49:08.179 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.179 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.179 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.179 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.179 if best_ratio < cutoff:
2025-07-01 17:49:08.179 # no non-identical "pretty close" pair
2025-07-01 17:49:08.179 if eqi is None:
2025-07-01 17:49:08.179 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.179 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.179 return
2025-07-01 17:49:08.179 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.179 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.179 else:
2025-07-01 17:49:08.179 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.179 eqi = None
2025-07-01 17:49:08.180
2025-07-01 17:49:08.180 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.180 # identical
2025-07-01 17:49:08.180
2025-07-01 17:49:08.180 # pump out diffs from before the synch point
2025-07-01 17:49:08.180 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.180
2025-07-01 17:49:08.180 # do intraline marking on the synch pair
2025-07-01 17:49:08.180 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.180 if eqi is None:
2025-07-01 17:49:08.180 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.180 atags = btags = ""
2025-07-01 17:49:08.180 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.180 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.180 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.180 if tag == 'replace':
2025-07-01 17:49:08.180 atags += '^' * la
2025-07-01 17:49:08.180 btags += '^' * lb
2025-07-01 17:49:08.180 elif tag == 'delete':
2025-07-01 17:49:08.180 atags += '-' * la
2025-07-01 17:49:08.181 elif tag == 'insert':
2025-07-01 17:49:08.181 btags += '+' * lb
2025-07-01 17:49:08.181 elif tag == 'equal':
2025-07-01 17:49:08.181 atags += ' ' * la
2025-07-01 17:49:08.181 btags += ' ' * lb
2025-07-01 17:49:08.181 else:
2025-07-01 17:49:08.181 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.181 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.181 else:
2025-07-01 17:49:08.181 # the synch pair is identical
2025-07-01 17:49:08.181 yield ' ' + aelt
2025-07-01 17:49:08.181
2025-07-01 17:49:08.181 # pump out diffs from after the synch point
2025-07-01 17:49:08.181 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.181
2025-07-01 17:49:08.181 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.181 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.181
2025-07-01 17:49:08.181 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.181 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.181 alo = 359, ahi = 1101
2025-07-01 17:49:08.182 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.182 blo = 359, bhi = 1101
2025-07-01 17:49:08.182
2025-07-01 17:49:08.182 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.182 g = []
2025-07-01 17:49:08.182 if alo < ahi:
2025-07-01 17:49:08.182 if blo < bhi:
2025-07-01 17:49:08.182 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.182 else:
2025-07-01 17:49:08.182 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.182 elif blo < bhi:
2025-07-01 17:49:08.182 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.182
2025-07-01 17:49:08.182 > yield from g
2025-07-01 17:49:08.182
2025-07-01 17:49:08.182 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.182 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.182
2025-07-01 17:49:08.182 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.182 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.183 alo = 359, ahi = 1101
2025-07-01 17:49:08.183 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.183 blo = 359, bhi = 1101
2025-07-01 17:49:08.183
2025-07-01 17:49:08.183 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.183 r"""
2025-07-01 17:49:08.183 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.183 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.183 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.183 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.183
2025-07-01 17:49:08.183 Example:
2025-07-01 17:49:08.183
2025-07-01 17:49:08.183 >>> d = Differ()
2025-07-01 17:49:08.183 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.183 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.183 >>> print(''.join(results), end="")
2025-07-01 17:49:08.183 - abcDefghiJkl
2025-07-01 17:49:08.183 + abcdefGhijkl
2025-07-01 17:49:08.184 """
2025-07-01 17:49:08.184
2025-07-01 17:49:08.184 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.184 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.184 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.184 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.184 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.184
2025-07-01 17:49:08.184 # search for the pair that matches best without being identical
2025-07-01 17:49:08.184 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.184 # on junk -- unless we have to)
2025-07-01 17:49:08.184 for j in range(blo, bhi):
2025-07-01 17:49:08.184 bj = b[j]
2025-07-01 17:49:08.184 cruncher.set_seq2(bj)
2025-07-01 17:49:08.184 for i in range(alo, ahi):
2025-07-01 17:49:08.184 ai = a[i]
2025-07-01 17:49:08.184 if ai == bj:
2025-07-01 17:49:08.184 if eqi is None:
2025-07-01 17:49:08.184 eqi, eqj = i, j
2025-07-01 17:49:08.184 continue
2025-07-01 17:49:08.184 cruncher.set_seq1(ai)
2025-07-01 17:49:08.185 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.185 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.185 # compares by a factor of 3.
2025-07-01 17:49:08.185 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.185 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.185 # of the computation is cached by cruncher
2025-07-01 17:49:08.185 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.185 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.185 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.185 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.185 if best_ratio < cutoff:
2025-07-01 17:49:08.185 # no non-identical "pretty close" pair
2025-07-01 17:49:08.185 if eqi is None:
2025-07-01 17:49:08.185 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.185 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.185 return
2025-07-01 17:49:08.185 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.185 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.185 else:
2025-07-01 17:49:08.185 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.186 eqi = None
2025-07-01 17:49:08.188
2025-07-01 17:49:08.189 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.189 # identical
2025-07-01 17:49:08.189
2025-07-01 17:49:08.189 # pump out diffs from before the synch point
2025-07-01 17:49:08.189 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.189
2025-07-01 17:49:08.189 # do intraline marking on the synch pair
2025-07-01 17:49:08.189 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.189 if eqi is None:
2025-07-01 17:49:08.189 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.189 atags = btags = ""
2025-07-01 17:49:08.189 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.189 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.189 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.189 if tag == 'replace':
2025-07-01 17:49:08.189 atags += '^' * la
2025-07-01 17:49:08.189 btags += '^' * lb
2025-07-01 17:49:08.189 elif tag == 'delete':
2025-07-01 17:49:08.189 atags += '-' * la
2025-07-01 17:49:08.189 elif tag == 'insert':
2025-07-01 17:49:08.190 btags += '+' * lb
2025-07-01 17:49:08.190 elif tag == 'equal':
2025-07-01 17:49:08.190 atags += ' ' * la
2025-07-01 17:49:08.190 btags += ' ' * lb
2025-07-01 17:49:08.190 else:
2025-07-01 17:49:08.190 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.190 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.190 else:
2025-07-01 17:49:08.190 # the synch pair is identical
2025-07-01 17:49:08.190 yield ' ' + aelt
2025-07-01 17:49:08.190
2025-07-01 17:49:08.190 # pump out diffs from after the synch point
2025-07-01 17:49:08.190 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.190
2025-07-01 17:49:08.190 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.190 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.190
2025-07-01 17:49:08.190 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.190 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.190 alo = 360, ahi = 1101
2025-07-01 17:49:08.190 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.191 blo = 360, bhi = 1101
2025-07-01 17:49:08.191
2025-07-01 17:49:08.191 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.191 g = []
2025-07-01 17:49:08.191 if alo < ahi:
2025-07-01 17:49:08.191 if blo < bhi:
2025-07-01 17:49:08.191 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.191 else:
2025-07-01 17:49:08.191 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.191 elif blo < bhi:
2025-07-01 17:49:08.191 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.191
2025-07-01 17:49:08.191 > yield from g
2025-07-01 17:49:08.191
2025-07-01 17:49:08.191 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.191 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.192
2025-07-01 17:49:08.192 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.192 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.192 alo = 360, ahi = 1101
2025-07-01 17:49:08.192 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.192 blo = 360, bhi = 1101
2025-07-01 17:49:08.192
2025-07-01 17:49:08.192 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.192 r"""
2025-07-01 17:49:08.192 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.192 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.192 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.192 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.192
2025-07-01 17:49:08.192 Example:
2025-07-01 17:49:08.192
2025-07-01 17:49:08.192 >>> d = Differ()
2025-07-01 17:49:08.192 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.192 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.193 >>> print(''.join(results), end="")
2025-07-01 17:49:08.193 - abcDefghiJkl
2025-07-01 17:49:08.193 + abcdefGhijkl
2025-07-01 17:49:08.193 """
2025-07-01 17:49:08.193
2025-07-01 17:49:08.193 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.193 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.193 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.193 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.193 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.193
2025-07-01 17:49:08.193 # search for the pair that matches best without being identical
2025-07-01 17:49:08.193 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.193 # on junk -- unless we have to)
2025-07-01 17:49:08.193 for j in range(blo, bhi):
2025-07-01 17:49:08.193 bj = b[j]
2025-07-01 17:49:08.193 cruncher.set_seq2(bj)
2025-07-01 17:49:08.193 for i in range(alo, ahi):
2025-07-01 17:49:08.193 ai = a[i]
2025-07-01 17:49:08.194 if ai == bj:
2025-07-01 17:49:08.194 if eqi is None:
2025-07-01 17:49:08.194 eqi, eqj = i, j
2025-07-01 17:49:08.194 continue
2025-07-01 17:49:08.194 cruncher.set_seq1(ai)
2025-07-01 17:49:08.194 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.194 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.194 # compares by a factor of 3.
2025-07-01 17:49:08.194 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.194 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.194 # of the computation is cached by cruncher
2025-07-01 17:49:08.194 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.194 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.194 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.194 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.194 if best_ratio < cutoff:
2025-07-01 17:49:08.194 # no non-identical "pretty close" pair
2025-07-01 17:49:08.194 if eqi is None:
2025-07-01 17:49:08.194 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.194 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.194 return
2025-07-01 17:49:08.195 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.195 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.195 else:
2025-07-01 17:49:08.195 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.195 eqi = None
2025-07-01 17:49:08.195
2025-07-01 17:49:08.195 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.195 # identical
2025-07-01 17:49:08.195
2025-07-01 17:49:08.195 # pump out diffs from before the synch point
2025-07-01 17:49:08.195 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.195
2025-07-01 17:49:08.195 # do intraline marking on the synch pair
2025-07-01 17:49:08.195 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.195 if eqi is None:
2025-07-01 17:49:08.195 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.195 atags = btags = ""
2025-07-01 17:49:08.195 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.195 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.195 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.195 if tag == 'replace':
2025-07-01 17:49:08.195 atags += '^' * la
2025-07-01 17:49:08.196 btags += '^' * lb
2025-07-01 17:49:08.196 elif tag == 'delete':
2025-07-01 17:49:08.196 atags += '-' * la
2025-07-01 17:49:08.196 elif tag == 'insert':
2025-07-01 17:49:08.196 btags += '+' * lb
2025-07-01 17:49:08.196 elif tag == 'equal':
2025-07-01 17:49:08.196 atags += ' ' * la
2025-07-01 17:49:08.196 btags += ' ' * lb
2025-07-01 17:49:08.196 else:
2025-07-01 17:49:08.196 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.196 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.196 else:
2025-07-01 17:49:08.196 # the synch pair is identical
2025-07-01 17:49:08.196 yield ' ' + aelt
2025-07-01 17:49:08.196
2025-07-01 17:49:08.196 # pump out diffs from after the synch point
2025-07-01 17:49:08.196 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.196
2025-07-01 17:49:08.196 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.196 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.196
2025-07-01 17:49:08.197 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.197 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.197 alo = 361, ahi = 1101
2025-07-01 17:49:08.197 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.197 blo = 361, bhi = 1101
2025-07-01 17:49:08.197
2025-07-01 17:49:08.197 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.197 g = []
2025-07-01 17:49:08.197 if alo < ahi:
2025-07-01 17:49:08.197 if blo < bhi:
2025-07-01 17:49:08.197 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.197 else:
2025-07-01 17:49:08.197 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.197 elif blo < bhi:
2025-07-01 17:49:08.197 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.197
2025-07-01 17:49:08.197 > yield from g
2025-07-01 17:49:08.197
2025-07-01 17:49:08.197 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.197 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.198
2025-07-01 17:49:08.198 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.198 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.198 alo = 361, ahi = 1101
2025-07-01 17:49:08.198 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.198 blo = 361, bhi = 1101
2025-07-01 17:49:08.198
2025-07-01 17:49:08.198 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.198 r"""
2025-07-01 17:49:08.198 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.198 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.198 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.198 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.198
2025-07-01 17:49:08.198 Example:
2025-07-01 17:49:08.198
2025-07-01 17:49:08.198 >>> d = Differ()
2025-07-01 17:49:08.198 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.198 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.199 >>> print(''.join(results), end="")
2025-07-01 17:49:08.199 - abcDefghiJkl
2025-07-01 17:49:08.199 + abcdefGhijkl
2025-07-01 17:49:08.199 """
2025-07-01 17:49:08.199
2025-07-01 17:49:08.199 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.199 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.199 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.199 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.199 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.199
2025-07-01 17:49:08.199 # search for the pair that matches best without being identical
2025-07-01 17:49:08.199 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.199 # on junk -- unless we have to)
2025-07-01 17:49:08.199 for j in range(blo, bhi):
2025-07-01 17:49:08.199 bj = b[j]
2025-07-01 17:49:08.199 cruncher.set_seq2(bj)
2025-07-01 17:49:08.199 for i in range(alo, ahi):
2025-07-01 17:49:08.200 ai = a[i]
2025-07-01 17:49:08.200 if ai == bj:
2025-07-01 17:49:08.200 if eqi is None:
2025-07-01 17:49:08.200 eqi, eqj = i, j
2025-07-01 17:49:08.200 continue
2025-07-01 17:49:08.200 cruncher.set_seq1(ai)
2025-07-01 17:49:08.200 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.200 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.200 # compares by a factor of 3.
2025-07-01 17:49:08.200 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.200 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.200 # of the computation is cached by cruncher
2025-07-01 17:49:08.200 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.200 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.200 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.200 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.200 if best_ratio < cutoff:
2025-07-01 17:49:08.200 # no non-identical "pretty close" pair
2025-07-01 17:49:08.200 if eqi is None:
2025-07-01 17:49:08.200 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.201 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.201 return
2025-07-01 17:49:08.201 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.201 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.201 else:
2025-07-01 17:49:08.201 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.201 eqi = None
2025-07-01 17:49:08.201
2025-07-01 17:49:08.201 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.201 # identical
2025-07-01 17:49:08.201
2025-07-01 17:49:08.201 # pump out diffs from before the synch point
2025-07-01 17:49:08.201 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.201
2025-07-01 17:49:08.201 # do intraline marking on the synch pair
2025-07-01 17:49:08.201 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.201 if eqi is None:
2025-07-01 17:49:08.201 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.201 atags = btags = ""
2025-07-01 17:49:08.201 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.201 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.206 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.207 if tag == 'replace':
2025-07-01 17:49:08.207 atags += '^' * la
2025-07-01 17:49:08.207 btags += '^' * lb
2025-07-01 17:49:08.207 elif tag == 'delete':
2025-07-01 17:49:08.207 atags += '-' * la
2025-07-01 17:49:08.207 elif tag == 'insert':
2025-07-01 17:49:08.207 btags += '+' * lb
2025-07-01 17:49:08.207 elif tag == 'equal':
2025-07-01 17:49:08.207 atags += ' ' * la
2025-07-01 17:49:08.207 btags += ' ' * lb
2025-07-01 17:49:08.207 else:
2025-07-01 17:49:08.207 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.207 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.207 else:
2025-07-01 17:49:08.207 # the synch pair is identical
2025-07-01 17:49:08.207 yield ' ' + aelt
2025-07-01 17:49:08.207
2025-07-01 17:49:08.207 # pump out diffs from after the synch point
2025-07-01 17:49:08.207 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.207
2025-07-01 17:49:08.207 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.208 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.208
2025-07-01 17:49:08.208 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.208 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.208 alo = 362, ahi = 1101
2025-07-01 17:49:08.208 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.208 blo = 362, bhi = 1101
2025-07-01 17:49:08.208
2025-07-01 17:49:08.208 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.208 g = []
2025-07-01 17:49:08.208 if alo < ahi:
2025-07-01 17:49:08.208 if blo < bhi:
2025-07-01 17:49:08.208 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.208 else:
2025-07-01 17:49:08.208 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.208 elif blo < bhi:
2025-07-01 17:49:08.208 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.208
2025-07-01 17:49:08.208 > yield from g
2025-07-01 17:49:08.208
2025-07-01 17:49:08.209 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.209 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.209
2025-07-01 17:49:08.209 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.209 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.209 alo = 362, ahi = 1101
2025-07-01 17:49:08.209 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.209 blo = 362, bhi = 1101
2025-07-01 17:49:08.209
2025-07-01 17:49:08.209 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.209 r"""
2025-07-01 17:49:08.209 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.209 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.209 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.209 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.209
2025-07-01 17:49:08.209 Example:
2025-07-01 17:49:08.209
2025-07-01 17:49:08.209 >>> d = Differ()
2025-07-01 17:49:08.209 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.210 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.210 >>> print(''.join(results), end="")
2025-07-01 17:49:08.210 - abcDefghiJkl
2025-07-01 17:49:08.210 + abcdefGhijkl
2025-07-01 17:49:08.210 """
2025-07-01 17:49:08.210
2025-07-01 17:49:08.210 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.210 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.210 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.210 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.210 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.210
2025-07-01 17:49:08.210 # search for the pair that matches best without being identical
2025-07-01 17:49:08.210 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.210 # on junk -- unless we have to)
2025-07-01 17:49:08.210 for j in range(blo, bhi):
2025-07-01 17:49:08.210 bj = b[j]
2025-07-01 17:49:08.210 cruncher.set_seq2(bj)
2025-07-01 17:49:08.210 for i in range(alo, ahi):
2025-07-01 17:49:08.211 ai = a[i]
2025-07-01 17:49:08.211 if ai == bj:
2025-07-01 17:49:08.211 if eqi is None:
2025-07-01 17:49:08.211 eqi, eqj = i, j
2025-07-01 17:49:08.211 continue
2025-07-01 17:49:08.211 cruncher.set_seq1(ai)
2025-07-01 17:49:08.211 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.211 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.211 # compares by a factor of 3.
2025-07-01 17:49:08.211 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.211 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.211 # of the computation is cached by cruncher
2025-07-01 17:49:08.211 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.211 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.211 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.211 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.211 if best_ratio < cutoff:
2025-07-01 17:49:08.211 # no non-identical "pretty close" pair
2025-07-01 17:49:08.211 if eqi is None:
2025-07-01 17:49:08.211 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.211 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.211 return
2025-07-01 17:49:08.212 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.212 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.212 else:
2025-07-01 17:49:08.212 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.212 eqi = None
2025-07-01 17:49:08.212
2025-07-01 17:49:08.212 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.212 # identical
2025-07-01 17:49:08.212
2025-07-01 17:49:08.212 # pump out diffs from before the synch point
2025-07-01 17:49:08.212 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.212
2025-07-01 17:49:08.212 # do intraline marking on the synch pair
2025-07-01 17:49:08.212 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.212 if eqi is None:
2025-07-01 17:49:08.212 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.212 atags = btags = ""
2025-07-01 17:49:08.212 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.212 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.212 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.213 if tag == 'replace':
2025-07-01 17:49:08.213 atags += '^' * la
2025-07-01 17:49:08.213 btags += '^' * lb
2025-07-01 17:49:08.213 elif tag == 'delete':
2025-07-01 17:49:08.213 atags += '-' * la
2025-07-01 17:49:08.213 elif tag == 'insert':
2025-07-01 17:49:08.213 btags += '+' * lb
2025-07-01 17:49:08.213 elif tag == 'equal':
2025-07-01 17:49:08.213 atags += ' ' * la
2025-07-01 17:49:08.213 btags += ' ' * lb
2025-07-01 17:49:08.213 else:
2025-07-01 17:49:08.213 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.213 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.213 else:
2025-07-01 17:49:08.213 # the synch pair is identical
2025-07-01 17:49:08.213 yield ' ' + aelt
2025-07-01 17:49:08.213
2025-07-01 17:49:08.213 # pump out diffs from after the synch point
2025-07-01 17:49:08.213 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.213
2025-07-01 17:49:08.213 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.214 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.214
2025-07-01 17:49:08.214 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.214 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.214 alo = 363, ahi = 1101
2025-07-01 17:49:08.214 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.214 blo = 363, bhi = 1101
2025-07-01 17:49:08.214
2025-07-01 17:49:08.214 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.214 g = []
2025-07-01 17:49:08.214 if alo < ahi:
2025-07-01 17:49:08.214 if blo < bhi:
2025-07-01 17:49:08.214 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.214 else:
2025-07-01 17:49:08.214 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.214 elif blo < bhi:
2025-07-01 17:49:08.214 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.214
2025-07-01 17:49:08.214 > yield from g
2025-07-01 17:49:08.214
2025-07-01 17:49:08.215 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.215 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.215
2025-07-01 17:49:08.215 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.215 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.215 alo = 363, ahi = 1101
2025-07-01 17:49:08.215 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.215 blo = 363, bhi = 1101
2025-07-01 17:49:08.215
2025-07-01 17:49:08.215 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.215 r"""
2025-07-01 17:49:08.215 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.215 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.215 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.215 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.215
2025-07-01 17:49:08.215 Example:
2025-07-01 17:49:08.215
2025-07-01 17:49:08.215 >>> d = Differ()
2025-07-01 17:49:08.215 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.215 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.215 >>> print(''.join(results), end="")
2025-07-01 17:49:08.216 - abcDefghiJkl
2025-07-01 17:49:08.216 + abcdefGhijkl
2025-07-01 17:49:08.216 """
2025-07-01 17:49:08.216
2025-07-01 17:49:08.216 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.216 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.216 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.216 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.216 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.216
2025-07-01 17:49:08.216 # search for the pair that matches best without being identical
2025-07-01 17:49:08.216 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.216 # on junk -- unless we have to)
2025-07-01 17:49:08.216 for j in range(blo, bhi):
2025-07-01 17:49:08.216 bj = b[j]
2025-07-01 17:49:08.216 cruncher.set_seq2(bj)
2025-07-01 17:49:08.216 for i in range(alo, ahi):
2025-07-01 17:49:08.216 ai = a[i]
2025-07-01 17:49:08.216 if ai == bj:
2025-07-01 17:49:08.219 if eqi is None:
2025-07-01 17:49:08.220 eqi, eqj = i, j
2025-07-01 17:49:08.220 continue
2025-07-01 17:49:08.220 cruncher.set_seq1(ai)
2025-07-01 17:49:08.220 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.220 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.220 # compares by a factor of 3.
2025-07-01 17:49:08.220 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.220 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.220 # of the computation is cached by cruncher
2025-07-01 17:49:08.220 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.220 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.220 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.220 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.220 if best_ratio < cutoff:
2025-07-01 17:49:08.220 # no non-identical "pretty close" pair
2025-07-01 17:49:08.220 if eqi is None:
2025-07-01 17:49:08.220 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.220 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.220 return
2025-07-01 17:49:08.220 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.220 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.221 else:
2025-07-01 17:49:08.221 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.221 eqi = None
2025-07-01 17:49:08.221
2025-07-01 17:49:08.221 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.221 # identical
2025-07-01 17:49:08.221
2025-07-01 17:49:08.221 # pump out diffs from before the synch point
2025-07-01 17:49:08.221 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.221
2025-07-01 17:49:08.221 # do intraline marking on the synch pair
2025-07-01 17:49:08.221 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.221 if eqi is None:
2025-07-01 17:49:08.221 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.221 atags = btags = ""
2025-07-01 17:49:08.221 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.221 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.221 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.221 if tag == 'replace':
2025-07-01 17:49:08.221 atags += '^' * la
2025-07-01 17:49:08.221 btags += '^' * lb
2025-07-01 17:49:08.221 elif tag == 'delete':
2025-07-01 17:49:08.222 atags += '-' * la
2025-07-01 17:49:08.222 elif tag == 'insert':
2025-07-01 17:49:08.222 btags += '+' * lb
2025-07-01 17:49:08.222 elif tag == 'equal':
2025-07-01 17:49:08.222 atags += ' ' * la
2025-07-01 17:49:08.222 btags += ' ' * lb
2025-07-01 17:49:08.222 else:
2025-07-01 17:49:08.222 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.222 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.222 else:
2025-07-01 17:49:08.222 # the synch pair is identical
2025-07-01 17:49:08.222 yield ' ' + aelt
2025-07-01 17:49:08.222
2025-07-01 17:49:08.222 # pump out diffs from after the synch point
2025-07-01 17:49:08.222 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.222
2025-07-01 17:49:08.222 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.222 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.222
2025-07-01 17:49:08.222 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.222 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.223 alo = 364, ahi = 1101
2025-07-01 17:49:08.223 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.223 blo = 364, bhi = 1101
2025-07-01 17:49:08.223
2025-07-01 17:49:08.223 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.223 g = []
2025-07-01 17:49:08.223 if alo < ahi:
2025-07-01 17:49:08.223 if blo < bhi:
2025-07-01 17:49:08.223 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.223 else:
2025-07-01 17:49:08.223 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.223 elif blo < bhi:
2025-07-01 17:49:08.223 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.223
2025-07-01 17:49:08.223 > yield from g
2025-07-01 17:49:08.223
2025-07-01 17:49:08.223 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.223 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.223
2025-07-01 17:49:08.224 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.224 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.224 alo = 364, ahi = 1101
2025-07-01 17:49:08.224 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.224 blo = 364, bhi = 1101
2025-07-01 17:49:08.224
2025-07-01 17:49:08.224 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.224 r"""
2025-07-01 17:49:08.224 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.224 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.224 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.224 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.224
2025-07-01 17:49:08.224 Example:
2025-07-01 17:49:08.224
2025-07-01 17:49:08.224 >>> d = Differ()
2025-07-01 17:49:08.224 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.224 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.224 >>> print(''.join(results), end="")
2025-07-01 17:49:08.224 - abcDefghiJkl
2025-07-01 17:49:08.225 + abcdefGhijkl
2025-07-01 17:49:08.225 """
2025-07-01 17:49:08.225
2025-07-01 17:49:08.225 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.225 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.225 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.225 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.225 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.225
2025-07-01 17:49:08.225 # search for the pair that matches best without being identical
2025-07-01 17:49:08.225 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.225 # on junk -- unless we have to)
2025-07-01 17:49:08.225 for j in range(blo, bhi):
2025-07-01 17:49:08.225 bj = b[j]
2025-07-01 17:49:08.225 cruncher.set_seq2(bj)
2025-07-01 17:49:08.225 for i in range(alo, ahi):
2025-07-01 17:49:08.225 ai = a[i]
2025-07-01 17:49:08.225 if ai == bj:
2025-07-01 17:49:08.226 if eqi is None:
2025-07-01 17:49:08.226 eqi, eqj = i, j
2025-07-01 17:49:08.226 continue
2025-07-01 17:49:08.226 cruncher.set_seq1(ai)
2025-07-01 17:49:08.226 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.226 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.226 # compares by a factor of 3.
2025-07-01 17:49:08.226 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.226 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.226 # of the computation is cached by cruncher
2025-07-01 17:49:08.226 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.226 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.226 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.226 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.226 if best_ratio < cutoff:
2025-07-01 17:49:08.226 # no non-identical "pretty close" pair
2025-07-01 17:49:08.226 if eqi is None:
2025-07-01 17:49:08.226 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.226 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.226 return
2025-07-01 17:49:08.226 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.226 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.227 else:
2025-07-01 17:49:08.227 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.227 eqi = None
2025-07-01 17:49:08.227
2025-07-01 17:49:08.227 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.227 # identical
2025-07-01 17:49:08.227
2025-07-01 17:49:08.227 # pump out diffs from before the synch point
2025-07-01 17:49:08.227 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.227
2025-07-01 17:49:08.227 # do intraline marking on the synch pair
2025-07-01 17:49:08.227 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.227 if eqi is None:
2025-07-01 17:49:08.227 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.227 atags = btags = ""
2025-07-01 17:49:08.227 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.227 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.227 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.227 if tag == 'replace':
2025-07-01 17:49:08.227 atags += '^' * la
2025-07-01 17:49:08.227 btags += '^' * lb
2025-07-01 17:49:08.228 elif tag == 'delete':
2025-07-01 17:49:08.228 atags += '-' * la
2025-07-01 17:49:08.228 elif tag == 'insert':
2025-07-01 17:49:08.228 btags += '+' * lb
2025-07-01 17:49:08.228 elif tag == 'equal':
2025-07-01 17:49:08.228 atags += ' ' * la
2025-07-01 17:49:08.228 btags += ' ' * lb
2025-07-01 17:49:08.228 else:
2025-07-01 17:49:08.228 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.228 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.228 else:
2025-07-01 17:49:08.228 # the synch pair is identical
2025-07-01 17:49:08.228 yield ' ' + aelt
2025-07-01 17:49:08.228
2025-07-01 17:49:08.228 # pump out diffs from after the synch point
2025-07-01 17:49:08.228 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.228
2025-07-01 17:49:08.228 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.228 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.228
2025-07-01 17:49:08.228 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.228 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.229 alo = 365, ahi = 1101
2025-07-01 17:49:08.229 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.229 blo = 365, bhi = 1101
2025-07-01 17:49:08.229
2025-07-01 17:49:08.229 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.229 g = []
2025-07-01 17:49:08.229 if alo < ahi:
2025-07-01 17:49:08.229 if blo < bhi:
2025-07-01 17:49:08.229 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.229 else:
2025-07-01 17:49:08.229 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.229 elif blo < bhi:
2025-07-01 17:49:08.229 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.229
2025-07-01 17:49:08.229 > yield from g
2025-07-01 17:49:08.229
2025-07-01 17:49:08.229 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.229 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.229
2025-07-01 17:49:08.229 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.229 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.229 alo = 365, ahi = 1101
2025-07-01 17:49:08.230 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.230 blo = 365, bhi = 1101
2025-07-01 17:49:08.230
2025-07-01 17:49:08.230 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.230 r"""
2025-07-01 17:49:08.230 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.230 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.230 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.230 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.230
2025-07-01 17:49:08.230 Example:
2025-07-01 17:49:08.230
2025-07-01 17:49:08.230 >>> d = Differ()
2025-07-01 17:49:08.230 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.230 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.230 >>> print(''.join(results), end="")
2025-07-01 17:49:08.230 - abcDefghiJkl
2025-07-01 17:49:08.230 + abcdefGhijkl
2025-07-01 17:49:08.230 """
2025-07-01 17:49:08.231
2025-07-01 17:49:08.231 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.231 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.231 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.231 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.231 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.231
2025-07-01 17:49:08.231 # search for the pair that matches best without being identical
2025-07-01 17:49:08.231 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.231 # on junk -- unless we have to)
2025-07-01 17:49:08.231 for j in range(blo, bhi):
2025-07-01 17:49:08.231 bj = b[j]
2025-07-01 17:49:08.231 cruncher.set_seq2(bj)
2025-07-01 17:49:08.231 for i in range(alo, ahi):
2025-07-01 17:49:08.231 ai = a[i]
2025-07-01 17:49:08.231 if ai == bj:
2025-07-01 17:49:08.231 if eqi is None:
2025-07-01 17:49:08.231 eqi, eqj = i, j
2025-07-01 17:49:08.231 continue
2025-07-01 17:49:08.231 cruncher.set_seq1(ai)
2025-07-01 17:49:08.231 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.231 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.238 # compares by a factor of 3.
2025-07-01 17:49:08.238 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.238 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.238 # of the computation is cached by cruncher
2025-07-01 17:49:08.238 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.238 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.238 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.238 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.238 if best_ratio < cutoff:
2025-07-01 17:49:08.238 # no non-identical "pretty close" pair
2025-07-01 17:49:08.238 if eqi is None:
2025-07-01 17:49:08.238 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.238 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.238 return
2025-07-01 17:49:08.238 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.238 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.238 else:
2025-07-01 17:49:08.239 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.239 eqi = None
2025-07-01 17:49:08.239
2025-07-01 17:49:08.239 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.239 # identical
2025-07-01 17:49:08.239
2025-07-01 17:49:08.239 # pump out diffs from before the synch point
2025-07-01 17:49:08.239 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.239
2025-07-01 17:49:08.239 # do intraline marking on the synch pair
2025-07-01 17:49:08.239 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.239 if eqi is None:
2025-07-01 17:49:08.239 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.239 atags = btags = ""
2025-07-01 17:49:08.239 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.239 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.239 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.239 if tag == 'replace':
2025-07-01 17:49:08.239 atags += '^' * la
2025-07-01 17:49:08.239 btags += '^' * lb
2025-07-01 17:49:08.240 elif tag == 'delete':
2025-07-01 17:49:08.240 atags += '-' * la
2025-07-01 17:49:08.240 elif tag == 'insert':
2025-07-01 17:49:08.240 btags += '+' * lb
2025-07-01 17:49:08.240 elif tag == 'equal':
2025-07-01 17:49:08.240 atags += ' ' * la
2025-07-01 17:49:08.240 btags += ' ' * lb
2025-07-01 17:49:08.240 else:
2025-07-01 17:49:08.240 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.240 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.240 else:
2025-07-01 17:49:08.240 # the synch pair is identical
2025-07-01 17:49:08.240 yield ' ' + aelt
2025-07-01 17:49:08.240
2025-07-01 17:49:08.240 # pump out diffs from after the synch point
2025-07-01 17:49:08.240 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.240
2025-07-01 17:49:08.240 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.240 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.240
2025-07-01 17:49:08.241 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.241 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.241 alo = 366, ahi = 1101
2025-07-01 17:49:08.241 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.241 blo = 366, bhi = 1101
2025-07-01 17:49:08.241
2025-07-01 17:49:08.241 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.241 g = []
2025-07-01 17:49:08.241 if alo < ahi:
2025-07-01 17:49:08.241 if blo < bhi:
2025-07-01 17:49:08.241 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.241 else:
2025-07-01 17:49:08.241 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.241 elif blo < bhi:
2025-07-01 17:49:08.241 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.241
2025-07-01 17:49:08.241 > yield from g
2025-07-01 17:49:08.241
2025-07-01 17:49:08.241 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.241 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.242
2025-07-01 17:49:08.242 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.242 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.242 alo = 366, ahi = 1101
2025-07-01 17:49:08.242 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.242 blo = 366, bhi = 1101
2025-07-01 17:49:08.242
2025-07-01 17:49:08.242 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.242 r"""
2025-07-01 17:49:08.242 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.242 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.242 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.242 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.242
2025-07-01 17:49:08.242 Example:
2025-07-01 17:49:08.242
2025-07-01 17:49:08.242 >>> d = Differ()
2025-07-01 17:49:08.242 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.242 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.242 >>> print(''.join(results), end="")
2025-07-01 17:49:08.243 - abcDefghiJkl
2025-07-01 17:49:08.243 + abcdefGhijkl
2025-07-01 17:49:08.243 """
2025-07-01 17:49:08.243
2025-07-01 17:49:08.243 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.243 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.243 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.243 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.243 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.243
2025-07-01 17:49:08.243 # search for the pair that matches best without being identical
2025-07-01 17:49:08.243 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.243 # on junk -- unless we have to)
2025-07-01 17:49:08.243 for j in range(blo, bhi):
2025-07-01 17:49:08.243 bj = b[j]
2025-07-01 17:49:08.243 cruncher.set_seq2(bj)
2025-07-01 17:49:08.243 for i in range(alo, ahi):
2025-07-01 17:49:08.243 ai = a[i]
2025-07-01 17:49:08.244 if ai == bj:
2025-07-01 17:49:08.244 if eqi is None:
2025-07-01 17:49:08.244 eqi, eqj = i, j
2025-07-01 17:49:08.244 continue
2025-07-01 17:49:08.244 cruncher.set_seq1(ai)
2025-07-01 17:49:08.244 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.244 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.244 # compares by a factor of 3.
2025-07-01 17:49:08.244 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.244 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.244 # of the computation is cached by cruncher
2025-07-01 17:49:08.244 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.244 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.244 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.244 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.244 if best_ratio < cutoff:
2025-07-01 17:49:08.244 # no non-identical "pretty close" pair
2025-07-01 17:49:08.244 if eqi is None:
2025-07-01 17:49:08.244 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.244 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.245 return
2025-07-01 17:49:08.245 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.245 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.245 else:
2025-07-01 17:49:08.245 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.245 eqi = None
2025-07-01 17:49:08.245
2025-07-01 17:49:08.245 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.245 # identical
2025-07-01 17:49:08.245
2025-07-01 17:49:08.245 # pump out diffs from before the synch point
2025-07-01 17:49:08.245 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.245
2025-07-01 17:49:08.245 # do intraline marking on the synch pair
2025-07-01 17:49:08.245 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.245 if eqi is None:
2025-07-01 17:49:08.245 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.245 atags = btags = ""
2025-07-01 17:49:08.245 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.245 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.245 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.246 if tag == 'replace':
2025-07-01 17:49:08.246 atags += '^' * la
2025-07-01 17:49:08.246 btags += '^' * lb
2025-07-01 17:49:08.246 elif tag == 'delete':
2025-07-01 17:49:08.246 atags += '-' * la
2025-07-01 17:49:08.246 elif tag == 'insert':
2025-07-01 17:49:08.246 btags += '+' * lb
2025-07-01 17:49:08.246 elif tag == 'equal':
2025-07-01 17:49:08.246 atags += ' ' * la
2025-07-01 17:49:08.246 btags += ' ' * lb
2025-07-01 17:49:08.246 else:
2025-07-01 17:49:08.246 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.246 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.246 else:
2025-07-01 17:49:08.246 # the synch pair is identical
2025-07-01 17:49:08.246 yield ' ' + aelt
2025-07-01 17:49:08.246
2025-07-01 17:49:08.246 # pump out diffs from after the synch point
2025-07-01 17:49:08.247 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.247
2025-07-01 17:49:08.247 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.247 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.247
2025-07-01 17:49:08.247 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.247 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.247 alo = 367, ahi = 1101
2025-07-01 17:49:08.247 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.247 blo = 367, bhi = 1101
2025-07-01 17:49:08.247
2025-07-01 17:49:08.247 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.247 g = []
2025-07-01 17:49:08.247 if alo < ahi:
2025-07-01 17:49:08.247 if blo < bhi:
2025-07-01 17:49:08.247 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.247 else:
2025-07-01 17:49:08.247 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.247 elif blo < bhi:
2025-07-01 17:49:08.247 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.248
2025-07-01 17:49:08.250 > yield from g
2025-07-01 17:49:08.250
2025-07-01 17:49:08.251 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.251 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.251
2025-07-01 17:49:08.251 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.251 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.251 alo = 367, ahi = 1101
2025-07-01 17:49:08.251 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.251 blo = 367, bhi = 1101
2025-07-01 17:49:08.251
2025-07-01 17:49:08.251 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.251 r"""
2025-07-01 17:49:08.251 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.251 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.251 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.251 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.251
2025-07-01 17:49:08.251 Example:
2025-07-01 17:49:08.251
2025-07-01 17:49:08.251 >>> d = Differ()
2025-07-01 17:49:08.251 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.252 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.252 >>> print(''.join(results), end="")
2025-07-01 17:49:08.252 - abcDefghiJkl
2025-07-01 17:49:08.252 + abcdefGhijkl
2025-07-01 17:49:08.252 """
2025-07-01 17:49:08.252
2025-07-01 17:49:08.252 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.252 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.252 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.252 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.252 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.252
2025-07-01 17:49:08.252 # search for the pair that matches best without being identical
2025-07-01 17:49:08.252 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.252 # on junk -- unless we have to)
2025-07-01 17:49:08.252 for j in range(blo, bhi):
2025-07-01 17:49:08.252 bj = b[j]
2025-07-01 17:49:08.252 cruncher.set_seq2(bj)
2025-07-01 17:49:08.252 for i in range(alo, ahi):
2025-07-01 17:49:08.253 ai = a[i]
2025-07-01 17:49:08.253 if ai == bj:
2025-07-01 17:49:08.253 if eqi is None:
2025-07-01 17:49:08.253 eqi, eqj = i, j
2025-07-01 17:49:08.253 continue
2025-07-01 17:49:08.253 cruncher.set_seq1(ai)
2025-07-01 17:49:08.253 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.253 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.253 # compares by a factor of 3.
2025-07-01 17:49:08.253 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.253 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.253 # of the computation is cached by cruncher
2025-07-01 17:49:08.253 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.253 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.253 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.253 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.253 if best_ratio < cutoff:
2025-07-01 17:49:08.253 # no non-identical "pretty close" pair
2025-07-01 17:49:08.253 if eqi is None:
2025-07-01 17:49:08.253 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.253 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.253 return
2025-07-01 17:49:08.254 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.254 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.254 else:
2025-07-01 17:49:08.254 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.254 eqi = None
2025-07-01 17:49:08.254
2025-07-01 17:49:08.254 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.254 # identical
2025-07-01 17:49:08.254
2025-07-01 17:49:08.254 # pump out diffs from before the synch point
2025-07-01 17:49:08.254 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.254
2025-07-01 17:49:08.254 # do intraline marking on the synch pair
2025-07-01 17:49:08.254 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.254 if eqi is None:
2025-07-01 17:49:08.254 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.254 atags = btags = ""
2025-07-01 17:49:08.254 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.254 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.254 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.255 if tag == 'replace':
2025-07-01 17:49:08.255 atags += '^' * la
2025-07-01 17:49:08.255 btags += '^' * lb
2025-07-01 17:49:08.255 elif tag == 'delete':
2025-07-01 17:49:08.255 atags += '-' * la
2025-07-01 17:49:08.255 elif tag == 'insert':
2025-07-01 17:49:08.255 btags += '+' * lb
2025-07-01 17:49:08.255 elif tag == 'equal':
2025-07-01 17:49:08.255 atags += ' ' * la
2025-07-01 17:49:08.255 btags += ' ' * lb
2025-07-01 17:49:08.255 else:
2025-07-01 17:49:08.255 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.255 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.255 else:
2025-07-01 17:49:08.255 # the synch pair is identical
2025-07-01 17:49:08.255 yield ' ' + aelt
2025-07-01 17:49:08.255
2025-07-01 17:49:08.255 # pump out diffs from after the synch point
2025-07-01 17:49:08.255 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.255
2025-07-01 17:49:08.255 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.256 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.256
2025-07-01 17:49:08.256 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.256 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.256 alo = 368, ahi = 1101
2025-07-01 17:49:08.256 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.256 blo = 368, bhi = 1101
2025-07-01 17:49:08.256
2025-07-01 17:49:08.256 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.256 g = []
2025-07-01 17:49:08.256 if alo < ahi:
2025-07-01 17:49:08.256 if blo < bhi:
2025-07-01 17:49:08.256 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.256 else:
2025-07-01 17:49:08.256 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.256 elif blo < bhi:
2025-07-01 17:49:08.256 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.256
2025-07-01 17:49:08.256 > yield from g
2025-07-01 17:49:08.256
2025-07-01 17:49:08.256 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.257 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.257
2025-07-01 17:49:08.257 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.257 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.257 alo = 368, ahi = 1101
2025-07-01 17:49:08.257 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.257 blo = 368, bhi = 1101
2025-07-01 17:49:08.257
2025-07-01 17:49:08.257 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.257 r"""
2025-07-01 17:49:08.257 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.257 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.257 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.257 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.257
2025-07-01 17:49:08.257 Example:
2025-07-01 17:49:08.257
2025-07-01 17:49:08.257 >>> d = Differ()
2025-07-01 17:49:08.257 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.258 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.258 >>> print(''.join(results), end="")
2025-07-01 17:49:08.258 - abcDefghiJkl
2025-07-01 17:49:08.258 + abcdefGhijkl
2025-07-01 17:49:08.258 """
2025-07-01 17:49:08.258
2025-07-01 17:49:08.258 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.258 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.258 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.258 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.258 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.258
2025-07-01 17:49:08.258 # search for the pair that matches best without being identical
2025-07-01 17:49:08.258 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.258 # on junk -- unless we have to)
2025-07-01 17:49:08.258 for j in range(blo, bhi):
2025-07-01 17:49:08.258 bj = b[j]
2025-07-01 17:49:08.258 cruncher.set_seq2(bj)
2025-07-01 17:49:08.258 for i in range(alo, ahi):
2025-07-01 17:49:08.258 ai = a[i]
2025-07-01 17:49:08.258 if ai == bj:
2025-07-01 17:49:08.258 if eqi is None:
2025-07-01 17:49:08.258 eqi, eqj = i, j
2025-07-01 17:49:08.258 continue
2025-07-01 17:49:08.258 cruncher.set_seq1(ai)
2025-07-01 17:49:08.258 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.258 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.258 # compares by a factor of 3.
2025-07-01 17:49:08.258 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.258 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.258 # of the computation is cached by cruncher
2025-07-01 17:49:08.258 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.258 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.259 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.259 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.259 if best_ratio < cutoff:
2025-07-01 17:49:08.259 # no non-identical "pretty close" pair
2025-07-01 17:49:08.259 if eqi is None:
2025-07-01 17:49:08.259 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.259 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.259 return
2025-07-01 17:49:08.259 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.259 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.259 else:
2025-07-01 17:49:08.259 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.259 eqi = None
2025-07-01 17:49:08.259
2025-07-01 17:49:08.259 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.259 # identical
2025-07-01 17:49:08.259
2025-07-01 17:49:08.259 # pump out diffs from before the synch point
2025-07-01 17:49:08.259 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.259
2025-07-01 17:49:08.259 # do intraline marking on the synch pair
2025-07-01 17:49:08.260 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.260 if eqi is None:
2025-07-01 17:49:08.260 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.260 atags = btags = ""
2025-07-01 17:49:08.260 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.260 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.260 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.260 if tag == 'replace':
2025-07-01 17:49:08.260 atags += '^' * la
2025-07-01 17:49:08.260 btags += '^' * lb
2025-07-01 17:49:08.260 elif tag == 'delete':
2025-07-01 17:49:08.260 atags += '-' * la
2025-07-01 17:49:08.260 elif tag == 'insert':
2025-07-01 17:49:08.260 btags += '+' * lb
2025-07-01 17:49:08.260 elif tag == 'equal':
2025-07-01 17:49:08.260 atags += ' ' * la
2025-07-01 17:49:08.260 btags += ' ' * lb
2025-07-01 17:49:08.260 else:
2025-07-01 17:49:08.260 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.260 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.260 else:
2025-07-01 17:49:08.260 # the synch pair is identical
2025-07-01 17:49:08.261 yield ' ' + aelt
2025-07-01 17:49:08.261
2025-07-01 17:49:08.261 # pump out diffs from after the synch point
2025-07-01 17:49:08.261 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.261
2025-07-01 17:49:08.261 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.261 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.261
2025-07-01 17:49:08.261 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.261 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.261 alo = 369, ahi = 1101
2025-07-01 17:49:08.261 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.261 blo = 369, bhi = 1101
2025-07-01 17:49:08.261
2025-07-01 17:49:08.261 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.261 g = []
2025-07-01 17:49:08.261 if alo < ahi:
2025-07-01 17:49:08.261 if blo < bhi:
2025-07-01 17:49:08.261 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.261 else:
2025-07-01 17:49:08.262 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.262 elif blo < bhi:
2025-07-01 17:49:08.262 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.262
2025-07-01 17:49:08.262 > yield from g
2025-07-01 17:49:08.262
2025-07-01 17:49:08.262 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.262 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.262
2025-07-01 17:49:08.262 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.262 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.262 alo = 369, ahi = 1101
2025-07-01 17:49:08.262 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.262 blo = 369, bhi = 1101
2025-07-01 17:49:08.262
2025-07-01 17:49:08.262 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.262 r"""
2025-07-01 17:49:08.262 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.262 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.262 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.262 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.262
2025-07-01 17:49:08.269 Example:
2025-07-01 17:49:08.269
2025-07-01 17:49:08.269 >>> d = Differ()
2025-07-01 17:49:08.269 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.269 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.269 >>> print(''.join(results), end="")
2025-07-01 17:49:08.269 - abcDefghiJkl
2025-07-01 17:49:08.269 + abcdefGhijkl
2025-07-01 17:49:08.269 """
2025-07-01 17:49:08.269
2025-07-01 17:49:08.269 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.269 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.269 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.269 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.269 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.269
2025-07-01 17:49:08.270 # search for the pair that matches best without being identical
2025-07-01 17:49:08.270 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.270 # on junk -- unless we have to)
2025-07-01 17:49:08.270 for j in range(blo, bhi):
2025-07-01 17:49:08.270 bj = b[j]
2025-07-01 17:49:08.270 cruncher.set_seq2(bj)
2025-07-01 17:49:08.270 for i in range(alo, ahi):
2025-07-01 17:49:08.270 ai = a[i]
2025-07-01 17:49:08.270 if ai == bj:
2025-07-01 17:49:08.270 if eqi is None:
2025-07-01 17:49:08.270 eqi, eqj = i, j
2025-07-01 17:49:08.270 continue
2025-07-01 17:49:08.270 cruncher.set_seq1(ai)
2025-07-01 17:49:08.270 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.270 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.270 # compares by a factor of 3.
2025-07-01 17:49:08.270 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.270 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.270 # of the computation is cached by cruncher
2025-07-01 17:49:08.270 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.271 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.271 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.271 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.271 if best_ratio < cutoff:
2025-07-01 17:49:08.271 # no non-identical "pretty close" pair
2025-07-01 17:49:08.271 if eqi is None:
2025-07-01 17:49:08.271 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.271 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.271 return
2025-07-01 17:49:08.271 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.271 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.271 else:
2025-07-01 17:49:08.271 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.271 eqi = None
2025-07-01 17:49:08.271
2025-07-01 17:49:08.271 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.271 # identical
2025-07-01 17:49:08.271
2025-07-01 17:49:08.271 # pump out diffs from before the synch point
2025-07-01 17:49:08.271 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.272
2025-07-01 17:49:08.272 # do intraline marking on the synch pair
2025-07-01 17:49:08.272 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.272 if eqi is None:
2025-07-01 17:49:08.272 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.272 atags = btags = ""
2025-07-01 17:49:08.272 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.272 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.272 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.272 if tag == 'replace':
2025-07-01 17:49:08.272 atags += '^' * la
2025-07-01 17:49:08.272 btags += '^' * lb
2025-07-01 17:49:08.272 elif tag == 'delete':
2025-07-01 17:49:08.272 atags += '-' * la
2025-07-01 17:49:08.272 elif tag == 'insert':
2025-07-01 17:49:08.272 btags += '+' * lb
2025-07-01 17:49:08.272 elif tag == 'equal':
2025-07-01 17:49:08.272 atags += ' ' * la
2025-07-01 17:49:08.272 btags += ' ' * lb
2025-07-01 17:49:08.272 else:
2025-07-01 17:49:08.273 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.273 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.273 else:
2025-07-01 17:49:08.273 # the synch pair is identical
2025-07-01 17:49:08.273 yield ' ' + aelt
2025-07-01 17:49:08.273
2025-07-01 17:49:08.273 # pump out diffs from after the synch point
2025-07-01 17:49:08.273 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.273
2025-07-01 17:49:08.273 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.273 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.273
2025-07-01 17:49:08.273 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.273 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.273 alo = 370, ahi = 1101
2025-07-01 17:49:08.273 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.273 blo = 370, bhi = 1101
2025-07-01 17:49:08.273
2025-07-01 17:49:08.273 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.273 g = []
2025-07-01 17:49:08.273 if alo < ahi:
2025-07-01 17:49:08.274 if blo < bhi:
2025-07-01 17:49:08.274 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.274 else:
2025-07-01 17:49:08.274 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.274 elif blo < bhi:
2025-07-01 17:49:08.274 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.274
2025-07-01 17:49:08.274 > yield from g
2025-07-01 17:49:08.274
2025-07-01 17:49:08.274 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.274 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.274
2025-07-01 17:49:08.274 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.274 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.274 alo = 370, ahi = 1101
2025-07-01 17:49:08.274 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.274 blo = 370, bhi = 1101
2025-07-01 17:49:08.274
2025-07-01 17:49:08.274 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.274 r"""
2025-07-01 17:49:08.274 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.275 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.275 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.275 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.275
2025-07-01 17:49:08.275 Example:
2025-07-01 17:49:08.275
2025-07-01 17:49:08.275 >>> d = Differ()
2025-07-01 17:49:08.275 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.275 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.275 >>> print(''.join(results), end="")
2025-07-01 17:49:08.275 - abcDefghiJkl
2025-07-01 17:49:08.275 + abcdefGhijkl
2025-07-01 17:49:08.275 """
2025-07-01 17:49:08.275
2025-07-01 17:49:08.275 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.275 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.275 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.275 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.275 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.275
2025-07-01 17:49:08.276 # search for the pair that matches best without being identical
2025-07-01 17:49:08.276 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.276 # on junk -- unless we have to)
2025-07-01 17:49:08.276 for j in range(blo, bhi):
2025-07-01 17:49:08.276 bj = b[j]
2025-07-01 17:49:08.276 cruncher.set_seq2(bj)
2025-07-01 17:49:08.276 for i in range(alo, ahi):
2025-07-01 17:49:08.276 ai = a[i]
2025-07-01 17:49:08.276 if ai == bj:
2025-07-01 17:49:08.276 if eqi is None:
2025-07-01 17:49:08.276 eqi, eqj = i, j
2025-07-01 17:49:08.276 continue
2025-07-01 17:49:08.276 cruncher.set_seq1(ai)
2025-07-01 17:49:08.276 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.276 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.276 # compares by a factor of 3.
2025-07-01 17:49:08.276 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.276 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.276 # of the computation is cached by cruncher
2025-07-01 17:49:08.276 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.276 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.276 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.277 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.277 if best_ratio < cutoff:
2025-07-01 17:49:08.277 # no non-identical "pretty close" pair
2025-07-01 17:49:08.277 if eqi is None:
2025-07-01 17:49:08.277 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.277 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.277 return
2025-07-01 17:49:08.277 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.277 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.277 else:
2025-07-01 17:49:08.277 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.277 eqi = None
2025-07-01 17:49:08.277
2025-07-01 17:49:08.277 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.277 # identical
2025-07-01 17:49:08.277
2025-07-01 17:49:08.277 # pump out diffs from before the synch point
2025-07-01 17:49:08.277 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.277
2025-07-01 17:49:08.277 # do intraline marking on the synch pair
2025-07-01 17:49:08.277 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.278 if eqi is None:
2025-07-01 17:49:08.278 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.278 atags = btags = ""
2025-07-01 17:49:08.278 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.278 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.278 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.278 if tag == 'replace':
2025-07-01 17:49:08.278 atags += '^' * la
2025-07-01 17:49:08.278 btags += '^' * lb
2025-07-01 17:49:08.278 elif tag == 'delete':
2025-07-01 17:49:08.278 atags += '-' * la
2025-07-01 17:49:08.278 elif tag == 'insert':
2025-07-01 17:49:08.278 btags += '+' * lb
2025-07-01 17:49:08.278 elif tag == 'equal':
2025-07-01 17:49:08.278 atags += ' ' * la
2025-07-01 17:49:08.278 btags += ' ' * lb
2025-07-01 17:49:08.278 else:
2025-07-01 17:49:08.278 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.278 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.278 else:
2025-07-01 17:49:08.278 # the synch pair is identical
2025-07-01 17:49:08.278 yield ' ' + aelt
2025-07-01 17:49:08.281
2025-07-01 17:49:08.281 # pump out diffs from after the synch point
2025-07-01 17:49:08.282 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.282
2025-07-01 17:49:08.282 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.282 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.282
2025-07-01 17:49:08.282 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.282 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.282 alo = 371, ahi = 1101
2025-07-01 17:49:08.282 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.282 blo = 371, bhi = 1101
2025-07-01 17:49:08.282
2025-07-01 17:49:08.282 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.282 g = []
2025-07-01 17:49:08.282 if alo < ahi:
2025-07-01 17:49:08.282 if blo < bhi:
2025-07-01 17:49:08.282 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.282 else:
2025-07-01 17:49:08.282 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.283 elif blo < bhi:
2025-07-01 17:49:08.283 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.283
2025-07-01 17:49:08.283 > yield from g
2025-07-01 17:49:08.283
2025-07-01 17:49:08.283 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.283 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.283
2025-07-01 17:49:08.283 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.283 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.283 alo = 371, ahi = 1101
2025-07-01 17:49:08.283 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.283 blo = 371, bhi = 1101
2025-07-01 17:49:08.283
2025-07-01 17:49:08.283 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.283 r"""
2025-07-01 17:49:08.283 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.283 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.283 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.283 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.283
2025-07-01 17:49:08.283 Example:
2025-07-01 17:49:08.283
2025-07-01 17:49:08.283 >>> d = Differ()
2025-07-01 17:49:08.283 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.283 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.283 >>> print(''.join(results), end="")
2025-07-01 17:49:08.283 - abcDefghiJkl
2025-07-01 17:49:08.283 + abcdefGhijkl
2025-07-01 17:49:08.283 """
2025-07-01 17:49:08.283
2025-07-01 17:49:08.283 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.284 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.284 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.284 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.284 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.284
2025-07-01 17:49:08.284 # search for the pair that matches best without being identical
2025-07-01 17:49:08.284 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.284 # on junk -- unless we have to)
2025-07-01 17:49:08.284 for j in range(blo, bhi):
2025-07-01 17:49:08.284 bj = b[j]
2025-07-01 17:49:08.284 cruncher.set_seq2(bj)
2025-07-01 17:49:08.284 for i in range(alo, ahi):
2025-07-01 17:49:08.284 ai = a[i]
2025-07-01 17:49:08.284 if ai == bj:
2025-07-01 17:49:08.284 if eqi is None:
2025-07-01 17:49:08.284 eqi, eqj = i, j
2025-07-01 17:49:08.284 continue
2025-07-01 17:49:08.284 cruncher.set_seq1(ai)
2025-07-01 17:49:08.285 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.285 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.285 # compares by a factor of 3.
2025-07-01 17:49:08.285 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.285 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.285 # of the computation is cached by cruncher
2025-07-01 17:49:08.285 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.285 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.285 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.285 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.285 if best_ratio < cutoff:
2025-07-01 17:49:08.285 # no non-identical "pretty close" pair
2025-07-01 17:49:08.285 if eqi is None:
2025-07-01 17:49:08.285 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.285 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.285 return
2025-07-01 17:49:08.285 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.285 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.285 else:
2025-07-01 17:49:08.285 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.286 eqi = None
2025-07-01 17:49:08.286
2025-07-01 17:49:08.286 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.286 # identical
2025-07-01 17:49:08.286
2025-07-01 17:49:08.286 # pump out diffs from before the synch point
2025-07-01 17:49:08.286 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.286
2025-07-01 17:49:08.286 # do intraline marking on the synch pair
2025-07-01 17:49:08.286 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.286 if eqi is None:
2025-07-01 17:49:08.286 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.286 atags = btags = ""
2025-07-01 17:49:08.286 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.286 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.286 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.286 if tag == 'replace':
2025-07-01 17:49:08.286 atags += '^' * la
2025-07-01 17:49:08.286 btags += '^' * lb
2025-07-01 17:49:08.286 elif tag == 'delete':
2025-07-01 17:49:08.286 atags += '-' * la
2025-07-01 17:49:08.286 elif tag == 'insert':
2025-07-01 17:49:08.287 btags += '+' * lb
2025-07-01 17:49:08.287 elif tag == 'equal':
2025-07-01 17:49:08.287 atags += ' ' * la
2025-07-01 17:49:08.287 btags += ' ' * lb
2025-07-01 17:49:08.287 else:
2025-07-01 17:49:08.287 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.287 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.287 else:
2025-07-01 17:49:08.287 # the synch pair is identical
2025-07-01 17:49:08.287 yield ' ' + aelt
2025-07-01 17:49:08.287
2025-07-01 17:49:08.287 # pump out diffs from after the synch point
2025-07-01 17:49:08.287 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.287
2025-07-01 17:49:08.287 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.287 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.287
2025-07-01 17:49:08.287 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.287 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.287 alo = 372, ahi = 1101
2025-07-01 17:49:08.288 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.288 blo = 372, bhi = 1101
2025-07-01 17:49:08.288
2025-07-01 17:49:08.288 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.288 g = []
2025-07-01 17:49:08.288 if alo < ahi:
2025-07-01 17:49:08.288 if blo < bhi:
2025-07-01 17:49:08.288 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.288 else:
2025-07-01 17:49:08.288 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.288 elif blo < bhi:
2025-07-01 17:49:08.288 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.288
2025-07-01 17:49:08.288 > yield from g
2025-07-01 17:49:08.288
2025-07-01 17:49:08.288 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.288 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.288
2025-07-01 17:49:08.288 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.288 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.288 alo = 372, ahi = 1101
2025-07-01 17:49:08.288 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.289 blo = 372, bhi = 1101
2025-07-01 17:49:08.289
2025-07-01 17:49:08.289 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.289 r"""
2025-07-01 17:49:08.289 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.289 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.289 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.289 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.289
2025-07-01 17:49:08.289 Example:
2025-07-01 17:49:08.289
2025-07-01 17:49:08.289 >>> d = Differ()
2025-07-01 17:49:08.289 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.289 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.289 >>> print(''.join(results), end="")
2025-07-01 17:49:08.289 - abcDefghiJkl
2025-07-01 17:49:08.289 + abcdefGhijkl
2025-07-01 17:49:08.289 """
2025-07-01 17:49:08.289
2025-07-01 17:49:08.290 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.290 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.290 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.290 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.290 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.290
2025-07-01 17:49:08.290 # search for the pair that matches best without being identical
2025-07-01 17:49:08.290 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.290 # on junk -- unless we have to)
2025-07-01 17:49:08.290 for j in range(blo, bhi):
2025-07-01 17:49:08.290 bj = b[j]
2025-07-01 17:49:08.290 cruncher.set_seq2(bj)
2025-07-01 17:49:08.290 for i in range(alo, ahi):
2025-07-01 17:49:08.290 ai = a[i]
2025-07-01 17:49:08.290 if ai == bj:
2025-07-01 17:49:08.290 if eqi is None:
2025-07-01 17:49:08.290 eqi, eqj = i, j
2025-07-01 17:49:08.290 continue
2025-07-01 17:49:08.290 cruncher.set_seq1(ai)
2025-07-01 17:49:08.291 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.291 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.291 # compares by a factor of 3.
2025-07-01 17:49:08.291 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.291 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.291 # of the computation is cached by cruncher
2025-07-01 17:49:08.291 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.291 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.291 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.291 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.291 if best_ratio < cutoff:
2025-07-01 17:49:08.291 # no non-identical "pretty close" pair
2025-07-01 17:49:08.291 if eqi is None:
2025-07-01 17:49:08.291 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.291 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.291 return
2025-07-01 17:49:08.291 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.291 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.291 else:
2025-07-01 17:49:08.292 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.292 eqi = None
2025-07-01 17:49:08.292
2025-07-01 17:49:08.292 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.292 # identical
2025-07-01 17:49:08.292
2025-07-01 17:49:08.292 # pump out diffs from before the synch point
2025-07-01 17:49:08.292 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.292
2025-07-01 17:49:08.292 # do intraline marking on the synch pair
2025-07-01 17:49:08.292 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.292 if eqi is None:
2025-07-01 17:49:08.292 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.292 atags = btags = ""
2025-07-01 17:49:08.292 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.292 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.292 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.292 if tag == 'replace':
2025-07-01 17:49:08.292 atags += '^' * la
2025-07-01 17:49:08.292 btags += '^' * lb
2025-07-01 17:49:08.292 elif tag == 'delete':
2025-07-01 17:49:08.293 atags += '-' * la
2025-07-01 17:49:08.293 elif tag == 'insert':
2025-07-01 17:49:08.293 btags += '+' * lb
2025-07-01 17:49:08.293 elif tag == 'equal':
2025-07-01 17:49:08.293 atags += ' ' * la
2025-07-01 17:49:08.293 btags += ' ' * lb
2025-07-01 17:49:08.293 else:
2025-07-01 17:49:08.293 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.293 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.293 else:
2025-07-01 17:49:08.293 # the synch pair is identical
2025-07-01 17:49:08.293 yield ' ' + aelt
2025-07-01 17:49:08.293
2025-07-01 17:49:08.293 # pump out diffs from after the synch point
2025-07-01 17:49:08.293 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.293
2025-07-01 17:49:08.293 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.293 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.293
2025-07-01 17:49:08.293 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.293 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.294 alo = 373, ahi = 1101
2025-07-01 17:49:08.299 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.299 blo = 373, bhi = 1101
2025-07-01 17:49:08.299
2025-07-01 17:49:08.299 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.299 g = []
2025-07-01 17:49:08.299 if alo < ahi:
2025-07-01 17:49:08.299 if blo < bhi:
2025-07-01 17:49:08.299 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.299 else:
2025-07-01 17:49:08.299 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.299 elif blo < bhi:
2025-07-01 17:49:08.299 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.299
2025-07-01 17:49:08.299 > yield from g
2025-07-01 17:49:08.299
2025-07-01 17:49:08.299 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.299 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.299
2025-07-01 17:49:08.299 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.300 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.300 alo = 373, ahi = 1101
2025-07-01 17:49:08.300 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.300 blo = 373, bhi = 1101
2025-07-01 17:49:08.300
2025-07-01 17:49:08.300 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.300 r"""
2025-07-01 17:49:08.300 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.300 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.300 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.300 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.300
2025-07-01 17:49:08.300 Example:
2025-07-01 17:49:08.300
2025-07-01 17:49:08.300 >>> d = Differ()
2025-07-01 17:49:08.300 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.300 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.300 >>> print(''.join(results), end="")
2025-07-01 17:49:08.300 - abcDefghiJkl
2025-07-01 17:49:08.301 + abcdefGhijkl
2025-07-01 17:49:08.301 """
2025-07-01 17:49:08.301
2025-07-01 17:49:08.301 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.301 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.301 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.301 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.301 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.301
2025-07-01 17:49:08.301 # search for the pair that matches best without being identical
2025-07-01 17:49:08.301 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.301 # on junk -- unless we have to)
2025-07-01 17:49:08.301 for j in range(blo, bhi):
2025-07-01 17:49:08.301 bj = b[j]
2025-07-01 17:49:08.301 cruncher.set_seq2(bj)
2025-07-01 17:49:08.301 for i in range(alo, ahi):
2025-07-01 17:49:08.301 ai = a[i]
2025-07-01 17:49:08.301 if ai == bj:
2025-07-01 17:49:08.301 if eqi is None:
2025-07-01 17:49:08.301 eqi, eqj = i, j
2025-07-01 17:49:08.301 continue
2025-07-01 17:49:08.302 cruncher.set_seq1(ai)
2025-07-01 17:49:08.302 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.302 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.302 # compares by a factor of 3.
2025-07-01 17:49:08.302 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.302 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.302 # of the computation is cached by cruncher
2025-07-01 17:49:08.302 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.302 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.302 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.302 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.302 if best_ratio < cutoff:
2025-07-01 17:49:08.302 # no non-identical "pretty close" pair
2025-07-01 17:49:08.302 if eqi is None:
2025-07-01 17:49:08.302 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.302 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.302 return
2025-07-01 17:49:08.302 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.302 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.302 else:
2025-07-01 17:49:08.302 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.303 eqi = None
2025-07-01 17:49:08.303
2025-07-01 17:49:08.303 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.303 # identical
2025-07-01 17:49:08.303
2025-07-01 17:49:08.303 # pump out diffs from before the synch point
2025-07-01 17:49:08.303 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.303
2025-07-01 17:49:08.303 # do intraline marking on the synch pair
2025-07-01 17:49:08.303 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.303 if eqi is None:
2025-07-01 17:49:08.303 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.303 atags = btags = ""
2025-07-01 17:49:08.303 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.303 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.303 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.303 if tag == 'replace':
2025-07-01 17:49:08.303 atags += '^' * la
2025-07-01 17:49:08.303 btags += '^' * lb
2025-07-01 17:49:08.303 elif tag == 'delete':
2025-07-01 17:49:08.303 atags += '-' * la
2025-07-01 17:49:08.304 elif tag == 'insert':
2025-07-01 17:49:08.304 btags += '+' * lb
2025-07-01 17:49:08.304 elif tag == 'equal':
2025-07-01 17:49:08.304 atags += ' ' * la
2025-07-01 17:49:08.304 btags += ' ' * lb
2025-07-01 17:49:08.304 else:
2025-07-01 17:49:08.304 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.304 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.304 else:
2025-07-01 17:49:08.304 # the synch pair is identical
2025-07-01 17:49:08.304 yield ' ' + aelt
2025-07-01 17:49:08.304
2025-07-01 17:49:08.304 # pump out diffs from after the synch point
2025-07-01 17:49:08.304 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.304
2025-07-01 17:49:08.304 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.304 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.304
2025-07-01 17:49:08.305 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.305 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.305 alo = 374, ahi = 1101
2025-07-01 17:49:08.305 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.305 blo = 374, bhi = 1101
2025-07-01 17:49:08.305
2025-07-01 17:49:08.305 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.305 g = []
2025-07-01 17:49:08.305 if alo < ahi:
2025-07-01 17:49:08.305 if blo < bhi:
2025-07-01 17:49:08.305 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.305 else:
2025-07-01 17:49:08.305 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.305 elif blo < bhi:
2025-07-01 17:49:08.305 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.305
2025-07-01 17:49:08.305 > yield from g
2025-07-01 17:49:08.305
2025-07-01 17:49:08.305 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.305 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.306
2025-07-01 17:49:08.306 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.306 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.306 alo = 374, ahi = 1101
2025-07-01 17:49:08.306 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.306 blo = 374, bhi = 1101
2025-07-01 17:49:08.306
2025-07-01 17:49:08.306 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.306 r"""
2025-07-01 17:49:08.306 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.306 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.306 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.306 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.306
2025-07-01 17:49:08.306 Example:
2025-07-01 17:49:08.306
2025-07-01 17:49:08.306 >>> d = Differ()
2025-07-01 17:49:08.306 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.306 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.306 >>> print(''.join(results), end="")
2025-07-01 17:49:08.307 - abcDefghiJkl
2025-07-01 17:49:08.307 + abcdefGhijkl
2025-07-01 17:49:08.307 """
2025-07-01 17:49:08.307
2025-07-01 17:49:08.307 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.307 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.307 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.307 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.307 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.307
2025-07-01 17:49:08.307 # search for the pair that matches best without being identical
2025-07-01 17:49:08.307 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.307 # on junk -- unless we have to)
2025-07-01 17:49:08.307 for j in range(blo, bhi):
2025-07-01 17:49:08.307 bj = b[j]
2025-07-01 17:49:08.307 cruncher.set_seq2(bj)
2025-07-01 17:49:08.307 for i in range(alo, ahi):
2025-07-01 17:49:08.307 ai = a[i]
2025-07-01 17:49:08.307 if ai == bj:
2025-07-01 17:49:08.307 if eqi is None:
2025-07-01 17:49:08.308 eqi, eqj = i, j
2025-07-01 17:49:08.308 continue
2025-07-01 17:49:08.308 cruncher.set_seq1(ai)
2025-07-01 17:49:08.308 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.308 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.308 # compares by a factor of 3.
2025-07-01 17:49:08.308 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.308 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.308 # of the computation is cached by cruncher
2025-07-01 17:49:08.308 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.308 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.308 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.308 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.308 if best_ratio < cutoff:
2025-07-01 17:49:08.308 # no non-identical "pretty close" pair
2025-07-01 17:49:08.308 if eqi is None:
2025-07-01 17:49:08.308 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.308 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.308 return
2025-07-01 17:49:08.308 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.311 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.311 else:
2025-07-01 17:49:08.312 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.312 eqi = None
2025-07-01 17:49:08.312
2025-07-01 17:49:08.312 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.312 # identical
2025-07-01 17:49:08.312
2025-07-01 17:49:08.312 # pump out diffs from before the synch point
2025-07-01 17:49:08.312 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.312
2025-07-01 17:49:08.312 # do intraline marking on the synch pair
2025-07-01 17:49:08.312 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.312 if eqi is None:
2025-07-01 17:49:08.312 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.312 atags = btags = ""
2025-07-01 17:49:08.312 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.312 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.312 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.313 if tag == 'replace':
2025-07-01 17:49:08.313 atags += '^' * la
2025-07-01 17:49:08.313 btags += '^' * lb
2025-07-01 17:49:08.313 elif tag == 'delete':
2025-07-01 17:49:08.313 atags += '-' * la
2025-07-01 17:49:08.313 elif tag == 'insert':
2025-07-01 17:49:08.313 btags += '+' * lb
2025-07-01 17:49:08.313 elif tag == 'equal':
2025-07-01 17:49:08.313 atags += ' ' * la
2025-07-01 17:49:08.313 btags += ' ' * lb
2025-07-01 17:49:08.313 else:
2025-07-01 17:49:08.313 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.313 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.313 else:
2025-07-01 17:49:08.313 # the synch pair is identical
2025-07-01 17:49:08.313 yield ' ' + aelt
2025-07-01 17:49:08.313
2025-07-01 17:49:08.313 # pump out diffs from after the synch point
2025-07-01 17:49:08.313 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.313
2025-07-01 17:49:08.313 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.314 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.314
2025-07-01 17:49:08.314 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.314 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.314 alo = 375, ahi = 1101
2025-07-01 17:49:08.314 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.314 blo = 375, bhi = 1101
2025-07-01 17:49:08.314
2025-07-01 17:49:08.314 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.314 g = []
2025-07-01 17:49:08.314 if alo < ahi:
2025-07-01 17:49:08.314 if blo < bhi:
2025-07-01 17:49:08.314 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.314 else:
2025-07-01 17:49:08.314 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.314 elif blo < bhi:
2025-07-01 17:49:08.314 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.314
2025-07-01 17:49:08.314 > yield from g
2025-07-01 17:49:08.314
2025-07-01 17:49:08.314 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.315 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.315
2025-07-01 17:49:08.315 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.315 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.315 alo = 375, ahi = 1101
2025-07-01 17:49:08.315 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.315 blo = 375, bhi = 1101
2025-07-01 17:49:08.315
2025-07-01 17:49:08.315 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.315 r"""
2025-07-01 17:49:08.315 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.315 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.315 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.315 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.315
2025-07-01 17:49:08.315 Example:
2025-07-01 17:49:08.316
2025-07-01 17:49:08.316 >>> d = Differ()
2025-07-01 17:49:08.316 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.316 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.316 >>> print(''.join(results), end="")
2025-07-01 17:49:08.316 - abcDefghiJkl
2025-07-01 17:49:08.316 + abcdefGhijkl
2025-07-01 17:49:08.316 """
2025-07-01 17:49:08.316
2025-07-01 17:49:08.316 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.316 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.316 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.316 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.316 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.316
2025-07-01 17:49:08.316 # search for the pair that matches best without being identical
2025-07-01 17:49:08.316 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.316 # on junk -- unless we have to)
2025-07-01 17:49:08.316 for j in range(blo, bhi):
2025-07-01 17:49:08.317 bj = b[j]
2025-07-01 17:49:08.317 cruncher.set_seq2(bj)
2025-07-01 17:49:08.317 for i in range(alo, ahi):
2025-07-01 17:49:08.317 ai = a[i]
2025-07-01 17:49:08.317 if ai == bj:
2025-07-01 17:49:08.317 if eqi is None:
2025-07-01 17:49:08.317 eqi, eqj = i, j
2025-07-01 17:49:08.317 continue
2025-07-01 17:49:08.317 cruncher.set_seq1(ai)
2025-07-01 17:49:08.317 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.317 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.317 # compares by a factor of 3.
2025-07-01 17:49:08.317 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.317 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.317 # of the computation is cached by cruncher
2025-07-01 17:49:08.317 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.317 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.317 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.317 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.317 if best_ratio < cutoff:
2025-07-01 17:49:08.318 # no non-identical "pretty close" pair
2025-07-01 17:49:08.318 if eqi is None:
2025-07-01 17:49:08.318 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.318 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.318 return
2025-07-01 17:49:08.318 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.318 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.318 else:
2025-07-01 17:49:08.318 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.318 eqi = None
2025-07-01 17:49:08.318
2025-07-01 17:49:08.318 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.318 # identical
2025-07-01 17:49:08.318
2025-07-01 17:49:08.318 # pump out diffs from before the synch point
2025-07-01 17:49:08.318 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.318
2025-07-01 17:49:08.318 # do intraline marking on the synch pair
2025-07-01 17:49:08.318 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.318 if eqi is None:
2025-07-01 17:49:08.318 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.319 atags = btags = ""
2025-07-01 17:49:08.319 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.319 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.319 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.319 if tag == 'replace':
2025-07-01 17:49:08.319 atags += '^' * la
2025-07-01 17:49:08.319 btags += '^' * lb
2025-07-01 17:49:08.319 elif tag == 'delete':
2025-07-01 17:49:08.319 atags += '-' * la
2025-07-01 17:49:08.319 elif tag == 'insert':
2025-07-01 17:49:08.319 btags += '+' * lb
2025-07-01 17:49:08.319 elif tag == 'equal':
2025-07-01 17:49:08.319 atags += ' ' * la
2025-07-01 17:49:08.319 btags += ' ' * lb
2025-07-01 17:49:08.319 else:
2025-07-01 17:49:08.319 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.319 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.319 else:
2025-07-01 17:49:08.319 # the synch pair is identical
2025-07-01 17:49:08.319 yield ' ' + aelt
2025-07-01 17:49:08.319
2025-07-01 17:49:08.319 # pump out diffs from after the synch point
2025-07-01 17:49:08.320 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.320
2025-07-01 17:49:08.320 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.320 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.320
2025-07-01 17:49:08.320 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.320 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.320 alo = 378, ahi = 1101
2025-07-01 17:49:08.320 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.320 blo = 378, bhi = 1101
2025-07-01 17:49:08.320
2025-07-01 17:49:08.320 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.320 g = []
2025-07-01 17:49:08.320 if alo < ahi:
2025-07-01 17:49:08.320 if blo < bhi:
2025-07-01 17:49:08.320 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.320 else:
2025-07-01 17:49:08.320 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.320 elif blo < bhi:
2025-07-01 17:49:08.320 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.320
2025-07-01 17:49:08.321 > yield from g
2025-07-01 17:49:08.321
2025-07-01 17:49:08.321 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.321 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.321
2025-07-01 17:49:08.321 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.321 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.321 alo = 378, ahi = 1101
2025-07-01 17:49:08.321 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.321 blo = 378, bhi = 1101
2025-07-01 17:49:08.321
2025-07-01 17:49:08.321 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.321 r"""
2025-07-01 17:49:08.321 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.321 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.321 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.321 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.321
2025-07-01 17:49:08.321 Example:
2025-07-01 17:49:08.321
2025-07-01 17:49:08.321 >>> d = Differ()
2025-07-01 17:49:08.322 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.322 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.322 >>> print(''.join(results), end="")
2025-07-01 17:49:08.322 - abcDefghiJkl
2025-07-01 17:49:08.322 + abcdefGhijkl
2025-07-01 17:49:08.322 """
2025-07-01 17:49:08.322
2025-07-01 17:49:08.322 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.322 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.322 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.322 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.322 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.322
2025-07-01 17:49:08.322 # search for the pair that matches best without being identical
2025-07-01 17:49:08.322 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.322 # on junk -- unless we have to)
2025-07-01 17:49:08.322 for j in range(blo, bhi):
2025-07-01 17:49:08.322 bj = b[j]
2025-07-01 17:49:08.322 cruncher.set_seq2(bj)
2025-07-01 17:49:08.323 for i in range(alo, ahi):
2025-07-01 17:49:08.323 ai = a[i]
2025-07-01 17:49:08.323 if ai == bj:
2025-07-01 17:49:08.323 if eqi is None:
2025-07-01 17:49:08.323 eqi, eqj = i, j
2025-07-01 17:49:08.323 continue
2025-07-01 17:49:08.323 cruncher.set_seq1(ai)
2025-07-01 17:49:08.323 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.323 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.323 # compares by a factor of 3.
2025-07-01 17:49:08.323 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.323 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.323 # of the computation is cached by cruncher
2025-07-01 17:49:08.323 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.323 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.323 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.323 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.323 if best_ratio < cutoff:
2025-07-01 17:49:08.323 # no non-identical "pretty close" pair
2025-07-01 17:49:08.323 if eqi is None:
2025-07-01 17:49:08.324 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.324 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.324 return
2025-07-01 17:49:08.324 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.324 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.324 else:
2025-07-01 17:49:08.324 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.324 eqi = None
2025-07-01 17:49:08.324
2025-07-01 17:49:08.324 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.324 # identical
2025-07-01 17:49:08.324
2025-07-01 17:49:08.324 # pump out diffs from before the synch point
2025-07-01 17:49:08.324 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.324
2025-07-01 17:49:08.324 # do intraline marking on the synch pair
2025-07-01 17:49:08.324 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.324 if eqi is None:
2025-07-01 17:49:08.324 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.325 atags = btags = ""
2025-07-01 17:49:08.330 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.330 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.330 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.330 if tag == 'replace':
2025-07-01 17:49:08.330 atags += '^' * la
2025-07-01 17:49:08.330 btags += '^' * lb
2025-07-01 17:49:08.330 elif tag == 'delete':
2025-07-01 17:49:08.330 atags += '-' * la
2025-07-01 17:49:08.330 elif tag == 'insert':
2025-07-01 17:49:08.330 btags += '+' * lb
2025-07-01 17:49:08.330 elif tag == 'equal':
2025-07-01 17:49:08.330 atags += ' ' * la
2025-07-01 17:49:08.330 btags += ' ' * lb
2025-07-01 17:49:08.330 else:
2025-07-01 17:49:08.330 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.330 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.330 else:
2025-07-01 17:49:08.330 # the synch pair is identical
2025-07-01 17:49:08.330 yield ' ' + aelt
2025-07-01 17:49:08.331
2025-07-01 17:49:08.331 # pump out diffs from after the synch point
2025-07-01 17:49:08.331 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.331
2025-07-01 17:49:08.331 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.331 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.331
2025-07-01 17:49:08.331 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.331 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.331 alo = 379, ahi = 1101
2025-07-01 17:49:08.331 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.331 blo = 379, bhi = 1101
2025-07-01 17:49:08.331
2025-07-01 17:49:08.331 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.331 g = []
2025-07-01 17:49:08.331 if alo < ahi:
2025-07-01 17:49:08.331 if blo < bhi:
2025-07-01 17:49:08.331 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.331 else:
2025-07-01 17:49:08.331 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.331 elif blo < bhi:
2025-07-01 17:49:08.332 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.332
2025-07-01 17:49:08.332 > yield from g
2025-07-01 17:49:08.332
2025-07-01 17:49:08.332 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.332 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.332
2025-07-01 17:49:08.332 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.332 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.332 alo = 379, ahi = 1101
2025-07-01 17:49:08.332 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.332 blo = 379, bhi = 1101
2025-07-01 17:49:08.332
2025-07-01 17:49:08.332 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.332 r"""
2025-07-01 17:49:08.332 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.332 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.332 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.332 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.332
2025-07-01 17:49:08.332 Example:
2025-07-01 17:49:08.333
2025-07-01 17:49:08.333 >>> d = Differ()
2025-07-01 17:49:08.333 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.333 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.333 >>> print(''.join(results), end="")
2025-07-01 17:49:08.333 - abcDefghiJkl
2025-07-01 17:49:08.333 + abcdefGhijkl
2025-07-01 17:49:08.333 """
2025-07-01 17:49:08.333
2025-07-01 17:49:08.333 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.333 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.333 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.333 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.333 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.333
2025-07-01 17:49:08.333 # search for the pair that matches best without being identical
2025-07-01 17:49:08.333 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.333 # on junk -- unless we have to)
2025-07-01 17:49:08.333 for j in range(blo, bhi):
2025-07-01 17:49:08.333 bj = b[j]
2025-07-01 17:49:08.334 cruncher.set_seq2(bj)
2025-07-01 17:49:08.334 for i in range(alo, ahi):
2025-07-01 17:49:08.334 ai = a[i]
2025-07-01 17:49:08.334 if ai == bj:
2025-07-01 17:49:08.334 if eqi is None:
2025-07-01 17:49:08.334 eqi, eqj = i, j
2025-07-01 17:49:08.334 continue
2025-07-01 17:49:08.334 cruncher.set_seq1(ai)
2025-07-01 17:49:08.334 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.334 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.334 # compares by a factor of 3.
2025-07-01 17:49:08.334 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.334 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.334 # of the computation is cached by cruncher
2025-07-01 17:49:08.334 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.334 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.334 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.334 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.334 if best_ratio < cutoff:
2025-07-01 17:49:08.334 # no non-identical "pretty close" pair
2025-07-01 17:49:08.334 if eqi is None:
2025-07-01 17:49:08.335 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.335 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.335 return
2025-07-01 17:49:08.335 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.335 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.335 else:
2025-07-01 17:49:08.335 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.335 eqi = None
2025-07-01 17:49:08.335
2025-07-01 17:49:08.335 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.335 # identical
2025-07-01 17:49:08.335
2025-07-01 17:49:08.335 # pump out diffs from before the synch point
2025-07-01 17:49:08.335 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.335
2025-07-01 17:49:08.335 # do intraline marking on the synch pair
2025-07-01 17:49:08.335 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.335 if eqi is None:
2025-07-01 17:49:08.335 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.335 atags = btags = ""
2025-07-01 17:49:08.335 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.336 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.336 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.336 if tag == 'replace':
2025-07-01 17:49:08.336 atags += '^' * la
2025-07-01 17:49:08.336 btags += '^' * lb
2025-07-01 17:49:08.336 elif tag == 'delete':
2025-07-01 17:49:08.336 atags += '-' * la
2025-07-01 17:49:08.336 elif tag == 'insert':
2025-07-01 17:49:08.336 btags += '+' * lb
2025-07-01 17:49:08.336 elif tag == 'equal':
2025-07-01 17:49:08.336 atags += ' ' * la
2025-07-01 17:49:08.336 btags += ' ' * lb
2025-07-01 17:49:08.336 else:
2025-07-01 17:49:08.336 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.336 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.336 else:
2025-07-01 17:49:08.336 # the synch pair is identical
2025-07-01 17:49:08.336 yield ' ' + aelt
2025-07-01 17:49:08.336
2025-07-01 17:49:08.336 # pump out diffs from after the synch point
2025-07-01 17:49:08.336 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.337
2025-07-01 17:49:08.337 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.337 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.337
2025-07-01 17:49:08.337 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.337 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.337 alo = 380, ahi = 1101
2025-07-01 17:49:08.337 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.337 blo = 380, bhi = 1101
2025-07-01 17:49:08.337
2025-07-01 17:49:08.337 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.337 g = []
2025-07-01 17:49:08.337 if alo < ahi:
2025-07-01 17:49:08.337 if blo < bhi:
2025-07-01 17:49:08.337 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.337 else:
2025-07-01 17:49:08.337 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.337 elif blo < bhi:
2025-07-01 17:49:08.337 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.338
2025-07-01 17:49:08.338 > yield from g
2025-07-01 17:49:08.338
2025-07-01 17:49:08.338 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.338 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.338
2025-07-01 17:49:08.338 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.338 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.338 alo = 380, ahi = 1101
2025-07-01 17:49:08.338 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.338 blo = 380, bhi = 1101
2025-07-01 17:49:08.338
2025-07-01 17:49:08.338 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.338 r"""
2025-07-01 17:49:08.338 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.338 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.338 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.338 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.338
2025-07-01 17:49:08.338 Example:
2025-07-01 17:49:08.339
2025-07-01 17:49:08.339 >>> d = Differ()
2025-07-01 17:49:08.339 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.339 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.339 >>> print(''.join(results), end="")
2025-07-01 17:49:08.339 - abcDefghiJkl
2025-07-01 17:49:08.339 + abcdefGhijkl
2025-07-01 17:49:08.339 """
2025-07-01 17:49:08.339
2025-07-01 17:49:08.339 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.339 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.339 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.339 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.339 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.339
2025-07-01 17:49:08.339 # search for the pair that matches best without being identical
2025-07-01 17:49:08.339 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.339 # on junk -- unless we have to)
2025-07-01 17:49:08.339 for j in range(blo, bhi):
2025-07-01 17:49:08.340 bj = b[j]
2025-07-01 17:49:08.340 cruncher.set_seq2(bj)
2025-07-01 17:49:08.340 for i in range(alo, ahi):
2025-07-01 17:49:08.340 ai = a[i]
2025-07-01 17:49:08.340 if ai == bj:
2025-07-01 17:49:08.340 if eqi is None:
2025-07-01 17:49:08.340 eqi, eqj = i, j
2025-07-01 17:49:08.340 continue
2025-07-01 17:49:08.340 cruncher.set_seq1(ai)
2025-07-01 17:49:08.340 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.340 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.340 # compares by a factor of 3.
2025-07-01 17:49:08.340 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.340 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.340 # of the computation is cached by cruncher
2025-07-01 17:49:08.340 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.340 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.340 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.340 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.340 if best_ratio < cutoff:
2025-07-01 17:49:08.341 # no non-identical "pretty close" pair
2025-07-01 17:49:08.343 if eqi is None:
2025-07-01 17:49:08.343 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.343 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.344 return
2025-07-01 17:49:08.344 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.344 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.344 else:
2025-07-01 17:49:08.344 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.344 eqi = None
2025-07-01 17:49:08.344
2025-07-01 17:49:08.344 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.344 # identical
2025-07-01 17:49:08.344
2025-07-01 17:49:08.344 # pump out diffs from before the synch point
2025-07-01 17:49:08.344 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.344
2025-07-01 17:49:08.344 # do intraline marking on the synch pair
2025-07-01 17:49:08.344 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.344 if eqi is None:
2025-07-01 17:49:08.344 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.344 atags = btags = ""
2025-07-01 17:49:08.344 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.344 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.344 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.345 if tag == 'replace':
2025-07-01 17:49:08.345 atags += '^' * la
2025-07-01 17:49:08.345 btags += '^' * lb
2025-07-01 17:49:08.345 elif tag == 'delete':
2025-07-01 17:49:08.345 atags += '-' * la
2025-07-01 17:49:08.345 elif tag == 'insert':
2025-07-01 17:49:08.345 btags += '+' * lb
2025-07-01 17:49:08.345 elif tag == 'equal':
2025-07-01 17:49:08.345 atags += ' ' * la
2025-07-01 17:49:08.345 btags += ' ' * lb
2025-07-01 17:49:08.345 else:
2025-07-01 17:49:08.345 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.345 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.345 else:
2025-07-01 17:49:08.345 # the synch pair is identical
2025-07-01 17:49:08.345 yield ' ' + aelt
2025-07-01 17:49:08.345
2025-07-01 17:49:08.345 # pump out diffs from after the synch point
2025-07-01 17:49:08.345 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.345
2025-07-01 17:49:08.345 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.346 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.346
2025-07-01 17:49:08.346 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.346 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.346 alo = 381, ahi = 1101
2025-07-01 17:49:08.346 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.346 blo = 381, bhi = 1101
2025-07-01 17:49:08.346
2025-07-01 17:49:08.346 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.346 g = []
2025-07-01 17:49:08.346 if alo < ahi:
2025-07-01 17:49:08.346 if blo < bhi:
2025-07-01 17:49:08.346 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.346 else:
2025-07-01 17:49:08.346 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.346 elif blo < bhi:
2025-07-01 17:49:08.346 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.346
2025-07-01 17:49:08.346 > yield from g
2025-07-01 17:49:08.346
2025-07-01 17:49:08.346 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.346 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.347
2025-07-01 17:49:08.347 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.347 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.347 alo = 381, ahi = 1101
2025-07-01 17:49:08.347 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.347 blo = 381, bhi = 1101
2025-07-01 17:49:08.347
2025-07-01 17:49:08.347 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.347 r"""
2025-07-01 17:49:08.347 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.347 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.347 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.347 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.347
2025-07-01 17:49:08.347 Example:
2025-07-01 17:49:08.347
2025-07-01 17:49:08.347 >>> d = Differ()
2025-07-01 17:49:08.347 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.347 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.347 >>> print(''.join(results), end="")
2025-07-01 17:49:08.348 - abcDefghiJkl
2025-07-01 17:49:08.348 + abcdefGhijkl
2025-07-01 17:49:08.348 """
2025-07-01 17:49:08.348
2025-07-01 17:49:08.348 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.348 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.348 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.348 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.348 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.348
2025-07-01 17:49:08.348 # search for the pair that matches best without being identical
2025-07-01 17:49:08.348 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.348 # on junk -- unless we have to)
2025-07-01 17:49:08.348 for j in range(blo, bhi):
2025-07-01 17:49:08.348 bj = b[j]
2025-07-01 17:49:08.348 cruncher.set_seq2(bj)
2025-07-01 17:49:08.349 for i in range(alo, ahi):
2025-07-01 17:49:08.349 ai = a[i]
2025-07-01 17:49:08.349 if ai == bj:
2025-07-01 17:49:08.349 if eqi is None:
2025-07-01 17:49:08.349 eqi, eqj = i, j
2025-07-01 17:49:08.349 continue
2025-07-01 17:49:08.349 cruncher.set_seq1(ai)
2025-07-01 17:49:08.349 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.349 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.349 # compares by a factor of 3.
2025-07-01 17:49:08.349 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.349 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.349 # of the computation is cached by cruncher
2025-07-01 17:49:08.349 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.349 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.349 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.349 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.349 if best_ratio < cutoff:
2025-07-01 17:49:08.349 # no non-identical "pretty close" pair
2025-07-01 17:49:08.349 if eqi is None:
2025-07-01 17:49:08.350 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.350 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.350 return
2025-07-01 17:49:08.350 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.350 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.350 else:
2025-07-01 17:49:08.350 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.350 eqi = None
2025-07-01 17:49:08.350
2025-07-01 17:49:08.350 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.350 # identical
2025-07-01 17:49:08.350
2025-07-01 17:49:08.350 # pump out diffs from before the synch point
2025-07-01 17:49:08.350 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.350
2025-07-01 17:49:08.350 # do intraline marking on the synch pair
2025-07-01 17:49:08.350 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.350 if eqi is None:
2025-07-01 17:49:08.350 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.350 atags = btags = ""
2025-07-01 17:49:08.350 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.351 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.351 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.351 if tag == 'replace':
2025-07-01 17:49:08.351 atags += '^' * la
2025-07-01 17:49:08.351 btags += '^' * lb
2025-07-01 17:49:08.351 elif tag == 'delete':
2025-07-01 17:49:08.351 atags += '-' * la
2025-07-01 17:49:08.351 elif tag == 'insert':
2025-07-01 17:49:08.351 btags += '+' * lb
2025-07-01 17:49:08.351 elif tag == 'equal':
2025-07-01 17:49:08.351 atags += ' ' * la
2025-07-01 17:49:08.351 btags += ' ' * lb
2025-07-01 17:49:08.351 else:
2025-07-01 17:49:08.351 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.351 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.351 else:
2025-07-01 17:49:08.351 # the synch pair is identical
2025-07-01 17:49:08.351 yield ' ' + aelt
2025-07-01 17:49:08.351
2025-07-01 17:49:08.351 # pump out diffs from after the synch point
2025-07-01 17:49:08.351 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.351
2025-07-01 17:49:08.352 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.352 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.352
2025-07-01 17:49:08.352 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.352 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.352 alo = 382, ahi = 1101
2025-07-01 17:49:08.352 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.352 blo = 382, bhi = 1101
2025-07-01 17:49:08.352
2025-07-01 17:49:08.352 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.352 g = []
2025-07-01 17:49:08.352 if alo < ahi:
2025-07-01 17:49:08.352 if blo < bhi:
2025-07-01 17:49:08.352 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.352 else:
2025-07-01 17:49:08.352 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.352 elif blo < bhi:
2025-07-01 17:49:08.352 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.352
2025-07-01 17:49:08.352 > yield from g
2025-07-01 17:49:08.352
2025-07-01 17:49:08.353 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.353 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.353
2025-07-01 17:49:08.353 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.353 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.353 alo = 382, ahi = 1101
2025-07-01 17:49:08.353 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.353 blo = 382, bhi = 1101
2025-07-01 17:49:08.353
2025-07-01 17:49:08.353 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.353 r"""
2025-07-01 17:49:08.353 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.353 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.353 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.353 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.353
2025-07-01 17:49:08.353 Example:
2025-07-01 17:49:08.353
2025-07-01 17:49:08.353 >>> d = Differ()
2025-07-01 17:49:08.353 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.353 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.354 >>> print(''.join(results), end="")
2025-07-01 17:49:08.354 - abcDefghiJkl
2025-07-01 17:49:08.354 + abcdefGhijkl
2025-07-01 17:49:08.354 """
2025-07-01 17:49:08.354
2025-07-01 17:49:08.354 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.354 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.354 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.354 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.354 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.354
2025-07-01 17:49:08.354 # search for the pair that matches best without being identical
2025-07-01 17:49:08.354 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.354 # on junk -- unless we have to)
2025-07-01 17:49:08.354 for j in range(blo, bhi):
2025-07-01 17:49:08.354 bj = b[j]
2025-07-01 17:49:08.354 cruncher.set_seq2(bj)
2025-07-01 17:49:08.354 for i in range(alo, ahi):
2025-07-01 17:49:08.354 ai = a[i]
2025-07-01 17:49:08.354 if ai == bj:
2025-07-01 17:49:08.355 if eqi is None:
2025-07-01 17:49:08.355 eqi, eqj = i, j
2025-07-01 17:49:08.355 continue
2025-07-01 17:49:08.355 cruncher.set_seq1(ai)
2025-07-01 17:49:08.355 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.355 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.355 # compares by a factor of 3.
2025-07-01 17:49:08.355 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.355 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.355 # of the computation is cached by cruncher
2025-07-01 17:49:08.355 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.355 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.355 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.355 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.355 if best_ratio < cutoff:
2025-07-01 17:49:08.355 # no non-identical "pretty close" pair
2025-07-01 17:49:08.355 if eqi is None:
2025-07-01 17:49:08.355 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.355 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.355 return
2025-07-01 17:49:08.355 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.356 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.356 else:
2025-07-01 17:49:08.356 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.356 eqi = None
2025-07-01 17:49:08.356
2025-07-01 17:49:08.356 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.356 # identical
2025-07-01 17:49:08.356
2025-07-01 17:49:08.356 # pump out diffs from before the synch point
2025-07-01 17:49:08.356 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.356
2025-07-01 17:49:08.356 # do intraline marking on the synch pair
2025-07-01 17:49:08.356 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.356 if eqi is None:
2025-07-01 17:49:08.356 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.356 atags = btags = ""
2025-07-01 17:49:08.356 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.356 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.356 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.356 if tag == 'replace':
2025-07-01 17:49:08.356 atags += '^' * la
2025-07-01 17:49:08.357 btags += '^' * lb
2025-07-01 17:49:08.362 elif tag == 'delete':
2025-07-01 17:49:08.362 atags += '-' * la
2025-07-01 17:49:08.362 elif tag == 'insert':
2025-07-01 17:49:08.362 btags += '+' * lb
2025-07-01 17:49:08.362 elif tag == 'equal':
2025-07-01 17:49:08.362 atags += ' ' * la
2025-07-01 17:49:08.362 btags += ' ' * lb
2025-07-01 17:49:08.362 else:
2025-07-01 17:49:08.362 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.362 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.362 else:
2025-07-01 17:49:08.362 # the synch pair is identical
2025-07-01 17:49:08.362 yield ' ' + aelt
2025-07-01 17:49:08.362
2025-07-01 17:49:08.362 # pump out diffs from after the synch point
2025-07-01 17:49:08.362 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.362
2025-07-01 17:49:08.362 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.362 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.363
2025-07-01 17:49:08.363 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.363 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.363 alo = 383, ahi = 1101
2025-07-01 17:49:08.363 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.363 blo = 383, bhi = 1101
2025-07-01 17:49:08.363
2025-07-01 17:49:08.363 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.363 g = []
2025-07-01 17:49:08.363 if alo < ahi:
2025-07-01 17:49:08.363 if blo < bhi:
2025-07-01 17:49:08.363 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.363 else:
2025-07-01 17:49:08.363 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.363 elif blo < bhi:
2025-07-01 17:49:08.363 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.363
2025-07-01 17:49:08.363 > yield from g
2025-07-01 17:49:08.363
2025-07-01 17:49:08.363 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.363 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.364
2025-07-01 17:49:08.364 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.364 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.364 alo = 383, ahi = 1101
2025-07-01 17:49:08.364 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.364 blo = 383, bhi = 1101
2025-07-01 17:49:08.364
2025-07-01 17:49:08.364 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.364 r"""
2025-07-01 17:49:08.364 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.364 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.364 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.364 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.364
2025-07-01 17:49:08.364 Example:
2025-07-01 17:49:08.364
2025-07-01 17:49:08.364 >>> d = Differ()
2025-07-01 17:49:08.364 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.364 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.364 >>> print(''.join(results), end="")
2025-07-01 17:49:08.364 - abcDefghiJkl
2025-07-01 17:49:08.365 + abcdefGhijkl
2025-07-01 17:49:08.365 """
2025-07-01 17:49:08.365
2025-07-01 17:49:08.365 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.365 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.365 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.365 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.365 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.365
2025-07-01 17:49:08.365 # search for the pair that matches best without being identical
2025-07-01 17:49:08.365 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.365 # on junk -- unless we have to)
2025-07-01 17:49:08.365 for j in range(blo, bhi):
2025-07-01 17:49:08.365 bj = b[j]
2025-07-01 17:49:08.365 cruncher.set_seq2(bj)
2025-07-01 17:49:08.365 for i in range(alo, ahi):
2025-07-01 17:49:08.365 ai = a[i]
2025-07-01 17:49:08.365 if ai == bj:
2025-07-01 17:49:08.366 if eqi is None:
2025-07-01 17:49:08.366 eqi, eqj = i, j
2025-07-01 17:49:08.366 continue
2025-07-01 17:49:08.366 cruncher.set_seq1(ai)
2025-07-01 17:49:08.366 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.366 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.366 # compares by a factor of 3.
2025-07-01 17:49:08.366 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.366 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.366 # of the computation is cached by cruncher
2025-07-01 17:49:08.366 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.366 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.366 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.366 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.366 if best_ratio < cutoff:
2025-07-01 17:49:08.366 # no non-identical "pretty close" pair
2025-07-01 17:49:08.366 if eqi is None:
2025-07-01 17:49:08.366 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.366 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.366 return
2025-07-01 17:49:08.367 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.367 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.367 else:
2025-07-01 17:49:08.367 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.367 eqi = None
2025-07-01 17:49:08.367
2025-07-01 17:49:08.367 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.367 # identical
2025-07-01 17:49:08.367
2025-07-01 17:49:08.367 # pump out diffs from before the synch point
2025-07-01 17:49:08.367 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.367
2025-07-01 17:49:08.367 # do intraline marking on the synch pair
2025-07-01 17:49:08.367 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.367 if eqi is None:
2025-07-01 17:49:08.367 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.367 atags = btags = ""
2025-07-01 17:49:08.367 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.367 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.367 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.367 if tag == 'replace':
2025-07-01 17:49:08.367 atags += '^' * la
2025-07-01 17:49:08.368 btags += '^' * lb
2025-07-01 17:49:08.368 elif tag == 'delete':
2025-07-01 17:49:08.368 atags += '-' * la
2025-07-01 17:49:08.368 elif tag == 'insert':
2025-07-01 17:49:08.368 btags += '+' * lb
2025-07-01 17:49:08.368 elif tag == 'equal':
2025-07-01 17:49:08.368 atags += ' ' * la
2025-07-01 17:49:08.368 btags += ' ' * lb
2025-07-01 17:49:08.368 else:
2025-07-01 17:49:08.368 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.368 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.368 else:
2025-07-01 17:49:08.368 # the synch pair is identical
2025-07-01 17:49:08.368 yield ' ' + aelt
2025-07-01 17:49:08.368
2025-07-01 17:49:08.368 # pump out diffs from after the synch point
2025-07-01 17:49:08.368 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.368
2025-07-01 17:49:08.368 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.368 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.368
2025-07-01 17:49:08.368 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.369 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.369 alo = 384, ahi = 1101
2025-07-01 17:49:08.369 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.369 blo = 384, bhi = 1101
2025-07-01 17:49:08.369
2025-07-01 17:49:08.369 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.369 g = []
2025-07-01 17:49:08.369 if alo < ahi:
2025-07-01 17:49:08.369 if blo < bhi:
2025-07-01 17:49:08.369 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.369 else:
2025-07-01 17:49:08.369 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.369 elif blo < bhi:
2025-07-01 17:49:08.369 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.369
2025-07-01 17:49:08.369 > yield from g
2025-07-01 17:49:08.369
2025-07-01 17:49:08.369 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.369 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.369
2025-07-01 17:49:08.369 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.370 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.370 alo = 384, ahi = 1101
2025-07-01 17:49:08.370 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.370 blo = 384, bhi = 1101
2025-07-01 17:49:08.370
2025-07-01 17:49:08.370 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.370 r"""
2025-07-01 17:49:08.370 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.370 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.370 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.370 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.370
2025-07-01 17:49:08.370 Example:
2025-07-01 17:49:08.370
2025-07-01 17:49:08.370 >>> d = Differ()
2025-07-01 17:49:08.370 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.370 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.370 >>> print(''.join(results), end="")
2025-07-01 17:49:08.370 - abcDefghiJkl
2025-07-01 17:49:08.371 + abcdefGhijkl
2025-07-01 17:49:08.371 """
2025-07-01 17:49:08.371
2025-07-01 17:49:08.371 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.371 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.371 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.371 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.371 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.371
2025-07-01 17:49:08.371 # search for the pair that matches best without being identical
2025-07-01 17:49:08.371 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.371 # on junk -- unless we have to)
2025-07-01 17:49:08.371 for j in range(blo, bhi):
2025-07-01 17:49:08.371 bj = b[j]
2025-07-01 17:49:08.371 cruncher.set_seq2(bj)
2025-07-01 17:49:08.371 for i in range(alo, ahi):
2025-07-01 17:49:08.371 ai = a[i]
2025-07-01 17:49:08.371 if ai == bj:
2025-07-01 17:49:08.372 if eqi is None:
2025-07-01 17:49:08.372 eqi, eqj = i, j
2025-07-01 17:49:08.372 continue
2025-07-01 17:49:08.372 cruncher.set_seq1(ai)
2025-07-01 17:49:08.372 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.372 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.372 # compares by a factor of 3.
2025-07-01 17:49:08.372 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.372 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.372 # of the computation is cached by cruncher
2025-07-01 17:49:08.372 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.372 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.372 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.372 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.372 if best_ratio < cutoff:
2025-07-01 17:49:08.372 # no non-identical "pretty close" pair
2025-07-01 17:49:08.372 if eqi is None:
2025-07-01 17:49:08.372 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.372 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.372 return
2025-07-01 17:49:08.372 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.376 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.376 else:
2025-07-01 17:49:08.376 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.376 eqi = None
2025-07-01 17:49:08.376
2025-07-01 17:49:08.376 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.376 # identical
2025-07-01 17:49:08.376
2025-07-01 17:49:08.376 # pump out diffs from before the synch point
2025-07-01 17:49:08.376 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.376
2025-07-01 17:49:08.376 # do intraline marking on the synch pair
2025-07-01 17:49:08.376 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.376 if eqi is None:
2025-07-01 17:49:08.376 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.376 atags = btags = ""
2025-07-01 17:49:08.376 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.376 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.376 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.377 if tag == 'replace':
2025-07-01 17:49:08.377 atags += '^' * la
2025-07-01 17:49:08.377 btags += '^' * lb
2025-07-01 17:49:08.377 elif tag == 'delete':
2025-07-01 17:49:08.377 atags += '-' * la
2025-07-01 17:49:08.377 elif tag == 'insert':
2025-07-01 17:49:08.377 btags += '+' * lb
2025-07-01 17:49:08.377 elif tag == 'equal':
2025-07-01 17:49:08.377 atags += ' ' * la
2025-07-01 17:49:08.377 btags += ' ' * lb
2025-07-01 17:49:08.377 else:
2025-07-01 17:49:08.377 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.377 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.377 else:
2025-07-01 17:49:08.377 # the synch pair is identical
2025-07-01 17:49:08.377 yield ' ' + aelt
2025-07-01 17:49:08.377
2025-07-01 17:49:08.377 # pump out diffs from after the synch point
2025-07-01 17:49:08.377 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.377
2025-07-01 17:49:08.377 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.378 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.378
2025-07-01 17:49:08.378 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.378 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.378 alo = 385, ahi = 1101
2025-07-01 17:49:08.378 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.378 blo = 385, bhi = 1101
2025-07-01 17:49:08.378
2025-07-01 17:49:08.378 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.378 g = []
2025-07-01 17:49:08.378 if alo < ahi:
2025-07-01 17:49:08.378 if blo < bhi:
2025-07-01 17:49:08.378 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.378 else:
2025-07-01 17:49:08.378 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.378 elif blo < bhi:
2025-07-01 17:49:08.378 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.378
2025-07-01 17:49:08.378 > yield from g
2025-07-01 17:49:08.378
2025-07-01 17:49:08.378 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.379 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.379
2025-07-01 17:49:08.379 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.379 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.379 alo = 385, ahi = 1101
2025-07-01 17:49:08.379 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.379 blo = 385, bhi = 1101
2025-07-01 17:49:08.379
2025-07-01 17:49:08.379 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.379 r"""
2025-07-01 17:49:08.379 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.379 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.379 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.379 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.379
2025-07-01 17:49:08.379 Example:
2025-07-01 17:49:08.379
2025-07-01 17:49:08.379 >>> d = Differ()
2025-07-01 17:49:08.379 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.379 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.380 >>> print(''.join(results), end="")
2025-07-01 17:49:08.380 - abcDefghiJkl
2025-07-01 17:49:08.380 + abcdefGhijkl
2025-07-01 17:49:08.380 """
2025-07-01 17:49:08.380
2025-07-01 17:49:08.380 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.380 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.380 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.380 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.380 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.380
2025-07-01 17:49:08.380 # search for the pair that matches best without being identical
2025-07-01 17:49:08.380 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.380 # on junk -- unless we have to)
2025-07-01 17:49:08.380 for j in range(blo, bhi):
2025-07-01 17:49:08.380 bj = b[j]
2025-07-01 17:49:08.380 cruncher.set_seq2(bj)
2025-07-01 17:49:08.380 for i in range(alo, ahi):
2025-07-01 17:49:08.380 ai = a[i]
2025-07-01 17:49:08.381 if ai == bj:
2025-07-01 17:49:08.381 if eqi is None:
2025-07-01 17:49:08.381 eqi, eqj = i, j
2025-07-01 17:49:08.381 continue
2025-07-01 17:49:08.381 cruncher.set_seq1(ai)
2025-07-01 17:49:08.381 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.381 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.381 # compares by a factor of 3.
2025-07-01 17:49:08.381 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.381 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.381 # of the computation is cached by cruncher
2025-07-01 17:49:08.381 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.381 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.381 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.381 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.381 if best_ratio < cutoff:
2025-07-01 17:49:08.381 # no non-identical "pretty close" pair
2025-07-01 17:49:08.381 if eqi is None:
2025-07-01 17:49:08.381 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.382 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.382 return
2025-07-01 17:49:08.382 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.382 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.382 else:
2025-07-01 17:49:08.382 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.382 eqi = None
2025-07-01 17:49:08.382
2025-07-01 17:49:08.382 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.382 # identical
2025-07-01 17:49:08.382
2025-07-01 17:49:08.382 # pump out diffs from before the synch point
2025-07-01 17:49:08.382 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.382
2025-07-01 17:49:08.382 # do intraline marking on the synch pair
2025-07-01 17:49:08.382 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.382 if eqi is None:
2025-07-01 17:49:08.382 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.382 atags = btags = ""
2025-07-01 17:49:08.382 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.383 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.383 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.383 if tag == 'replace':
2025-07-01 17:49:08.383 atags += '^' * la
2025-07-01 17:49:08.383 btags += '^' * lb
2025-07-01 17:49:08.383 elif tag == 'delete':
2025-07-01 17:49:08.383 atags += '-' * la
2025-07-01 17:49:08.383 elif tag == 'insert':
2025-07-01 17:49:08.383 btags += '+' * lb
2025-07-01 17:49:08.383 elif tag == 'equal':
2025-07-01 17:49:08.383 atags += ' ' * la
2025-07-01 17:49:08.383 btags += ' ' * lb
2025-07-01 17:49:08.383 else:
2025-07-01 17:49:08.383 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.383 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.383 else:
2025-07-01 17:49:08.383 # the synch pair is identical
2025-07-01 17:49:08.383 yield ' ' + aelt
2025-07-01 17:49:08.383
2025-07-01 17:49:08.383 # pump out diffs from after the synch point
2025-07-01 17:49:08.383 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.384
2025-07-01 17:49:08.384 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.384 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.384
2025-07-01 17:49:08.384 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.384 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.384 alo = 386, ahi = 1101
2025-07-01 17:49:08.384 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.384 blo = 386, bhi = 1101
2025-07-01 17:49:08.384
2025-07-01 17:49:08.384 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.384 g = []
2025-07-01 17:49:08.384 if alo < ahi:
2025-07-01 17:49:08.384 if blo < bhi:
2025-07-01 17:49:08.384 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.384 else:
2025-07-01 17:49:08.384 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.384 elif blo < bhi:
2025-07-01 17:49:08.384 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.384
2025-07-01 17:49:08.384 > yield from g
2025-07-01 17:49:08.385
2025-07-01 17:49:08.385 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.385 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.385
2025-07-01 17:49:08.385 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.385 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.385 alo = 386, ahi = 1101
2025-07-01 17:49:08.385 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.385 blo = 386, bhi = 1101
2025-07-01 17:49:08.385
2025-07-01 17:49:08.385 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.385 r"""
2025-07-01 17:49:08.385 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.385 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.385 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.385 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.385
2025-07-01 17:49:08.385 Example:
2025-07-01 17:49:08.385
2025-07-01 17:49:08.385 >>> d = Differ()
2025-07-01 17:49:08.385 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.386 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.386 >>> print(''.join(results), end="")
2025-07-01 17:49:08.386 - abcDefghiJkl
2025-07-01 17:49:08.386 + abcdefGhijkl
2025-07-01 17:49:08.386 """
2025-07-01 17:49:08.386
2025-07-01 17:49:08.386 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.386 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.386 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.386 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.386 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.386
2025-07-01 17:49:08.386 # search for the pair that matches best without being identical
2025-07-01 17:49:08.386 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.386 # on junk -- unless we have to)
2025-07-01 17:49:08.386 for j in range(blo, bhi):
2025-07-01 17:49:08.386 bj = b[j]
2025-07-01 17:49:08.386 cruncher.set_seq2(bj)
2025-07-01 17:49:08.386 for i in range(alo, ahi):
2025-07-01 17:49:08.386 ai = a[i]
2025-07-01 17:49:08.387 if ai == bj:
2025-07-01 17:49:08.387 if eqi is None:
2025-07-01 17:49:08.387 eqi, eqj = i, j
2025-07-01 17:49:08.387 continue
2025-07-01 17:49:08.387 cruncher.set_seq1(ai)
2025-07-01 17:49:08.387 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.387 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.387 # compares by a factor of 3.
2025-07-01 17:49:08.387 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.387 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.387 # of the computation is cached by cruncher
2025-07-01 17:49:08.387 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.387 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.387 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.387 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.387 if best_ratio < cutoff:
2025-07-01 17:49:08.387 # no non-identical "pretty close" pair
2025-07-01 17:49:08.387 if eqi is None:
2025-07-01 17:49:08.387 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.387 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.387 return
2025-07-01 17:49:08.388 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.388 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.388 else:
2025-07-01 17:49:08.388 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.388 eqi = None
2025-07-01 17:49:08.388
2025-07-01 17:49:08.388 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.388 # identical
2025-07-01 17:49:08.388
2025-07-01 17:49:08.388 # pump out diffs from before the synch point
2025-07-01 17:49:08.388 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.388
2025-07-01 17:49:08.388 # do intraline marking on the synch pair
2025-07-01 17:49:08.388 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.388 if eqi is None:
2025-07-01 17:49:08.388 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.388 atags = btags = ""
2025-07-01 17:49:08.388 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.388 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.388 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.388 if tag == 'replace':
2025-07-01 17:49:08.389 atags += '^' * la
2025-07-01 17:49:08.394 btags += '^' * lb
2025-07-01 17:49:08.394 elif tag == 'delete':
2025-07-01 17:49:08.394 atags += '-' * la
2025-07-01 17:49:08.394 elif tag == 'insert':
2025-07-01 17:49:08.394 btags += '+' * lb
2025-07-01 17:49:08.394 elif tag == 'equal':
2025-07-01 17:49:08.394 atags += ' ' * la
2025-07-01 17:49:08.394 btags += ' ' * lb
2025-07-01 17:49:08.394 else:
2025-07-01 17:49:08.394 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.394 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.394 else:
2025-07-01 17:49:08.394 # the synch pair is identical
2025-07-01 17:49:08.394 yield ' ' + aelt
2025-07-01 17:49:08.394
2025-07-01 17:49:08.394 # pump out diffs from after the synch point
2025-07-01 17:49:08.394 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.395
2025-07-01 17:49:08.395 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.395 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.395
2025-07-01 17:49:08.395 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.395 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.395 alo = 387, ahi = 1101
2025-07-01 17:49:08.395 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.395 blo = 387, bhi = 1101
2025-07-01 17:49:08.395
2025-07-01 17:49:08.395 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.395 g = []
2025-07-01 17:49:08.395 if alo < ahi:
2025-07-01 17:49:08.395 if blo < bhi:
2025-07-01 17:49:08.396 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.396 else:
2025-07-01 17:49:08.396 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.396 elif blo < bhi:
2025-07-01 17:49:08.396 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.396
2025-07-01 17:49:08.396 > yield from g
2025-07-01 17:49:08.396
2025-07-01 17:49:08.396 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.396 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.396
2025-07-01 17:49:08.396 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.396 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.396 alo = 387, ahi = 1101
2025-07-01 17:49:08.396 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.396 blo = 387, bhi = 1101
2025-07-01 17:49:08.396
2025-07-01 17:49:08.396 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.396 r"""
2025-07-01 17:49:08.396 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.397 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.397 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.397 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.397
2025-07-01 17:49:08.397 Example:
2025-07-01 17:49:08.397
2025-07-01 17:49:08.397 >>> d = Differ()
2025-07-01 17:49:08.397 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.397 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.397 >>> print(''.join(results), end="")
2025-07-01 17:49:08.397 - abcDefghiJkl
2025-07-01 17:49:08.397 + abcdefGhijkl
2025-07-01 17:49:08.397 """
2025-07-01 17:49:08.397
2025-07-01 17:49:08.397 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.397 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.397 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.397 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.398 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.398
2025-07-01 17:49:08.398 # search for the pair that matches best without being identical
2025-07-01 17:49:08.398 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.398 # on junk -- unless we have to)
2025-07-01 17:49:08.398 for j in range(blo, bhi):
2025-07-01 17:49:08.398 bj = b[j]
2025-07-01 17:49:08.398 cruncher.set_seq2(bj)
2025-07-01 17:49:08.398 for i in range(alo, ahi):
2025-07-01 17:49:08.398 ai = a[i]
2025-07-01 17:49:08.398 if ai == bj:
2025-07-01 17:49:08.398 if eqi is None:
2025-07-01 17:49:08.398 eqi, eqj = i, j
2025-07-01 17:49:08.398 continue
2025-07-01 17:49:08.398 cruncher.set_seq1(ai)
2025-07-01 17:49:08.398 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.398 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.398 # compares by a factor of 3.
2025-07-01 17:49:08.398 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.398 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.399 # of the computation is cached by cruncher
2025-07-01 17:49:08.399 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.399 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.399 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.399 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.399 if best_ratio < cutoff:
2025-07-01 17:49:08.399 # no non-identical "pretty close" pair
2025-07-01 17:49:08.399 if eqi is None:
2025-07-01 17:49:08.399 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.399 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.399 return
2025-07-01 17:49:08.399 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.399 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.399 else:
2025-07-01 17:49:08.399 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.399 eqi = None
2025-07-01 17:49:08.399
2025-07-01 17:49:08.399 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.399 # identical
2025-07-01 17:49:08.399
2025-07-01 17:49:08.399 # pump out diffs from before the synch point
2025-07-01 17:49:08.400 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.400
2025-07-01 17:49:08.400 # do intraline marking on the synch pair
2025-07-01 17:49:08.400 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.400 if eqi is None:
2025-07-01 17:49:08.400 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.400 atags = btags = ""
2025-07-01 17:49:08.400 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.400 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.400 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.400 if tag == 'replace':
2025-07-01 17:49:08.400 atags += '^' * la
2025-07-01 17:49:08.400 btags += '^' * lb
2025-07-01 17:49:08.400 elif tag == 'delete':
2025-07-01 17:49:08.400 atags += '-' * la
2025-07-01 17:49:08.400 elif tag == 'insert':
2025-07-01 17:49:08.400 btags += '+' * lb
2025-07-01 17:49:08.400 elif tag == 'equal':
2025-07-01 17:49:08.400 atags += ' ' * la
2025-07-01 17:49:08.400 btags += ' ' * lb
2025-07-01 17:49:08.400 else:
2025-07-01 17:49:08.400 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.401 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.401 else:
2025-07-01 17:49:08.401 # the synch pair is identical
2025-07-01 17:49:08.401 yield ' ' + aelt
2025-07-01 17:49:08.401
2025-07-01 17:49:08.401 # pump out diffs from after the synch point
2025-07-01 17:49:08.401 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.401
2025-07-01 17:49:08.401 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.401 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.401
2025-07-01 17:49:08.401 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.401 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.401 alo = 388, ahi = 1101
2025-07-01 17:49:08.401 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.401 blo = 388, bhi = 1101
2025-07-01 17:49:08.401
2025-07-01 17:49:08.401 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.401 g = []
2025-07-01 17:49:08.401 if alo < ahi:
2025-07-01 17:49:08.401 if blo < bhi:
2025-07-01 17:49:08.402 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.402 else:
2025-07-01 17:49:08.402 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.402 elif blo < bhi:
2025-07-01 17:49:08.402 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.402
2025-07-01 17:49:08.402 > yield from g
2025-07-01 17:49:08.402
2025-07-01 17:49:08.402 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.402 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.402
2025-07-01 17:49:08.402 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.402 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.402 alo = 388, ahi = 1101
2025-07-01 17:49:08.402 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.402 blo = 388, bhi = 1101
2025-07-01 17:49:08.402
2025-07-01 17:49:08.402 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.402 r"""
2025-07-01 17:49:08.402 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.402 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.402 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.403 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.403
2025-07-01 17:49:08.403 Example:
2025-07-01 17:49:08.403
2025-07-01 17:49:08.403 >>> d = Differ()
2025-07-01 17:49:08.403 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.403 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.403 >>> print(''.join(results), end="")
2025-07-01 17:49:08.403 - abcDefghiJkl
2025-07-01 17:49:08.403 + abcdefGhijkl
2025-07-01 17:49:08.403 """
2025-07-01 17:49:08.403
2025-07-01 17:49:08.403 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.403 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.403 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.403 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.404 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.404
2025-07-01 17:49:08.404 # search for the pair that matches best without being identical
2025-07-01 17:49:08.404 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.404 # on junk -- unless we have to)
2025-07-01 17:49:08.404 for j in range(blo, bhi):
2025-07-01 17:49:08.404 bj = b[j]
2025-07-01 17:49:08.404 cruncher.set_seq2(bj)
2025-07-01 17:49:08.404 for i in range(alo, ahi):
2025-07-01 17:49:08.404 ai = a[i]
2025-07-01 17:49:08.404 if ai == bj:
2025-07-01 17:49:08.404 if eqi is None:
2025-07-01 17:49:08.404 eqi, eqj = i, j
2025-07-01 17:49:08.404 continue
2025-07-01 17:49:08.404 cruncher.set_seq1(ai)
2025-07-01 17:49:08.404 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.404 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.404 # compares by a factor of 3.
2025-07-01 17:49:08.404 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.404 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.405 # of the computation is cached by cruncher
2025-07-01 17:49:08.408 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.408 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.408 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.408 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.408 if best_ratio < cutoff:
2025-07-01 17:49:08.408 # no non-identical "pretty close" pair
2025-07-01 17:49:08.408 if eqi is None:
2025-07-01 17:49:08.408 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.408 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.408 return
2025-07-01 17:49:08.408 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.408 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.408 else:
2025-07-01 17:49:08.408 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.408 eqi = None
2025-07-01 17:49:08.408
2025-07-01 17:49:08.408 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.409 # identical
2025-07-01 17:49:08.409
2025-07-01 17:49:08.409 # pump out diffs from before the synch point
2025-07-01 17:49:08.409 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.409
2025-07-01 17:49:08.409 # do intraline marking on the synch pair
2025-07-01 17:49:08.409 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.409 if eqi is None:
2025-07-01 17:49:08.409 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.409 atags = btags = ""
2025-07-01 17:49:08.409 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.409 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.409 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.409 if tag == 'replace':
2025-07-01 17:49:08.409 atags += '^' * la
2025-07-01 17:49:08.409 btags += '^' * lb
2025-07-01 17:49:08.409 elif tag == 'delete':
2025-07-01 17:49:08.409 atags += '-' * la
2025-07-01 17:49:08.409 elif tag == 'insert':
2025-07-01 17:49:08.409 btags += '+' * lb
2025-07-01 17:49:08.409 elif tag == 'equal':
2025-07-01 17:49:08.410 atags += ' ' * la
2025-07-01 17:49:08.410 btags += ' ' * lb
2025-07-01 17:49:08.410 else:
2025-07-01 17:49:08.410 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.410 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.410 else:
2025-07-01 17:49:08.410 # the synch pair is identical
2025-07-01 17:49:08.410 yield ' ' + aelt
2025-07-01 17:49:08.410
2025-07-01 17:49:08.410 # pump out diffs from after the synch point
2025-07-01 17:49:08.410 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.410
2025-07-01 17:49:08.410 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.410 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.410
2025-07-01 17:49:08.410 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.410 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.410 alo = 389, ahi = 1101
2025-07-01 17:49:08.410 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.410 blo = 389, bhi = 1101
2025-07-01 17:49:08.411
2025-07-01 17:49:08.411 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.411 g = []
2025-07-01 17:49:08.411 if alo < ahi:
2025-07-01 17:49:08.411 if blo < bhi:
2025-07-01 17:49:08.411 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.411 else:
2025-07-01 17:49:08.411 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.411 elif blo < bhi:
2025-07-01 17:49:08.411 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.411
2025-07-01 17:49:08.411 > yield from g
2025-07-01 17:49:08.411
2025-07-01 17:49:08.411 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.411 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.411
2025-07-01 17:49:08.411 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.411 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.411 alo = 389, ahi = 1101
2025-07-01 17:49:08.411 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.412 blo = 389, bhi = 1101
2025-07-01 17:49:08.412
2025-07-01 17:49:08.412 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.412 r"""
2025-07-01 17:49:08.412 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.412 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.412 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.412 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.412
2025-07-01 17:49:08.412 Example:
2025-07-01 17:49:08.412
2025-07-01 17:49:08.412 >>> d = Differ()
2025-07-01 17:49:08.412 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.412 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.412 >>> print(''.join(results), end="")
2025-07-01 17:49:08.412 - abcDefghiJkl
2025-07-01 17:49:08.412 + abcdefGhijkl
2025-07-01 17:49:08.412 """
2025-07-01 17:49:08.412
2025-07-01 17:49:08.413 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.413 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.413 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.413 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.413 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.413
2025-07-01 17:49:08.413 # search for the pair that matches best without being identical
2025-07-01 17:49:08.413 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.413 # on junk -- unless we have to)
2025-07-01 17:49:08.413 for j in range(blo, bhi):
2025-07-01 17:49:08.413 bj = b[j]
2025-07-01 17:49:08.413 cruncher.set_seq2(bj)
2025-07-01 17:49:08.413 for i in range(alo, ahi):
2025-07-01 17:49:08.413 ai = a[i]
2025-07-01 17:49:08.413 if ai == bj:
2025-07-01 17:49:08.413 if eqi is None:
2025-07-01 17:49:08.413 eqi, eqj = i, j
2025-07-01 17:49:08.413 continue
2025-07-01 17:49:08.413 cruncher.set_seq1(ai)
2025-07-01 17:49:08.413 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.414 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.414 # compares by a factor of 3.
2025-07-01 17:49:08.414 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.414 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.414 # of the computation is cached by cruncher
2025-07-01 17:49:08.414 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.414 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.414 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.414 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.414 if best_ratio < cutoff:
2025-07-01 17:49:08.414 # no non-identical "pretty close" pair
2025-07-01 17:49:08.414 if eqi is None:
2025-07-01 17:49:08.414 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.414 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.414 return
2025-07-01 17:49:08.414 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.414 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.414 else:
2025-07-01 17:49:08.414 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.414 eqi = None
2025-07-01 17:49:08.415
2025-07-01 17:49:08.415 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.415 # identical
2025-07-01 17:49:08.415
2025-07-01 17:49:08.415 # pump out diffs from before the synch point
2025-07-01 17:49:08.415 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.415
2025-07-01 17:49:08.415 # do intraline marking on the synch pair
2025-07-01 17:49:08.415 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.415 if eqi is None:
2025-07-01 17:49:08.415 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.415 atags = btags = ""
2025-07-01 17:49:08.415 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.415 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.415 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.415 if tag == 'replace':
2025-07-01 17:49:08.415 atags += '^' * la
2025-07-01 17:49:08.415 btags += '^' * lb
2025-07-01 17:49:08.415 elif tag == 'delete':
2025-07-01 17:49:08.415 atags += '-' * la
2025-07-01 17:49:08.416 elif tag == 'insert':
2025-07-01 17:49:08.416 btags += '+' * lb
2025-07-01 17:49:08.416 elif tag == 'equal':
2025-07-01 17:49:08.416 atags += ' ' * la
2025-07-01 17:49:08.416 btags += ' ' * lb
2025-07-01 17:49:08.416 else:
2025-07-01 17:49:08.416 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.416 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.416 else:
2025-07-01 17:49:08.416 # the synch pair is identical
2025-07-01 17:49:08.416 yield ' ' + aelt
2025-07-01 17:49:08.416
2025-07-01 17:49:08.416 # pump out diffs from after the synch point
2025-07-01 17:49:08.416 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.416
2025-07-01 17:49:08.416 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.416 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.416
2025-07-01 17:49:08.416 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.416 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.416 alo = 390, ahi = 1101
2025-07-01 17:49:08.417 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.417 blo = 390, bhi = 1101
2025-07-01 17:49:08.417
2025-07-01 17:49:08.417 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.417 g = []
2025-07-01 17:49:08.417 if alo < ahi:
2025-07-01 17:49:08.417 if blo < bhi:
2025-07-01 17:49:08.417 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.417 else:
2025-07-01 17:49:08.417 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.417 elif blo < bhi:
2025-07-01 17:49:08.417 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.417
2025-07-01 17:49:08.417 > yield from g
2025-07-01 17:49:08.417
2025-07-01 17:49:08.417 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.417 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.417
2025-07-01 17:49:08.417 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.417 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.417 alo = 390, ahi = 1101
2025-07-01 17:49:08.418 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.418 blo = 390, bhi = 1101
2025-07-01 17:49:08.418
2025-07-01 17:49:08.418 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.418 r"""
2025-07-01 17:49:08.418 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.418 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.418 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.418 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.418
2025-07-01 17:49:08.418 Example:
2025-07-01 17:49:08.418
2025-07-01 17:49:08.418 >>> d = Differ()
2025-07-01 17:49:08.418 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.418 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.418 >>> print(''.join(results), end="")
2025-07-01 17:49:08.418 - abcDefghiJkl
2025-07-01 17:49:08.418 + abcdefGhijkl
2025-07-01 17:49:08.418 """
2025-07-01 17:49:08.419
2025-07-01 17:49:08.419 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.419 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.419 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.419 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.419 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.419
2025-07-01 17:49:08.419 # search for the pair that matches best without being identical
2025-07-01 17:49:08.419 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.419 # on junk -- unless we have to)
2025-07-01 17:49:08.419 for j in range(blo, bhi):
2025-07-01 17:49:08.419 bj = b[j]
2025-07-01 17:49:08.419 cruncher.set_seq2(bj)
2025-07-01 17:49:08.419 for i in range(alo, ahi):
2025-07-01 17:49:08.419 ai = a[i]
2025-07-01 17:49:08.419 if ai == bj:
2025-07-01 17:49:08.419 if eqi is None:
2025-07-01 17:49:08.419 eqi, eqj = i, j
2025-07-01 17:49:08.419 continue
2025-07-01 17:49:08.419 cruncher.set_seq1(ai)
2025-07-01 17:49:08.419 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.419 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.420 # compares by a factor of 3.
2025-07-01 17:49:08.424 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.424 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.425 # of the computation is cached by cruncher
2025-07-01 17:49:08.425 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.425 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.425 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.425 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.425 if best_ratio < cutoff:
2025-07-01 17:49:08.425 # no non-identical "pretty close" pair
2025-07-01 17:49:08.425 if eqi is None:
2025-07-01 17:49:08.425 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.425 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.425 return
2025-07-01 17:49:08.425 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.425 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.425 else:
2025-07-01 17:49:08.425 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.425 eqi = None
2025-07-01 17:49:08.425
2025-07-01 17:49:08.425 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.425 # identical
2025-07-01 17:49:08.425
2025-07-01 17:49:08.426 # pump out diffs from before the synch point
2025-07-01 17:49:08.426 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.426
2025-07-01 17:49:08.426 # do intraline marking on the synch pair
2025-07-01 17:49:08.426 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.426 if eqi is None:
2025-07-01 17:49:08.426 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.426 atags = btags = ""
2025-07-01 17:49:08.426 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.426 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.426 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.426 if tag == 'replace':
2025-07-01 17:49:08.426 atags += '^' * la
2025-07-01 17:49:08.426 btags += '^' * lb
2025-07-01 17:49:08.426 elif tag == 'delete':
2025-07-01 17:49:08.426 atags += '-' * la
2025-07-01 17:49:08.426 elif tag == 'insert':
2025-07-01 17:49:08.426 btags += '+' * lb
2025-07-01 17:49:08.426 elif tag == 'equal':
2025-07-01 17:49:08.426 atags += ' ' * la
2025-07-01 17:49:08.426 btags += ' ' * lb
2025-07-01 17:49:08.427 else:
2025-07-01 17:49:08.427 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.427 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.427 else:
2025-07-01 17:49:08.427 # the synch pair is identical
2025-07-01 17:49:08.427 yield ' ' + aelt
2025-07-01 17:49:08.427
2025-07-01 17:49:08.427 # pump out diffs from after the synch point
2025-07-01 17:49:08.427 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.427
2025-07-01 17:49:08.427 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.427 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.427
2025-07-01 17:49:08.427 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.427 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.427 alo = 391, ahi = 1101
2025-07-01 17:49:08.427 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.427 blo = 391, bhi = 1101
2025-07-01 17:49:08.427
2025-07-01 17:49:08.427 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.427 g = []
2025-07-01 17:49:08.428 if alo < ahi:
2025-07-01 17:49:08.428 if blo < bhi:
2025-07-01 17:49:08.428 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.428 else:
2025-07-01 17:49:08.428 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.428 elif blo < bhi:
2025-07-01 17:49:08.428 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.428
2025-07-01 17:49:08.428 > yield from g
2025-07-01 17:49:08.428
2025-07-01 17:49:08.428 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.428 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.428
2025-07-01 17:49:08.428 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.428 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.428 alo = 391, ahi = 1101
2025-07-01 17:49:08.428 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.428 blo = 391, bhi = 1101
2025-07-01 17:49:08.428
2025-07-01 17:49:08.429 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.429 r"""
2025-07-01 17:49:08.429 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.429 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.429 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.429 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.429
2025-07-01 17:49:08.429 Example:
2025-07-01 17:49:08.429
2025-07-01 17:49:08.429 >>> d = Differ()
2025-07-01 17:49:08.429 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.429 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.429 >>> print(''.join(results), end="")
2025-07-01 17:49:08.429 - abcDefghiJkl
2025-07-01 17:49:08.429 + abcdefGhijkl
2025-07-01 17:49:08.429 """
2025-07-01 17:49:08.429
2025-07-01 17:49:08.429 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.430 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.430 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.430 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.430 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.430
2025-07-01 17:49:08.430 # search for the pair that matches best without being identical
2025-07-01 17:49:08.430 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.430 # on junk -- unless we have to)
2025-07-01 17:49:08.430 for j in range(blo, bhi):
2025-07-01 17:49:08.430 bj = b[j]
2025-07-01 17:49:08.430 cruncher.set_seq2(bj)
2025-07-01 17:49:08.430 for i in range(alo, ahi):
2025-07-01 17:49:08.430 ai = a[i]
2025-07-01 17:49:08.430 if ai == bj:
2025-07-01 17:49:08.430 if eqi is None:
2025-07-01 17:49:08.430 eqi, eqj = i, j
2025-07-01 17:49:08.430 continue
2025-07-01 17:49:08.430 cruncher.set_seq1(ai)
2025-07-01 17:49:08.430 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.430 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.430 # compares by a factor of 3.
2025-07-01 17:49:08.431 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.431 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.431 # of the computation is cached by cruncher
2025-07-01 17:49:08.431 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.431 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.431 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.431 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.431 if best_ratio < cutoff:
2025-07-01 17:49:08.431 # no non-identical "pretty close" pair
2025-07-01 17:49:08.431 if eqi is None:
2025-07-01 17:49:08.431 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.431 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.431 return
2025-07-01 17:49:08.431 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.431 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.431 else:
2025-07-01 17:49:08.431 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.431 eqi = None
2025-07-01 17:49:08.431
2025-07-01 17:49:08.431 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.431 # identical
2025-07-01 17:49:08.431
2025-07-01 17:49:08.432 # pump out diffs from before the synch point
2025-07-01 17:49:08.432 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.432
2025-07-01 17:49:08.432 # do intraline marking on the synch pair
2025-07-01 17:49:08.432 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.432 if eqi is None:
2025-07-01 17:49:08.432 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.432 atags = btags = ""
2025-07-01 17:49:08.432 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.432 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.432 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.432 if tag == 'replace':
2025-07-01 17:49:08.432 atags += '^' * la
2025-07-01 17:49:08.432 btags += '^' * lb
2025-07-01 17:49:08.432 elif tag == 'delete':
2025-07-01 17:49:08.432 atags += '-' * la
2025-07-01 17:49:08.432 elif tag == 'insert':
2025-07-01 17:49:08.432 btags += '+' * lb
2025-07-01 17:49:08.432 elif tag == 'equal':
2025-07-01 17:49:08.432 atags += ' ' * la
2025-07-01 17:49:08.432 btags += ' ' * lb
2025-07-01 17:49:08.433 else:
2025-07-01 17:49:08.433 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.433 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.433 else:
2025-07-01 17:49:08.433 # the synch pair is identical
2025-07-01 17:49:08.433 yield ' ' + aelt
2025-07-01 17:49:08.433
2025-07-01 17:49:08.433 # pump out diffs from after the synch point
2025-07-01 17:49:08.433 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.433
2025-07-01 17:49:08.433 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.433 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.433
2025-07-01 17:49:08.433 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.433 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.433 alo = 392, ahi = 1101
2025-07-01 17:49:08.433 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.433 blo = 392, bhi = 1101
2025-07-01 17:49:08.433
2025-07-01 17:49:08.433 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.433 g = []
2025-07-01 17:49:08.433 if alo < ahi:
2025-07-01 17:49:08.434 if blo < bhi:
2025-07-01 17:49:08.434 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.434 else:
2025-07-01 17:49:08.434 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.434 elif blo < bhi:
2025-07-01 17:49:08.434 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.434
2025-07-01 17:49:08.434 > yield from g
2025-07-01 17:49:08.434
2025-07-01 17:49:08.434 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.434 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.434
2025-07-01 17:49:08.434 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.434 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.434 alo = 392, ahi = 1101
2025-07-01 17:49:08.434 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.434 blo = 392, bhi = 1101
2025-07-01 17:49:08.434
2025-07-01 17:49:08.434 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.434 r"""
2025-07-01 17:49:08.434 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.437 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.437 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.438 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.438
2025-07-01 17:49:08.438 Example:
2025-07-01 17:49:08.438
2025-07-01 17:49:08.438 >>> d = Differ()
2025-07-01 17:49:08.438 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.438 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.438 >>> print(''.join(results), end="")
2025-07-01 17:49:08.438 - abcDefghiJkl
2025-07-01 17:49:08.438 + abcdefGhijkl
2025-07-01 17:49:08.438 """
2025-07-01 17:49:08.438
2025-07-01 17:49:08.438 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.438 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.438 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.438 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.438 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.438
2025-07-01 17:49:08.439 # search for the pair that matches best without being identical
2025-07-01 17:49:08.439 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.439 # on junk -- unless we have to)
2025-07-01 17:49:08.439 for j in range(blo, bhi):
2025-07-01 17:49:08.439 bj = b[j]
2025-07-01 17:49:08.439 cruncher.set_seq2(bj)
2025-07-01 17:49:08.439 for i in range(alo, ahi):
2025-07-01 17:49:08.439 ai = a[i]
2025-07-01 17:49:08.439 if ai == bj:
2025-07-01 17:49:08.439 if eqi is None:
2025-07-01 17:49:08.439 eqi, eqj = i, j
2025-07-01 17:49:08.439 continue
2025-07-01 17:49:08.439 cruncher.set_seq1(ai)
2025-07-01 17:49:08.439 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.439 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.439 # compares by a factor of 3.
2025-07-01 17:49:08.439 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.439 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.439 # of the computation is cached by cruncher
2025-07-01 17:49:08.440 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.440 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.440 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.440 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.440 if best_ratio < cutoff:
2025-07-01 17:49:08.440 # no non-identical "pretty close" pair
2025-07-01 17:49:08.440 if eqi is None:
2025-07-01 17:49:08.440 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.440 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.440 return
2025-07-01 17:49:08.440 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.440 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.440 else:
2025-07-01 17:49:08.440 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.440 eqi = None
2025-07-01 17:49:08.440
2025-07-01 17:49:08.440 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.440 # identical
2025-07-01 17:49:08.440
2025-07-01 17:49:08.440 # pump out diffs from before the synch point
2025-07-01 17:49:08.441 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.441
2025-07-01 17:49:08.441 # do intraline marking on the synch pair
2025-07-01 17:49:08.441 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.441 if eqi is None:
2025-07-01 17:49:08.441 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.441 atags = btags = ""
2025-07-01 17:49:08.441 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.441 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.441 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.441 if tag == 'replace':
2025-07-01 17:49:08.441 atags += '^' * la
2025-07-01 17:49:08.441 btags += '^' * lb
2025-07-01 17:49:08.441 elif tag == 'delete':
2025-07-01 17:49:08.441 atags += '-' * la
2025-07-01 17:49:08.441 elif tag == 'insert':
2025-07-01 17:49:08.441 btags += '+' * lb
2025-07-01 17:49:08.441 elif tag == 'equal':
2025-07-01 17:49:08.441 atags += ' ' * la
2025-07-01 17:49:08.441 btags += ' ' * lb
2025-07-01 17:49:08.441 else:
2025-07-01 17:49:08.441 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.441 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.441 else:
2025-07-01 17:49:08.441 # the synch pair is identical
2025-07-01 17:49:08.442 yield ' ' + aelt
2025-07-01 17:49:08.442
2025-07-01 17:49:08.442 # pump out diffs from after the synch point
2025-07-01 17:49:08.442 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.442
2025-07-01 17:49:08.442 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.442 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.442
2025-07-01 17:49:08.442 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.442 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.442 alo = 393, ahi = 1101
2025-07-01 17:49:08.442 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.442 blo = 393, bhi = 1101
2025-07-01 17:49:08.442
2025-07-01 17:49:08.442 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.442 g = []
2025-07-01 17:49:08.442 if alo < ahi:
2025-07-01 17:49:08.442 if blo < bhi:
2025-07-01 17:49:08.442 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.442 else:
2025-07-01 17:49:08.442 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.443 elif blo < bhi:
2025-07-01 17:49:08.443 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.443
2025-07-01 17:49:08.443 > yield from g
2025-07-01 17:49:08.443
2025-07-01 17:49:08.443 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.443 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.443
2025-07-01 17:49:08.443 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.443 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.443 alo = 393, ahi = 1101
2025-07-01 17:49:08.443 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.443 blo = 393, bhi = 1101
2025-07-01 17:49:08.443
2025-07-01 17:49:08.443 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.443 r"""
2025-07-01 17:49:08.443 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.443 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.443 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.443 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.443
2025-07-01 17:49:08.444 Example:
2025-07-01 17:49:08.444
2025-07-01 17:49:08.444 >>> d = Differ()
2025-07-01 17:49:08.444 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.444 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.444 >>> print(''.join(results), end="")
2025-07-01 17:49:08.444 - abcDefghiJkl
2025-07-01 17:49:08.444 + abcdefGhijkl
2025-07-01 17:49:08.444 """
2025-07-01 17:49:08.444
2025-07-01 17:49:08.444 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.444 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.444 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.444 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.444 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.444
2025-07-01 17:49:08.444 # search for the pair that matches best without being identical
2025-07-01 17:49:08.444 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.444 # on junk -- unless we have to)
2025-07-01 17:49:08.444 for j in range(blo, bhi):
2025-07-01 17:49:08.445 bj = b[j]
2025-07-01 17:49:08.445 cruncher.set_seq2(bj)
2025-07-01 17:49:08.445 for i in range(alo, ahi):
2025-07-01 17:49:08.445 ai = a[i]
2025-07-01 17:49:08.445 if ai == bj:
2025-07-01 17:49:08.445 if eqi is None:
2025-07-01 17:49:08.445 eqi, eqj = i, j
2025-07-01 17:49:08.445 continue
2025-07-01 17:49:08.445 cruncher.set_seq1(ai)
2025-07-01 17:49:08.445 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.445 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.445 # compares by a factor of 3.
2025-07-01 17:49:08.445 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.445 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.445 # of the computation is cached by cruncher
2025-07-01 17:49:08.445 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.445 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.445 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.445 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.445 if best_ratio < cutoff:
2025-07-01 17:49:08.445 # no non-identical "pretty close" pair
2025-07-01 17:49:08.446 if eqi is None:
2025-07-01 17:49:08.446 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.446 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.446 return
2025-07-01 17:49:08.446 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.446 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.446 else:
2025-07-01 17:49:08.446 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.446 eqi = None
2025-07-01 17:49:08.446
2025-07-01 17:49:08.446 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.446 # identical
2025-07-01 17:49:08.446
2025-07-01 17:49:08.446 # pump out diffs from before the synch point
2025-07-01 17:49:08.446 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.446
2025-07-01 17:49:08.446 # do intraline marking on the synch pair
2025-07-01 17:49:08.446 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.446 if eqi is None:
2025-07-01 17:49:08.446 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.446 atags = btags = ""
2025-07-01 17:49:08.446 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.447 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.447 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.447 if tag == 'replace':
2025-07-01 17:49:08.447 atags += '^' * la
2025-07-01 17:49:08.447 btags += '^' * lb
2025-07-01 17:49:08.447 elif tag == 'delete':
2025-07-01 17:49:08.447 atags += '-' * la
2025-07-01 17:49:08.447 elif tag == 'insert':
2025-07-01 17:49:08.447 btags += '+' * lb
2025-07-01 17:49:08.447 elif tag == 'equal':
2025-07-01 17:49:08.447 atags += ' ' * la
2025-07-01 17:49:08.447 btags += ' ' * lb
2025-07-01 17:49:08.447 else:
2025-07-01 17:49:08.447 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.447 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.447 else:
2025-07-01 17:49:08.447 # the synch pair is identical
2025-07-01 17:49:08.447 yield ' ' + aelt
2025-07-01 17:49:08.447
2025-07-01 17:49:08.448 # pump out diffs from after the synch point
2025-07-01 17:49:08.448 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.448
2025-07-01 17:49:08.448 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.448 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.448
2025-07-01 17:49:08.448 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.448 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.448 alo = 394, ahi = 1101
2025-07-01 17:49:08.448 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.448 blo = 394, bhi = 1101
2025-07-01 17:49:08.448
2025-07-01 17:49:08.448 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.448 g = []
2025-07-01 17:49:08.448 if alo < ahi:
2025-07-01 17:49:08.448 if blo < bhi:
2025-07-01 17:49:08.448 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.448 else:
2025-07-01 17:49:08.448 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.448 elif blo < bhi:
2025-07-01 17:49:08.449 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.449
2025-07-01 17:49:08.449 > yield from g
2025-07-01 17:49:08.449
2025-07-01 17:49:08.449 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.449 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.449
2025-07-01 17:49:08.449 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.449 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.449 alo = 394, ahi = 1101
2025-07-01 17:49:08.449 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.449 blo = 394, bhi = 1101
2025-07-01 17:49:08.449
2025-07-01 17:49:08.449 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.449 r"""
2025-07-01 17:49:08.449 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.449 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.449 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.449 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.449
2025-07-01 17:49:08.449 Example:
2025-07-01 17:49:08.456
2025-07-01 17:49:08.456 >>> d = Differ()
2025-07-01 17:49:08.456 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.456 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.456 >>> print(''.join(results), end="")
2025-07-01 17:49:08.456 - abcDefghiJkl
2025-07-01 17:49:08.456 + abcdefGhijkl
2025-07-01 17:49:08.456 """
2025-07-01 17:49:08.456
2025-07-01 17:49:08.456 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.456 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.456 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.456 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.456 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.456
2025-07-01 17:49:08.456 # search for the pair that matches best without being identical
2025-07-01 17:49:08.456 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.456 # on junk -- unless we have to)
2025-07-01 17:49:08.457 for j in range(blo, bhi):
2025-07-01 17:49:08.457 bj = b[j]
2025-07-01 17:49:08.457 cruncher.set_seq2(bj)
2025-07-01 17:49:08.457 for i in range(alo, ahi):
2025-07-01 17:49:08.457 ai = a[i]
2025-07-01 17:49:08.457 if ai == bj:
2025-07-01 17:49:08.457 if eqi is None:
2025-07-01 17:49:08.457 eqi, eqj = i, j
2025-07-01 17:49:08.457 continue
2025-07-01 17:49:08.457 cruncher.set_seq1(ai)
2025-07-01 17:49:08.457 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.457 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.457 # compares by a factor of 3.
2025-07-01 17:49:08.457 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.457 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.457 # of the computation is cached by cruncher
2025-07-01 17:49:08.457 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.457 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.457 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.457 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.458 if best_ratio < cutoff:
2025-07-01 17:49:08.458 # no non-identical "pretty close" pair
2025-07-01 17:49:08.458 if eqi is None:
2025-07-01 17:49:08.458 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.458 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.458 return
2025-07-01 17:49:08.458 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.458 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.458 else:
2025-07-01 17:49:08.458 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.458 eqi = None
2025-07-01 17:49:08.458
2025-07-01 17:49:08.458 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.458 # identical
2025-07-01 17:49:08.458
2025-07-01 17:49:08.458 # pump out diffs from before the synch point
2025-07-01 17:49:08.458 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.458
2025-07-01 17:49:08.458 # do intraline marking on the synch pair
2025-07-01 17:49:08.458 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.458 if eqi is None:
2025-07-01 17:49:08.459 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.459 atags = btags = ""
2025-07-01 17:49:08.459 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.459 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.459 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.459 if tag == 'replace':
2025-07-01 17:49:08.459 atags += '^' * la
2025-07-01 17:49:08.459 btags += '^' * lb
2025-07-01 17:49:08.459 elif tag == 'delete':
2025-07-01 17:49:08.459 atags += '-' * la
2025-07-01 17:49:08.459 elif tag == 'insert':
2025-07-01 17:49:08.459 btags += '+' * lb
2025-07-01 17:49:08.459 elif tag == 'equal':
2025-07-01 17:49:08.459 atags += ' ' * la
2025-07-01 17:49:08.459 btags += ' ' * lb
2025-07-01 17:49:08.459 else:
2025-07-01 17:49:08.459 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.459 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.459 else:
2025-07-01 17:49:08.459 # the synch pair is identical
2025-07-01 17:49:08.459 yield ' ' + aelt
2025-07-01 17:49:08.459
2025-07-01 17:49:08.460 # pump out diffs from after the synch point
2025-07-01 17:49:08.460 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.460
2025-07-01 17:49:08.460 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.460 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.460
2025-07-01 17:49:08.460 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.460 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.460 alo = 395, ahi = 1101
2025-07-01 17:49:08.460 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.460 blo = 395, bhi = 1101
2025-07-01 17:49:08.460
2025-07-01 17:49:08.460 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.460 g = []
2025-07-01 17:49:08.460 if alo < ahi:
2025-07-01 17:49:08.460 if blo < bhi:
2025-07-01 17:49:08.460 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.460 else:
2025-07-01 17:49:08.460 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.460 elif blo < bhi:
2025-07-01 17:49:08.460 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.461
2025-07-01 17:49:08.461 > yield from g
2025-07-01 17:49:08.461
2025-07-01 17:49:08.461 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.461 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.461
2025-07-01 17:49:08.461 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.461 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.461 alo = 395, ahi = 1101
2025-07-01 17:49:08.461 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.461 blo = 395, bhi = 1101
2025-07-01 17:49:08.461
2025-07-01 17:49:08.461 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.461 r"""
2025-07-01 17:49:08.461 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.461 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.461 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.461 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.462
2025-07-01 17:49:08.462 Example:
2025-07-01 17:49:08.462
2025-07-01 17:49:08.462 >>> d = Differ()
2025-07-01 17:49:08.462 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.462 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.462 >>> print(''.join(results), end="")
2025-07-01 17:49:08.462 - abcDefghiJkl
2025-07-01 17:49:08.462 + abcdefGhijkl
2025-07-01 17:49:08.462 """
2025-07-01 17:49:08.462
2025-07-01 17:49:08.462 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.462 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.462 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.462 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.462 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.462
2025-07-01 17:49:08.462 # search for the pair that matches best without being identical
2025-07-01 17:49:08.462 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.463 # on junk -- unless we have to)
2025-07-01 17:49:08.463 for j in range(blo, bhi):
2025-07-01 17:49:08.463 bj = b[j]
2025-07-01 17:49:08.463 cruncher.set_seq2(bj)
2025-07-01 17:49:08.463 for i in range(alo, ahi):
2025-07-01 17:49:08.463 ai = a[i]
2025-07-01 17:49:08.463 if ai == bj:
2025-07-01 17:49:08.463 if eqi is None:
2025-07-01 17:49:08.463 eqi, eqj = i, j
2025-07-01 17:49:08.463 continue
2025-07-01 17:49:08.463 cruncher.set_seq1(ai)
2025-07-01 17:49:08.463 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.463 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.463 # compares by a factor of 3.
2025-07-01 17:49:08.463 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.463 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.463 # of the computation is cached by cruncher
2025-07-01 17:49:08.463 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.463 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.463 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.464 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.464 if best_ratio < cutoff:
2025-07-01 17:49:08.464 # no non-identical "pretty close" pair
2025-07-01 17:49:08.464 if eqi is None:
2025-07-01 17:49:08.464 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.464 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.464 return
2025-07-01 17:49:08.464 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.464 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.464 else:
2025-07-01 17:49:08.464 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.464 eqi = None
2025-07-01 17:49:08.464
2025-07-01 17:49:08.464 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.464 # identical
2025-07-01 17:49:08.464
2025-07-01 17:49:08.464 # pump out diffs from before the synch point
2025-07-01 17:49:08.464 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.464
2025-07-01 17:49:08.464 # do intraline marking on the synch pair
2025-07-01 17:49:08.465 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.465 if eqi is None:
2025-07-01 17:49:08.465 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.465 atags = btags = ""
2025-07-01 17:49:08.465 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.465 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.465 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.465 if tag == 'replace':
2025-07-01 17:49:08.465 atags += '^' * la
2025-07-01 17:49:08.465 btags += '^' * lb
2025-07-01 17:49:08.465 elif tag == 'delete':
2025-07-01 17:49:08.465 atags += '-' * la
2025-07-01 17:49:08.465 elif tag == 'insert':
2025-07-01 17:49:08.465 btags += '+' * lb
2025-07-01 17:49:08.465 elif tag == 'equal':
2025-07-01 17:49:08.465 atags += ' ' * la
2025-07-01 17:49:08.465 btags += ' ' * lb
2025-07-01 17:49:08.465 else:
2025-07-01 17:49:08.465 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.465 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.465 else:
2025-07-01 17:49:08.466 # the synch pair is identical
2025-07-01 17:49:08.468 yield ' ' + aelt
2025-07-01 17:49:08.468
2025-07-01 17:49:08.469 # pump out diffs from after the synch point
2025-07-01 17:49:08.469 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.469
2025-07-01 17:49:08.469 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.469 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.469
2025-07-01 17:49:08.469 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.469 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.469 alo = 396, ahi = 1101
2025-07-01 17:49:08.469 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.469 blo = 396, bhi = 1101
2025-07-01 17:49:08.469
2025-07-01 17:49:08.469 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.469 g = []
2025-07-01 17:49:08.469 if alo < ahi:
2025-07-01 17:49:08.469 if blo < bhi:
2025-07-01 17:49:08.469 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.469 else:
2025-07-01 17:49:08.469 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.469 elif blo < bhi:
2025-07-01 17:49:08.469 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.470
2025-07-01 17:49:08.470 > yield from g
2025-07-01 17:49:08.470
2025-07-01 17:49:08.470 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.470 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.470
2025-07-01 17:49:08.470 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.470 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.470 alo = 396, ahi = 1101
2025-07-01 17:49:08.470 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.470 blo = 396, bhi = 1101
2025-07-01 17:49:08.470
2025-07-01 17:49:08.470 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.470 r"""
2025-07-01 17:49:08.470 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.470 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.470 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.470 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.470
2025-07-01 17:49:08.471 Example:
2025-07-01 17:49:08.471
2025-07-01 17:49:08.471 >>> d = Differ()
2025-07-01 17:49:08.471 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.471 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.471 >>> print(''.join(results), end="")
2025-07-01 17:49:08.471 - abcDefghiJkl
2025-07-01 17:49:08.471 + abcdefGhijkl
2025-07-01 17:49:08.471 """
2025-07-01 17:49:08.471
2025-07-01 17:49:08.471 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.471 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.471 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.471 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.471 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.471
2025-07-01 17:49:08.471 # search for the pair that matches best without being identical
2025-07-01 17:49:08.471 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.471 # on junk -- unless we have to)
2025-07-01 17:49:08.472 for j in range(blo, bhi):
2025-07-01 17:49:08.472 bj = b[j]
2025-07-01 17:49:08.472 cruncher.set_seq2(bj)
2025-07-01 17:49:08.472 for i in range(alo, ahi):
2025-07-01 17:49:08.472 ai = a[i]
2025-07-01 17:49:08.472 if ai == bj:
2025-07-01 17:49:08.472 if eqi is None:
2025-07-01 17:49:08.472 eqi, eqj = i, j
2025-07-01 17:49:08.472 continue
2025-07-01 17:49:08.472 cruncher.set_seq1(ai)
2025-07-01 17:49:08.472 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.472 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.472 # compares by a factor of 3.
2025-07-01 17:49:08.472 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.472 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.472 # of the computation is cached by cruncher
2025-07-01 17:49:08.472 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.472 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.472 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.472 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.473 if best_ratio < cutoff:
2025-07-01 17:49:08.473 # no non-identical "pretty close" pair
2025-07-01 17:49:08.473 if eqi is None:
2025-07-01 17:49:08.473 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.473 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.473 return
2025-07-01 17:49:08.473 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.473 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.473 else:
2025-07-01 17:49:08.473 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.473 eqi = None
2025-07-01 17:49:08.473
2025-07-01 17:49:08.473 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.473 # identical
2025-07-01 17:49:08.473
2025-07-01 17:49:08.473 # pump out diffs from before the synch point
2025-07-01 17:49:08.473 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.473
2025-07-01 17:49:08.473 # do intraline marking on the synch pair
2025-07-01 17:49:08.474 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.474 if eqi is None:
2025-07-01 17:49:08.474 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.474 atags = btags = ""
2025-07-01 17:49:08.474 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.474 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.474 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.474 if tag == 'replace':
2025-07-01 17:49:08.474 atags += '^' * la
2025-07-01 17:49:08.474 btags += '^' * lb
2025-07-01 17:49:08.474 elif tag == 'delete':
2025-07-01 17:49:08.474 atags += '-' * la
2025-07-01 17:49:08.474 elif tag == 'insert':
2025-07-01 17:49:08.474 btags += '+' * lb
2025-07-01 17:49:08.474 elif tag == 'equal':
2025-07-01 17:49:08.474 atags += ' ' * la
2025-07-01 17:49:08.474 btags += ' ' * lb
2025-07-01 17:49:08.474 else:
2025-07-01 17:49:08.474 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.474 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.475 else:
2025-07-01 17:49:08.475 # the synch pair is identical
2025-07-01 17:49:08.475 yield ' ' + aelt
2025-07-01 17:49:08.475
2025-07-01 17:49:08.475 # pump out diffs from after the synch point
2025-07-01 17:49:08.475 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.475
2025-07-01 17:49:08.475 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.475 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.475
2025-07-01 17:49:08.475 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.475 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.475 alo = 397, ahi = 1101
2025-07-01 17:49:08.475 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.475 blo = 397, bhi = 1101
2025-07-01 17:49:08.475
2025-07-01 17:49:08.475 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.475 g = []
2025-07-01 17:49:08.475 if alo < ahi:
2025-07-01 17:49:08.475 if blo < bhi:
2025-07-01 17:49:08.476 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.476 else:
2025-07-01 17:49:08.476 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.476 elif blo < bhi:
2025-07-01 17:49:08.476 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.476
2025-07-01 17:49:08.476 > yield from g
2025-07-01 17:49:08.476
2025-07-01 17:49:08.476 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.476 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.476
2025-07-01 17:49:08.476 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.476 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.476 alo = 397, ahi = 1101
2025-07-01 17:49:08.476 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.476 blo = 397, bhi = 1101
2025-07-01 17:49:08.476
2025-07-01 17:49:08.476 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.476 r"""
2025-07-01 17:49:08.476 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.476 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.476 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.477 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.477
2025-07-01 17:49:08.477 Example:
2025-07-01 17:49:08.477
2025-07-01 17:49:08.477 >>> d = Differ()
2025-07-01 17:49:08.477 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.477 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.477 >>> print(''.join(results), end="")
2025-07-01 17:49:08.477 - abcDefghiJkl
2025-07-01 17:49:08.477 + abcdefGhijkl
2025-07-01 17:49:08.477 """
2025-07-01 17:49:08.477
2025-07-01 17:49:08.477 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.477 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.477 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.477 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.477 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.477
2025-07-01 17:49:08.477 # search for the pair that matches best without being identical
2025-07-01 17:49:08.478 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.478 # on junk -- unless we have to)
2025-07-01 17:49:08.478 for j in range(blo, bhi):
2025-07-01 17:49:08.478 bj = b[j]
2025-07-01 17:49:08.478 cruncher.set_seq2(bj)
2025-07-01 17:49:08.478 for i in range(alo, ahi):
2025-07-01 17:49:08.478 ai = a[i]
2025-07-01 17:49:08.478 if ai == bj:
2025-07-01 17:49:08.478 if eqi is None:
2025-07-01 17:49:08.478 eqi, eqj = i, j
2025-07-01 17:49:08.478 continue
2025-07-01 17:49:08.478 cruncher.set_seq1(ai)
2025-07-01 17:49:08.478 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.478 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.478 # compares by a factor of 3.
2025-07-01 17:49:08.478 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.478 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.478 # of the computation is cached by cruncher
2025-07-01 17:49:08.478 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.478 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.478 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.479 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.479 if best_ratio < cutoff:
2025-07-01 17:49:08.479 # no non-identical "pretty close" pair
2025-07-01 17:49:08.479 if eqi is None:
2025-07-01 17:49:08.479 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.479 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.479 return
2025-07-01 17:49:08.479 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.479 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.479 else:
2025-07-01 17:49:08.479 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.479 eqi = None
2025-07-01 17:49:08.479
2025-07-01 17:49:08.479 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.479 # identical
2025-07-01 17:49:08.479
2025-07-01 17:49:08.479 # pump out diffs from before the synch point
2025-07-01 17:49:08.479 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.479
2025-07-01 17:49:08.479 # do intraline marking on the synch pair
2025-07-01 17:49:08.479 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.479 if eqi is None:
2025-07-01 17:49:08.480 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.480 atags = btags = ""
2025-07-01 17:49:08.480 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.480 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.480 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.480 if tag == 'replace':
2025-07-01 17:49:08.480 atags += '^' * la
2025-07-01 17:49:08.480 btags += '^' * lb
2025-07-01 17:49:08.480 elif tag == 'delete':
2025-07-01 17:49:08.480 atags += '-' * la
2025-07-01 17:49:08.480 elif tag == 'insert':
2025-07-01 17:49:08.480 btags += '+' * lb
2025-07-01 17:49:08.480 elif tag == 'equal':
2025-07-01 17:49:08.480 atags += ' ' * la
2025-07-01 17:49:08.480 btags += ' ' * lb
2025-07-01 17:49:08.480 else:
2025-07-01 17:49:08.480 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.480 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.480 else:
2025-07-01 17:49:08.480 # the synch pair is identical
2025-07-01 17:49:08.480 yield ' ' + aelt
2025-07-01 17:49:08.481
2025-07-01 17:49:08.481 # pump out diffs from after the synch point
2025-07-01 17:49:08.481 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.481
2025-07-01 17:49:08.481 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.481 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.481
2025-07-01 17:49:08.481 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.481 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.481 alo = 400, ahi = 1101
2025-07-01 17:49:08.481 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.481 blo = 400, bhi = 1101
2025-07-01 17:49:08.481
2025-07-01 17:49:08.481 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.481 g = []
2025-07-01 17:49:08.481 if alo < ahi:
2025-07-01 17:49:08.481 if blo < bhi:
2025-07-01 17:49:08.481 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.481 else:
2025-07-01 17:49:08.481 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.487 elif blo < bhi:
2025-07-01 17:49:08.487 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.487
2025-07-01 17:49:08.487 > yield from g
2025-07-01 17:49:08.487
2025-07-01 17:49:08.487 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.487 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.487
2025-07-01 17:49:08.487 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.487 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.487 alo = 400, ahi = 1101
2025-07-01 17:49:08.487 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.487 blo = 400, bhi = 1101
2025-07-01 17:49:08.487
2025-07-01 17:49:08.487 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.487 r"""
2025-07-01 17:49:08.487 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.487 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.488 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.488 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.488
2025-07-01 17:49:08.488 Example:
2025-07-01 17:49:08.488
2025-07-01 17:49:08.488 >>> d = Differ()
2025-07-01 17:49:08.488 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.488 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.488 >>> print(''.join(results), end="")
2025-07-01 17:49:08.488 - abcDefghiJkl
2025-07-01 17:49:08.488 + abcdefGhijkl
2025-07-01 17:49:08.488 """
2025-07-01 17:49:08.488
2025-07-01 17:49:08.488 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.488 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.488 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.488 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.488 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.488
2025-07-01 17:49:08.489 # search for the pair that matches best without being identical
2025-07-01 17:49:08.489 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.489 # on junk -- unless we have to)
2025-07-01 17:49:08.489 for j in range(blo, bhi):
2025-07-01 17:49:08.489 bj = b[j]
2025-07-01 17:49:08.489 cruncher.set_seq2(bj)
2025-07-01 17:49:08.489 for i in range(alo, ahi):
2025-07-01 17:49:08.489 ai = a[i]
2025-07-01 17:49:08.489 if ai == bj:
2025-07-01 17:49:08.489 if eqi is None:
2025-07-01 17:49:08.489 eqi, eqj = i, j
2025-07-01 17:49:08.489 continue
2025-07-01 17:49:08.489 cruncher.set_seq1(ai)
2025-07-01 17:49:08.489 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.489 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.489 # compares by a factor of 3.
2025-07-01 17:49:08.489 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.489 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.489 # of the computation is cached by cruncher
2025-07-01 17:49:08.489 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.490 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.490 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.490 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.490 if best_ratio < cutoff:
2025-07-01 17:49:08.490 # no non-identical "pretty close" pair
2025-07-01 17:49:08.490 if eqi is None:
2025-07-01 17:49:08.490 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.490 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.490 return
2025-07-01 17:49:08.490 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.490 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.490 else:
2025-07-01 17:49:08.490 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.490 eqi = None
2025-07-01 17:49:08.490
2025-07-01 17:49:08.490 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.490 # identical
2025-07-01 17:49:08.490
2025-07-01 17:49:08.490 # pump out diffs from before the synch point
2025-07-01 17:49:08.490 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.490
2025-07-01 17:49:08.491 # do intraline marking on the synch pair
2025-07-01 17:49:08.491 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.491 if eqi is None:
2025-07-01 17:49:08.491 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.491 atags = btags = ""
2025-07-01 17:49:08.491 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.491 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.491 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.491 if tag == 'replace':
2025-07-01 17:49:08.491 atags += '^' * la
2025-07-01 17:49:08.491 btags += '^' * lb
2025-07-01 17:49:08.491 elif tag == 'delete':
2025-07-01 17:49:08.491 atags += '-' * la
2025-07-01 17:49:08.491 elif tag == 'insert':
2025-07-01 17:49:08.491 btags += '+' * lb
2025-07-01 17:49:08.491 elif tag == 'equal':
2025-07-01 17:49:08.491 atags += ' ' * la
2025-07-01 17:49:08.491 btags += ' ' * lb
2025-07-01 17:49:08.491 else:
2025-07-01 17:49:08.491 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.491 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.491 else:
2025-07-01 17:49:08.492 # the synch pair is identical
2025-07-01 17:49:08.492 yield ' ' + aelt
2025-07-01 17:49:08.492
2025-07-01 17:49:08.492 # pump out diffs from after the synch point
2025-07-01 17:49:08.492 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.492
2025-07-01 17:49:08.492 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.492 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.492
2025-07-01 17:49:08.492 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.492 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.492 alo = 401, ahi = 1101
2025-07-01 17:49:08.492 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.492 blo = 401, bhi = 1101
2025-07-01 17:49:08.492
2025-07-01 17:49:08.492 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.492 g = []
2025-07-01 17:49:08.492 if alo < ahi:
2025-07-01 17:49:08.492 if blo < bhi:
2025-07-01 17:49:08.492 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.493 else:
2025-07-01 17:49:08.493 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.493 elif blo < bhi:
2025-07-01 17:49:08.493 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.493
2025-07-01 17:49:08.493 > yield from g
2025-07-01 17:49:08.493
2025-07-01 17:49:08.493 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.493 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.493
2025-07-01 17:49:08.493 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.493 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.493 alo = 401, ahi = 1101
2025-07-01 17:49:08.493 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.493 blo = 401, bhi = 1101
2025-07-01 17:49:08.493
2025-07-01 17:49:08.493 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.493 r"""
2025-07-01 17:49:08.493 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.493 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.493 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.493 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.494
2025-07-01 17:49:08.494 Example:
2025-07-01 17:49:08.494
2025-07-01 17:49:08.494 >>> d = Differ()
2025-07-01 17:49:08.494 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.494 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.494 >>> print(''.join(results), end="")
2025-07-01 17:49:08.494 - abcDefghiJkl
2025-07-01 17:49:08.494 + abcdefGhijkl
2025-07-01 17:49:08.494 """
2025-07-01 17:49:08.494
2025-07-01 17:49:08.494 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.494 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.494 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.494 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.494 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.494
2025-07-01 17:49:08.495 # search for the pair that matches best without being identical
2025-07-01 17:49:08.495 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.495 # on junk -- unless we have to)
2025-07-01 17:49:08.495 for j in range(blo, bhi):
2025-07-01 17:49:08.495 bj = b[j]
2025-07-01 17:49:08.495 cruncher.set_seq2(bj)
2025-07-01 17:49:08.495 for i in range(alo, ahi):
2025-07-01 17:49:08.495 ai = a[i]
2025-07-01 17:49:08.495 if ai == bj:
2025-07-01 17:49:08.495 if eqi is None:
2025-07-01 17:49:08.495 eqi, eqj = i, j
2025-07-01 17:49:08.495 continue
2025-07-01 17:49:08.495 cruncher.set_seq1(ai)
2025-07-01 17:49:08.495 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.495 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.495 # compares by a factor of 3.
2025-07-01 17:49:08.495 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.495 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.495 # of the computation is cached by cruncher
2025-07-01 17:49:08.495 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.496 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.496 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.496 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.496 if best_ratio < cutoff:
2025-07-01 17:49:08.496 # no non-identical "pretty close" pair
2025-07-01 17:49:08.496 if eqi is None:
2025-07-01 17:49:08.496 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.496 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.496 return
2025-07-01 17:49:08.496 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.496 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.496 else:
2025-07-01 17:49:08.496 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.496 eqi = None
2025-07-01 17:49:08.496
2025-07-01 17:49:08.496 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.496 # identical
2025-07-01 17:49:08.496
2025-07-01 17:49:08.496 # pump out diffs from before the synch point
2025-07-01 17:49:08.496 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.496
2025-07-01 17:49:08.497 # do intraline marking on the synch pair
2025-07-01 17:49:08.497 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.497 if eqi is None:
2025-07-01 17:49:08.497 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.497 atags = btags = ""
2025-07-01 17:49:08.497 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.497 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.497 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.497 if tag == 'replace':
2025-07-01 17:49:08.497 atags += '^' * la
2025-07-01 17:49:08.497 btags += '^' * lb
2025-07-01 17:49:08.497 elif tag == 'delete':
2025-07-01 17:49:08.497 atags += '-' * la
2025-07-01 17:49:08.497 elif tag == 'insert':
2025-07-01 17:49:08.497 btags += '+' * lb
2025-07-01 17:49:08.497 elif tag == 'equal':
2025-07-01 17:49:08.497 atags += ' ' * la
2025-07-01 17:49:08.497 btags += ' ' * lb
2025-07-01 17:49:08.497 else:
2025-07-01 17:49:08.497 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.498 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.501 else:
2025-07-01 17:49:08.501 # the synch pair is identical
2025-07-01 17:49:08.501 yield ' ' + aelt
2025-07-01 17:49:08.501
2025-07-01 17:49:08.501 # pump out diffs from after the synch point
2025-07-01 17:49:08.501 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.501
2025-07-01 17:49:08.501 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.501 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.501
2025-07-01 17:49:08.501 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.501 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.501 alo = 402, ahi = 1101
2025-07-01 17:49:08.501 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.501 blo = 402, bhi = 1101
2025-07-01 17:49:08.501
2025-07-01 17:49:08.501 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.501 g = []
2025-07-01 17:49:08.501 if alo < ahi:
2025-07-01 17:49:08.501 if blo < bhi:
2025-07-01 17:49:08.502 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.502 else:
2025-07-01 17:49:08.502 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.502 elif blo < bhi:
2025-07-01 17:49:08.502 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.502
2025-07-01 17:49:08.502 > yield from g
2025-07-01 17:49:08.502
2025-07-01 17:49:08.502 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.502 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.502
2025-07-01 17:49:08.502 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.502 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.502 alo = 402, ahi = 1101
2025-07-01 17:49:08.502 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.502 blo = 402, bhi = 1101
2025-07-01 17:49:08.502
2025-07-01 17:49:08.502 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.502 r"""
2025-07-01 17:49:08.502 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.502 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.503 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.503 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.503
2025-07-01 17:49:08.503 Example:
2025-07-01 17:49:08.503
2025-07-01 17:49:08.503 >>> d = Differ()
2025-07-01 17:49:08.503 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.503 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.503 >>> print(''.join(results), end="")
2025-07-01 17:49:08.503 - abcDefghiJkl
2025-07-01 17:49:08.503 + abcdefGhijkl
2025-07-01 17:49:08.503 """
2025-07-01 17:49:08.503
2025-07-01 17:49:08.503 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.503 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.503 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.503 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.503 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.503
2025-07-01 17:49:08.504 # search for the pair that matches best without being identical
2025-07-01 17:49:08.504 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.504 # on junk -- unless we have to)
2025-07-01 17:49:08.504 for j in range(blo, bhi):
2025-07-01 17:49:08.504 bj = b[j]
2025-07-01 17:49:08.504 cruncher.set_seq2(bj)
2025-07-01 17:49:08.504 for i in range(alo, ahi):
2025-07-01 17:49:08.504 ai = a[i]
2025-07-01 17:49:08.504 if ai == bj:
2025-07-01 17:49:08.504 if eqi is None:
2025-07-01 17:49:08.504 eqi, eqj = i, j
2025-07-01 17:49:08.504 continue
2025-07-01 17:49:08.504 cruncher.set_seq1(ai)
2025-07-01 17:49:08.504 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.504 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.504 # compares by a factor of 3.
2025-07-01 17:49:08.504 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.504 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.504 # of the computation is cached by cruncher
2025-07-01 17:49:08.504 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.505 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.505 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.505 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.505 if best_ratio < cutoff:
2025-07-01 17:49:08.505 # no non-identical "pretty close" pair
2025-07-01 17:49:08.505 if eqi is None:
2025-07-01 17:49:08.505 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.505 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.505 return
2025-07-01 17:49:08.505 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.505 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.505 else:
2025-07-01 17:49:08.505 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.505 eqi = None
2025-07-01 17:49:08.505
2025-07-01 17:49:08.505 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.505 # identical
2025-07-01 17:49:08.505
2025-07-01 17:49:08.505 # pump out diffs from before the synch point
2025-07-01 17:49:08.505 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.505
2025-07-01 17:49:08.506 # do intraline marking on the synch pair
2025-07-01 17:49:08.506 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.506 if eqi is None:
2025-07-01 17:49:08.506 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.506 atags = btags = ""
2025-07-01 17:49:08.506 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.506 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.506 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.506 if tag == 'replace':
2025-07-01 17:49:08.506 atags += '^' * la
2025-07-01 17:49:08.506 btags += '^' * lb
2025-07-01 17:49:08.506 elif tag == 'delete':
2025-07-01 17:49:08.506 atags += '-' * la
2025-07-01 17:49:08.506 elif tag == 'insert':
2025-07-01 17:49:08.506 btags += '+' * lb
2025-07-01 17:49:08.506 elif tag == 'equal':
2025-07-01 17:49:08.506 atags += ' ' * la
2025-07-01 17:49:08.506 btags += ' ' * lb
2025-07-01 17:49:08.506 else:
2025-07-01 17:49:08.507 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.507 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.507 else:
2025-07-01 17:49:08.507 # the synch pair is identical
2025-07-01 17:49:08.507 yield ' ' + aelt
2025-07-01 17:49:08.507
2025-07-01 17:49:08.507 # pump out diffs from after the synch point
2025-07-01 17:49:08.507 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.507
2025-07-01 17:49:08.507 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.507 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.507
2025-07-01 17:49:08.507 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.507 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.507 alo = 403, ahi = 1101
2025-07-01 17:49:08.507 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.507 blo = 403, bhi = 1101
2025-07-01 17:49:08.507
2025-07-01 17:49:08.507 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.507 g = []
2025-07-01 17:49:08.508 if alo < ahi:
2025-07-01 17:49:08.508 if blo < bhi:
2025-07-01 17:49:08.508 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.508 else:
2025-07-01 17:49:08.508 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.508 elif blo < bhi:
2025-07-01 17:49:08.508 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.508
2025-07-01 17:49:08.508 > yield from g
2025-07-01 17:49:08.508
2025-07-01 17:49:08.508 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.508 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.508
2025-07-01 17:49:08.508 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.508 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.508 alo = 403, ahi = 1101
2025-07-01 17:49:08.508 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.508 blo = 403, bhi = 1101
2025-07-01 17:49:08.508
2025-07-01 17:49:08.509 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.509 r"""
2025-07-01 17:49:08.509 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.509 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.509 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.509 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.509
2025-07-01 17:49:08.509 Example:
2025-07-01 17:49:08.509
2025-07-01 17:49:08.509 >>> d = Differ()
2025-07-01 17:49:08.509 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.509 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.509 >>> print(''.join(results), end="")
2025-07-01 17:49:08.509 - abcDefghiJkl
2025-07-01 17:49:08.509 + abcdefGhijkl
2025-07-01 17:49:08.509 """
2025-07-01 17:49:08.509
2025-07-01 17:49:08.509 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.509 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.510 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.510 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.510 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.510
2025-07-01 17:49:08.510 # search for the pair that matches best without being identical
2025-07-01 17:49:08.510 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.510 # on junk -- unless we have to)
2025-07-01 17:49:08.510 for j in range(blo, bhi):
2025-07-01 17:49:08.510 bj = b[j]
2025-07-01 17:49:08.510 cruncher.set_seq2(bj)
2025-07-01 17:49:08.510 for i in range(alo, ahi):
2025-07-01 17:49:08.510 ai = a[i]
2025-07-01 17:49:08.510 if ai == bj:
2025-07-01 17:49:08.510 if eqi is None:
2025-07-01 17:49:08.510 eqi, eqj = i, j
2025-07-01 17:49:08.510 continue
2025-07-01 17:49:08.510 cruncher.set_seq1(ai)
2025-07-01 17:49:08.510 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.510 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.510 # compares by a factor of 3.
2025-07-01 17:49:08.510 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.511 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.511 # of the computation is cached by cruncher
2025-07-01 17:49:08.511 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.511 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.511 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.511 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.511 if best_ratio < cutoff:
2025-07-01 17:49:08.511 # no non-identical "pretty close" pair
2025-07-01 17:49:08.511 if eqi is None:
2025-07-01 17:49:08.511 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.511 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.511 return
2025-07-01 17:49:08.511 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.511 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.511 else:
2025-07-01 17:49:08.511 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.511 eqi = None
2025-07-01 17:49:08.511
2025-07-01 17:49:08.511 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.511 # identical
2025-07-01 17:49:08.511
2025-07-01 17:49:08.511 # pump out diffs from before the synch point
2025-07-01 17:49:08.512 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.512
2025-07-01 17:49:08.512 # do intraline marking on the synch pair
2025-07-01 17:49:08.512 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.512 if eqi is None:
2025-07-01 17:49:08.512 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.512 atags = btags = ""
2025-07-01 17:49:08.512 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.512 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.512 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.512 if tag == 'replace':
2025-07-01 17:49:08.512 atags += '^' * la
2025-07-01 17:49:08.512 btags += '^' * lb
2025-07-01 17:49:08.512 elif tag == 'delete':
2025-07-01 17:49:08.512 atags += '-' * la
2025-07-01 17:49:08.512 elif tag == 'insert':
2025-07-01 17:49:08.512 btags += '+' * lb
2025-07-01 17:49:08.512 elif tag == 'equal':
2025-07-01 17:49:08.512 atags += ' ' * la
2025-07-01 17:49:08.512 btags += ' ' * lb
2025-07-01 17:49:08.512 else:
2025-07-01 17:49:08.513 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.513 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.513 else:
2025-07-01 17:49:08.513 # the synch pair is identical
2025-07-01 17:49:08.513 yield ' ' + aelt
2025-07-01 17:49:08.513
2025-07-01 17:49:08.513 # pump out diffs from after the synch point
2025-07-01 17:49:08.513 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.513
2025-07-01 17:49:08.513 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.513 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.513
2025-07-01 17:49:08.513 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.513 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.513 alo = 404, ahi = 1101
2025-07-01 17:49:08.513 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.513 blo = 404, bhi = 1101
2025-07-01 17:49:08.513
2025-07-01 17:49:08.513 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.513 g = []
2025-07-01 17:49:08.513 if alo < ahi:
2025-07-01 17:49:08.513 if blo < bhi:
2025-07-01 17:49:08.519 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.519 else:
2025-07-01 17:49:08.519 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.519 elif blo < bhi:
2025-07-01 17:49:08.519 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.519
2025-07-01 17:49:08.519 > yield from g
2025-07-01 17:49:08.519
2025-07-01 17:49:08.519 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.519 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.519
2025-07-01 17:49:08.519 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.519 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.519 alo = 404, ahi = 1101
2025-07-01 17:49:08.519 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.519 blo = 404, bhi = 1101
2025-07-01 17:49:08.519
2025-07-01 17:49:08.519 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.520 r"""
2025-07-01 17:49:08.520 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.520 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.520 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.520 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.520
2025-07-01 17:49:08.520 Example:
2025-07-01 17:49:08.520
2025-07-01 17:49:08.520 >>> d = Differ()
2025-07-01 17:49:08.520 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.520 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.520 >>> print(''.join(results), end="")
2025-07-01 17:49:08.520 - abcDefghiJkl
2025-07-01 17:49:08.520 + abcdefGhijkl
2025-07-01 17:49:08.520 """
2025-07-01 17:49:08.520
2025-07-01 17:49:08.520 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.520 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.521 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.521 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.521 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.521
2025-07-01 17:49:08.521 # search for the pair that matches best without being identical
2025-07-01 17:49:08.521 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.521 # on junk -- unless we have to)
2025-07-01 17:49:08.521 for j in range(blo, bhi):
2025-07-01 17:49:08.521 bj = b[j]
2025-07-01 17:49:08.521 cruncher.set_seq2(bj)
2025-07-01 17:49:08.521 for i in range(alo, ahi):
2025-07-01 17:49:08.521 ai = a[i]
2025-07-01 17:49:08.521 if ai == bj:
2025-07-01 17:49:08.521 if eqi is None:
2025-07-01 17:49:08.521 eqi, eqj = i, j
2025-07-01 17:49:08.521 continue
2025-07-01 17:49:08.521 cruncher.set_seq1(ai)
2025-07-01 17:49:08.521 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.521 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.521 # compares by a factor of 3.
2025-07-01 17:49:08.522 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.522 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.522 # of the computation is cached by cruncher
2025-07-01 17:49:08.522 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.522 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.522 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.522 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.522 if best_ratio < cutoff:
2025-07-01 17:49:08.522 # no non-identical "pretty close" pair
2025-07-01 17:49:08.522 if eqi is None:
2025-07-01 17:49:08.522 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.522 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.522 return
2025-07-01 17:49:08.522 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.522 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.522 else:
2025-07-01 17:49:08.522 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.522 eqi = None
2025-07-01 17:49:08.522
2025-07-01 17:49:08.522 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.522 # identical
2025-07-01 17:49:08.523
2025-07-01 17:49:08.523 # pump out diffs from before the synch point
2025-07-01 17:49:08.523 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.523
2025-07-01 17:49:08.523 # do intraline marking on the synch pair
2025-07-01 17:49:08.523 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.523 if eqi is None:
2025-07-01 17:49:08.523 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.523 atags = btags = ""
2025-07-01 17:49:08.523 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.523 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.523 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.523 if tag == 'replace':
2025-07-01 17:49:08.523 atags += '^' * la
2025-07-01 17:49:08.523 btags += '^' * lb
2025-07-01 17:49:08.523 elif tag == 'delete':
2025-07-01 17:49:08.523 atags += '-' * la
2025-07-01 17:49:08.523 elif tag == 'insert':
2025-07-01 17:49:08.523 btags += '+' * lb
2025-07-01 17:49:08.523 elif tag == 'equal':
2025-07-01 17:49:08.524 atags += ' ' * la
2025-07-01 17:49:08.524 btags += ' ' * lb
2025-07-01 17:49:08.524 else:
2025-07-01 17:49:08.524 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.524 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.524 else:
2025-07-01 17:49:08.524 # the synch pair is identical
2025-07-01 17:49:08.524 yield ' ' + aelt
2025-07-01 17:49:08.524
2025-07-01 17:49:08.524 # pump out diffs from after the synch point
2025-07-01 17:49:08.524 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.524
2025-07-01 17:49:08.524 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.524 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.524
2025-07-01 17:49:08.524 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.524 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.524 alo = 405, ahi = 1101
2025-07-01 17:49:08.524 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.524 blo = 405, bhi = 1101
2025-07-01 17:49:08.524
2025-07-01 17:49:08.525 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.525 g = []
2025-07-01 17:49:08.525 if alo < ahi:
2025-07-01 17:49:08.525 if blo < bhi:
2025-07-01 17:49:08.525 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.525 else:
2025-07-01 17:49:08.525 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.525 elif blo < bhi:
2025-07-01 17:49:08.525 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.525
2025-07-01 17:49:08.525 > yield from g
2025-07-01 17:49:08.525
2025-07-01 17:49:08.525 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.525 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.525
2025-07-01 17:49:08.525 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.525 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.525 alo = 405, ahi = 1101
2025-07-01 17:49:08.525 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.525 blo = 405, bhi = 1101
2025-07-01 17:49:08.526
2025-07-01 17:49:08.526 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.526 r"""
2025-07-01 17:49:08.526 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.526 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.526 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.526 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.526
2025-07-01 17:49:08.526 Example:
2025-07-01 17:49:08.526
2025-07-01 17:49:08.526 >>> d = Differ()
2025-07-01 17:49:08.526 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.526 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.526 >>> print(''.join(results), end="")
2025-07-01 17:49:08.526 - abcDefghiJkl
2025-07-01 17:49:08.526 + abcdefGhijkl
2025-07-01 17:49:08.526 """
2025-07-01 17:49:08.526
2025-07-01 17:49:08.526 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.527 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.527 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.527 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.527 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.527
2025-07-01 17:49:08.527 # search for the pair that matches best without being identical
2025-07-01 17:49:08.527 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.527 # on junk -- unless we have to)
2025-07-01 17:49:08.527 for j in range(blo, bhi):
2025-07-01 17:49:08.527 bj = b[j]
2025-07-01 17:49:08.527 cruncher.set_seq2(bj)
2025-07-01 17:49:08.527 for i in range(alo, ahi):
2025-07-01 17:49:08.527 ai = a[i]
2025-07-01 17:49:08.527 if ai == bj:
2025-07-01 17:49:08.527 if eqi is None:
2025-07-01 17:49:08.527 eqi, eqj = i, j
2025-07-01 17:49:08.527 continue
2025-07-01 17:49:08.527 cruncher.set_seq1(ai)
2025-07-01 17:49:08.527 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.527 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.527 # compares by a factor of 3.
2025-07-01 17:49:08.528 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.528 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.528 # of the computation is cached by cruncher
2025-07-01 17:49:08.528 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.528 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.528 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.528 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.528 if best_ratio < cutoff:
2025-07-01 17:49:08.528 # no non-identical "pretty close" pair
2025-07-01 17:49:08.528 if eqi is None:
2025-07-01 17:49:08.528 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.528 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.528 return
2025-07-01 17:49:08.528 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.528 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.528 else:
2025-07-01 17:49:08.528 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.528 eqi = None
2025-07-01 17:49:08.528
2025-07-01 17:49:08.528 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.529 # identical
2025-07-01 17:49:08.531
2025-07-01 17:49:08.531 # pump out diffs from before the synch point
2025-07-01 17:49:08.532 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.532
2025-07-01 17:49:08.532 # do intraline marking on the synch pair
2025-07-01 17:49:08.532 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.532 if eqi is None:
2025-07-01 17:49:08.532 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.532 atags = btags = ""
2025-07-01 17:49:08.532 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.532 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.532 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.532 if tag == 'replace':
2025-07-01 17:49:08.532 atags += '^' * la
2025-07-01 17:49:08.532 btags += '^' * lb
2025-07-01 17:49:08.532 elif tag == 'delete':
2025-07-01 17:49:08.532 atags += '-' * la
2025-07-01 17:49:08.532 elif tag == 'insert':
2025-07-01 17:49:08.532 btags += '+' * lb
2025-07-01 17:49:08.532 elif tag == 'equal':
2025-07-01 17:49:08.532 atags += ' ' * la
2025-07-01 17:49:08.532 btags += ' ' * lb
2025-07-01 17:49:08.532 else:
2025-07-01 17:49:08.533 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.533 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.533 else:
2025-07-01 17:49:08.533 # the synch pair is identical
2025-07-01 17:49:08.533 yield ' ' + aelt
2025-07-01 17:49:08.533
2025-07-01 17:49:08.533 # pump out diffs from after the synch point
2025-07-01 17:49:08.533 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.533
2025-07-01 17:49:08.533 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.533 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.533
2025-07-01 17:49:08.533 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.533 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.533 alo = 406, ahi = 1101
2025-07-01 17:49:08.533 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.533 blo = 406, bhi = 1101
2025-07-01 17:49:08.533
2025-07-01 17:49:08.533 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.533 g = []
2025-07-01 17:49:08.533 if alo < ahi:
2025-07-01 17:49:08.534 if blo < bhi:
2025-07-01 17:49:08.534 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.534 else:
2025-07-01 17:49:08.534 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.534 elif blo < bhi:
2025-07-01 17:49:08.534 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.534
2025-07-01 17:49:08.534 > yield from g
2025-07-01 17:49:08.534
2025-07-01 17:49:08.534 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.534 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.534
2025-07-01 17:49:08.534 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.534 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.534 alo = 406, ahi = 1101
2025-07-01 17:49:08.534 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.534 blo = 406, bhi = 1101
2025-07-01 17:49:08.534
2025-07-01 17:49:08.534 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.534 r"""
2025-07-01 17:49:08.534 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.535 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.535 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.535 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.535
2025-07-01 17:49:08.535 Example:
2025-07-01 17:49:08.535
2025-07-01 17:49:08.535 >>> d = Differ()
2025-07-01 17:49:08.535 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.535 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.535 >>> print(''.join(results), end="")
2025-07-01 17:49:08.535 - abcDefghiJkl
2025-07-01 17:49:08.535 + abcdefGhijkl
2025-07-01 17:49:08.535 """
2025-07-01 17:49:08.535
2025-07-01 17:49:08.535 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.535 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.535 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.535 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.535 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.536
2025-07-01 17:49:08.536 # search for the pair that matches best without being identical
2025-07-01 17:49:08.536 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.536 # on junk -- unless we have to)
2025-07-01 17:49:08.536 for j in range(blo, bhi):
2025-07-01 17:49:08.536 bj = b[j]
2025-07-01 17:49:08.536 cruncher.set_seq2(bj)
2025-07-01 17:49:08.536 for i in range(alo, ahi):
2025-07-01 17:49:08.536 ai = a[i]
2025-07-01 17:49:08.536 if ai == bj:
2025-07-01 17:49:08.536 if eqi is None:
2025-07-01 17:49:08.536 eqi, eqj = i, j
2025-07-01 17:49:08.536 continue
2025-07-01 17:49:08.536 cruncher.set_seq1(ai)
2025-07-01 17:49:08.536 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.536 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.536 # compares by a factor of 3.
2025-07-01 17:49:08.536 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.536 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.536 # of the computation is cached by cruncher
2025-07-01 17:49:08.536 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.537 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.537 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.537 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.537 if best_ratio < cutoff:
2025-07-01 17:49:08.537 # no non-identical "pretty close" pair
2025-07-01 17:49:08.537 if eqi is None:
2025-07-01 17:49:08.537 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.537 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.537 return
2025-07-01 17:49:08.537 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.537 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.537 else:
2025-07-01 17:49:08.537 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.537 eqi = None
2025-07-01 17:49:08.537
2025-07-01 17:49:08.537 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.537 # identical
2025-07-01 17:49:08.537
2025-07-01 17:49:08.537 # pump out diffs from before the synch point
2025-07-01 17:49:08.537 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.537
2025-07-01 17:49:08.537 # do intraline marking on the synch pair
2025-07-01 17:49:08.538 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.538 if eqi is None:
2025-07-01 17:49:08.538 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.538 atags = btags = ""
2025-07-01 17:49:08.538 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.538 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.538 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.538 if tag == 'replace':
2025-07-01 17:49:08.538 atags += '^' * la
2025-07-01 17:49:08.538 btags += '^' * lb
2025-07-01 17:49:08.538 elif tag == 'delete':
2025-07-01 17:49:08.538 atags += '-' * la
2025-07-01 17:49:08.538 elif tag == 'insert':
2025-07-01 17:49:08.538 btags += '+' * lb
2025-07-01 17:49:08.538 elif tag == 'equal':
2025-07-01 17:49:08.538 atags += ' ' * la
2025-07-01 17:49:08.538 btags += ' ' * lb
2025-07-01 17:49:08.538 else:
2025-07-01 17:49:08.538 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.538 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.538 else:
2025-07-01 17:49:08.539 # the synch pair is identical
2025-07-01 17:49:08.539 yield ' ' + aelt
2025-07-01 17:49:08.539
2025-07-01 17:49:08.539 # pump out diffs from after the synch point
2025-07-01 17:49:08.539 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.539
2025-07-01 17:49:08.539 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.539 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.539
2025-07-01 17:49:08.539 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.539 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.539 alo = 407, ahi = 1101
2025-07-01 17:49:08.539 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.539 blo = 407, bhi = 1101
2025-07-01 17:49:08.539
2025-07-01 17:49:08.539 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.539 g = []
2025-07-01 17:49:08.539 if alo < ahi:
2025-07-01 17:49:08.539 if blo < bhi:
2025-07-01 17:49:08.540 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.540 else:
2025-07-01 17:49:08.540 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.540 elif blo < bhi:
2025-07-01 17:49:08.540 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.540
2025-07-01 17:49:08.540 > yield from g
2025-07-01 17:49:08.540
2025-07-01 17:49:08.540 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.540 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.540
2025-07-01 17:49:08.540 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.540 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.540 alo = 407, ahi = 1101
2025-07-01 17:49:08.540 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.540 blo = 407, bhi = 1101
2025-07-01 17:49:08.540
2025-07-01 17:49:08.540 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.540 r"""
2025-07-01 17:49:08.540 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.541 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.541 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.541 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.541
2025-07-01 17:49:08.541 Example:
2025-07-01 17:49:08.541
2025-07-01 17:49:08.541 >>> d = Differ()
2025-07-01 17:49:08.541 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.541 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.541 >>> print(''.join(results), end="")
2025-07-01 17:49:08.541 - abcDefghiJkl
2025-07-01 17:49:08.541 + abcdefGhijkl
2025-07-01 17:49:08.541 """
2025-07-01 17:49:08.541
2025-07-01 17:49:08.541 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.541 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.541 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.541 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.541 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.542
2025-07-01 17:49:08.542 # search for the pair that matches best without being identical
2025-07-01 17:49:08.542 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.542 # on junk -- unless we have to)
2025-07-01 17:49:08.542 for j in range(blo, bhi):
2025-07-01 17:49:08.542 bj = b[j]
2025-07-01 17:49:08.542 cruncher.set_seq2(bj)
2025-07-01 17:49:08.542 for i in range(alo, ahi):
2025-07-01 17:49:08.542 ai = a[i]
2025-07-01 17:49:08.542 if ai == bj:
2025-07-01 17:49:08.542 if eqi is None:
2025-07-01 17:49:08.542 eqi, eqj = i, j
2025-07-01 17:49:08.542 continue
2025-07-01 17:49:08.542 cruncher.set_seq1(ai)
2025-07-01 17:49:08.542 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.542 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.542 # compares by a factor of 3.
2025-07-01 17:49:08.542 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.542 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.542 # of the computation is cached by cruncher
2025-07-01 17:49:08.543 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.543 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.543 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.543 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.543 if best_ratio < cutoff:
2025-07-01 17:49:08.543 # no non-identical "pretty close" pair
2025-07-01 17:49:08.543 if eqi is None:
2025-07-01 17:49:08.543 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.543 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.543 return
2025-07-01 17:49:08.543 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.543 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.543 else:
2025-07-01 17:49:08.543 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.543 eqi = None
2025-07-01 17:49:08.543
2025-07-01 17:49:08.543 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.543 # identical
2025-07-01 17:49:08.543
2025-07-01 17:49:08.543 # pump out diffs from before the synch point
2025-07-01 17:49:08.543 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.544
2025-07-01 17:49:08.544 # do intraline marking on the synch pair
2025-07-01 17:49:08.544 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.544 if eqi is None:
2025-07-01 17:49:08.544 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.544 atags = btags = ""
2025-07-01 17:49:08.544 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.544 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.544 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.544 if tag == 'replace':
2025-07-01 17:49:08.544 atags += '^' * la
2025-07-01 17:49:08.544 btags += '^' * lb
2025-07-01 17:49:08.544 elif tag == 'delete':
2025-07-01 17:49:08.544 atags += '-' * la
2025-07-01 17:49:08.544 elif tag == 'insert':
2025-07-01 17:49:08.544 btags += '+' * lb
2025-07-01 17:49:08.544 elif tag == 'equal':
2025-07-01 17:49:08.544 atags += ' ' * la
2025-07-01 17:49:08.544 btags += ' ' * lb
2025-07-01 17:49:08.544 else:
2025-07-01 17:49:08.545 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.550 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.550 else:
2025-07-01 17:49:08.550 # the synch pair is identical
2025-07-01 17:49:08.550 yield ' ' + aelt
2025-07-01 17:49:08.550
2025-07-01 17:49:08.550 # pump out diffs from after the synch point
2025-07-01 17:49:08.550 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.550
2025-07-01 17:49:08.550 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.550 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.550
2025-07-01 17:49:08.550 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.550 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.550 alo = 408, ahi = 1101
2025-07-01 17:49:08.550 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.550 blo = 408, bhi = 1101
2025-07-01 17:49:08.550
2025-07-01 17:49:08.550 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.550 g = []
2025-07-01 17:49:08.551 if alo < ahi:
2025-07-01 17:49:08.551 if blo < bhi:
2025-07-01 17:49:08.551 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.551 else:
2025-07-01 17:49:08.551 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.551 elif blo < bhi:
2025-07-01 17:49:08.551 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.551
2025-07-01 17:49:08.551 > yield from g
2025-07-01 17:49:08.551
2025-07-01 17:49:08.551 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.551 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.551
2025-07-01 17:49:08.551 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.551 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.551 alo = 408, ahi = 1101
2025-07-01 17:49:08.551 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.551 blo = 408, bhi = 1101
2025-07-01 17:49:08.551
2025-07-01 17:49:08.551 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.551 r"""
2025-07-01 17:49:08.552 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.552 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.552 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.552 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.552
2025-07-01 17:49:08.552 Example:
2025-07-01 17:49:08.552
2025-07-01 17:49:08.552 >>> d = Differ()
2025-07-01 17:49:08.552 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.552 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.552 >>> print(''.join(results), end="")
2025-07-01 17:49:08.552 - abcDefghiJkl
2025-07-01 17:49:08.552 + abcdefGhijkl
2025-07-01 17:49:08.552 """
2025-07-01 17:49:08.552
2025-07-01 17:49:08.552 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.552 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.553 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.553 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.553 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.553
2025-07-01 17:49:08.553 # search for the pair that matches best without being identical
2025-07-01 17:49:08.553 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.553 # on junk -- unless we have to)
2025-07-01 17:49:08.553 for j in range(blo, bhi):
2025-07-01 17:49:08.553 bj = b[j]
2025-07-01 17:49:08.553 cruncher.set_seq2(bj)
2025-07-01 17:49:08.553 for i in range(alo, ahi):
2025-07-01 17:49:08.553 ai = a[i]
2025-07-01 17:49:08.553 if ai == bj:
2025-07-01 17:49:08.553 if eqi is None:
2025-07-01 17:49:08.553 eqi, eqj = i, j
2025-07-01 17:49:08.553 continue
2025-07-01 17:49:08.553 cruncher.set_seq1(ai)
2025-07-01 17:49:08.553 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.553 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.553 # compares by a factor of 3.
2025-07-01 17:49:08.554 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.554 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.554 # of the computation is cached by cruncher
2025-07-01 17:49:08.554 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.554 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.554 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.554 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.554 if best_ratio < cutoff:
2025-07-01 17:49:08.554 # no non-identical "pretty close" pair
2025-07-01 17:49:08.554 if eqi is None:
2025-07-01 17:49:08.554 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.554 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.554 return
2025-07-01 17:49:08.554 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.554 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.554 else:
2025-07-01 17:49:08.554 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.554 eqi = None
2025-07-01 17:49:08.554
2025-07-01 17:49:08.554 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.555 # identical
2025-07-01 17:49:08.555
2025-07-01 17:49:08.555 # pump out diffs from before the synch point
2025-07-01 17:49:08.555 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.555
2025-07-01 17:49:08.555 # do intraline marking on the synch pair
2025-07-01 17:49:08.555 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.555 if eqi is None:
2025-07-01 17:49:08.555 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.555 atags = btags = ""
2025-07-01 17:49:08.555 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.555 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.555 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.555 if tag == 'replace':
2025-07-01 17:49:08.555 atags += '^' * la
2025-07-01 17:49:08.555 btags += '^' * lb
2025-07-01 17:49:08.555 elif tag == 'delete':
2025-07-01 17:49:08.555 atags += '-' * la
2025-07-01 17:49:08.555 elif tag == 'insert':
2025-07-01 17:49:08.555 btags += '+' * lb
2025-07-01 17:49:08.555 elif tag == 'equal':
2025-07-01 17:49:08.556 atags += ' ' * la
2025-07-01 17:49:08.556 btags += ' ' * lb
2025-07-01 17:49:08.556 else:
2025-07-01 17:49:08.556 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.556 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.556 else:
2025-07-01 17:49:08.556 # the synch pair is identical
2025-07-01 17:49:08.556 yield ' ' + aelt
2025-07-01 17:49:08.556
2025-07-01 17:49:08.556 # pump out diffs from after the synch point
2025-07-01 17:49:08.556 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.556
2025-07-01 17:49:08.556 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.556 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.556
2025-07-01 17:49:08.556 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.556 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.556 alo = 409, ahi = 1101
2025-07-01 17:49:08.556 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.556 blo = 409, bhi = 1101
2025-07-01 17:49:08.557
2025-07-01 17:49:08.557 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.557 g = []
2025-07-01 17:49:08.557 if alo < ahi:
2025-07-01 17:49:08.557 if blo < bhi:
2025-07-01 17:49:08.557 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.557 else:
2025-07-01 17:49:08.557 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.557 elif blo < bhi:
2025-07-01 17:49:08.557 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.557
2025-07-01 17:49:08.557 > yield from g
2025-07-01 17:49:08.557
2025-07-01 17:49:08.557 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.557 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.557
2025-07-01 17:49:08.557 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.557 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.557 alo = 409, ahi = 1101
2025-07-01 17:49:08.557 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.557 blo = 409, bhi = 1101
2025-07-01 17:49:08.558
2025-07-01 17:49:08.558 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.558 r"""
2025-07-01 17:49:08.558 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.558 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.558 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.558 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.558
2025-07-01 17:49:08.558 Example:
2025-07-01 17:49:08.558
2025-07-01 17:49:08.558 >>> d = Differ()
2025-07-01 17:49:08.558 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.558 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.558 >>> print(''.join(results), end="")
2025-07-01 17:49:08.558 - abcDefghiJkl
2025-07-01 17:49:08.558 + abcdefGhijkl
2025-07-01 17:49:08.558 """
2025-07-01 17:49:08.558
2025-07-01 17:49:08.559 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.559 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.559 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.559 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.559 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.559
2025-07-01 17:49:08.559 # search for the pair that matches best without being identical
2025-07-01 17:49:08.559 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.559 # on junk -- unless we have to)
2025-07-01 17:49:08.559 for j in range(blo, bhi):
2025-07-01 17:49:08.559 bj = b[j]
2025-07-01 17:49:08.559 cruncher.set_seq2(bj)
2025-07-01 17:49:08.559 for i in range(alo, ahi):
2025-07-01 17:49:08.559 ai = a[i]
2025-07-01 17:49:08.559 if ai == bj:
2025-07-01 17:49:08.559 if eqi is None:
2025-07-01 17:49:08.559 eqi, eqj = i, j
2025-07-01 17:49:08.559 continue
2025-07-01 17:49:08.559 cruncher.set_seq1(ai)
2025-07-01 17:49:08.559 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.559 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.560 # compares by a factor of 3.
2025-07-01 17:49:08.563 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.563 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.563 # of the computation is cached by cruncher
2025-07-01 17:49:08.563 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.563 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.563 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.563 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.563 if best_ratio < cutoff:
2025-07-01 17:49:08.563 # no non-identical "pretty close" pair
2025-07-01 17:49:08.563 if eqi is None:
2025-07-01 17:49:08.563 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.563 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.563 return
2025-07-01 17:49:08.563 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.563 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.563 else:
2025-07-01 17:49:08.563 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.563 eqi = None
2025-07-01 17:49:08.564
2025-07-01 17:49:08.564 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.564 # identical
2025-07-01 17:49:08.564
2025-07-01 17:49:08.564 # pump out diffs from before the synch point
2025-07-01 17:49:08.564 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.564
2025-07-01 17:49:08.564 # do intraline marking on the synch pair
2025-07-01 17:49:08.564 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.564 if eqi is None:
2025-07-01 17:49:08.564 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.564 atags = btags = ""
2025-07-01 17:49:08.564 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.564 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.564 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.564 if tag == 'replace':
2025-07-01 17:49:08.564 atags += '^' * la
2025-07-01 17:49:08.564 btags += '^' * lb
2025-07-01 17:49:08.564 elif tag == 'delete':
2025-07-01 17:49:08.564 atags += '-' * la
2025-07-01 17:49:08.564 elif tag == 'insert':
2025-07-01 17:49:08.565 btags += '+' * lb
2025-07-01 17:49:08.565 elif tag == 'equal':
2025-07-01 17:49:08.565 atags += ' ' * la
2025-07-01 17:49:08.565 btags += ' ' * lb
2025-07-01 17:49:08.565 else:
2025-07-01 17:49:08.565 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.565 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.565 else:
2025-07-01 17:49:08.565 # the synch pair is identical
2025-07-01 17:49:08.565 yield ' ' + aelt
2025-07-01 17:49:08.565
2025-07-01 17:49:08.565 # pump out diffs from after the synch point
2025-07-01 17:49:08.565 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.565
2025-07-01 17:49:08.565 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.565 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.565
2025-07-01 17:49:08.565 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.565 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.565 alo = 410, ahi = 1101
2025-07-01 17:49:08.566 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.566 blo = 410, bhi = 1101
2025-07-01 17:49:08.566
2025-07-01 17:49:08.566 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.566 g = []
2025-07-01 17:49:08.566 if alo < ahi:
2025-07-01 17:49:08.566 if blo < bhi:
2025-07-01 17:49:08.566 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.566 else:
2025-07-01 17:49:08.566 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.566 elif blo < bhi:
2025-07-01 17:49:08.566 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.566
2025-07-01 17:49:08.566 > yield from g
2025-07-01 17:49:08.566
2025-07-01 17:49:08.566 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.566 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.566
2025-07-01 17:49:08.566 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.566 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.567 alo = 410, ahi = 1101
2025-07-01 17:49:08.567 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.567 blo = 410, bhi = 1101
2025-07-01 17:49:08.567
2025-07-01 17:49:08.567 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.567 r"""
2025-07-01 17:49:08.567 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.567 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.567 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.567 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.567
2025-07-01 17:49:08.567 Example:
2025-07-01 17:49:08.567
2025-07-01 17:49:08.567 >>> d = Differ()
2025-07-01 17:49:08.567 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.567 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.567 >>> print(''.join(results), end="")
2025-07-01 17:49:08.567 - abcDefghiJkl
2025-07-01 17:49:08.567 + abcdefGhijkl
2025-07-01 17:49:08.568 """
2025-07-01 17:49:08.568
2025-07-01 17:49:08.568 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.568 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.568 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.568 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.568 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.568
2025-07-01 17:49:08.568 # search for the pair that matches best without being identical
2025-07-01 17:49:08.568 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.568 # on junk -- unless we have to)
2025-07-01 17:49:08.568 for j in range(blo, bhi):
2025-07-01 17:49:08.568 bj = b[j]
2025-07-01 17:49:08.568 cruncher.set_seq2(bj)
2025-07-01 17:49:08.568 for i in range(alo, ahi):
2025-07-01 17:49:08.568 ai = a[i]
2025-07-01 17:49:08.568 if ai == bj:
2025-07-01 17:49:08.568 if eqi is None:
2025-07-01 17:49:08.568 eqi, eqj = i, j
2025-07-01 17:49:08.568 continue
2025-07-01 17:49:08.568 cruncher.set_seq1(ai)
2025-07-01 17:49:08.569 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.569 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.569 # compares by a factor of 3.
2025-07-01 17:49:08.569 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.569 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.569 # of the computation is cached by cruncher
2025-07-01 17:49:08.569 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.569 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.569 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.569 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.569 if best_ratio < cutoff:
2025-07-01 17:49:08.569 # no non-identical "pretty close" pair
2025-07-01 17:49:08.569 if eqi is None:
2025-07-01 17:49:08.569 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.569 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.569 return
2025-07-01 17:49:08.569 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.569 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.569 else:
2025-07-01 17:49:08.569 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.569 eqi = None
2025-07-01 17:49:08.569
2025-07-01 17:49:08.570 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.570 # identical
2025-07-01 17:49:08.570
2025-07-01 17:49:08.570 # pump out diffs from before the synch point
2025-07-01 17:49:08.570 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.570
2025-07-01 17:49:08.570 # do intraline marking on the synch pair
2025-07-01 17:49:08.570 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.570 if eqi is None:
2025-07-01 17:49:08.570 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.570 atags = btags = ""
2025-07-01 17:49:08.570 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.570 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.570 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.570 if tag == 'replace':
2025-07-01 17:49:08.570 atags += '^' * la
2025-07-01 17:49:08.570 btags += '^' * lb
2025-07-01 17:49:08.570 elif tag == 'delete':
2025-07-01 17:49:08.570 atags += '-' * la
2025-07-01 17:49:08.570 elif tag == 'insert':
2025-07-01 17:49:08.570 btags += '+' * lb
2025-07-01 17:49:08.571 elif tag == 'equal':
2025-07-01 17:49:08.571 atags += ' ' * la
2025-07-01 17:49:08.571 btags += ' ' * lb
2025-07-01 17:49:08.571 else:
2025-07-01 17:49:08.571 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.571 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.571 else:
2025-07-01 17:49:08.571 # the synch pair is identical
2025-07-01 17:49:08.571 yield ' ' + aelt
2025-07-01 17:49:08.571
2025-07-01 17:49:08.571 # pump out diffs from after the synch point
2025-07-01 17:49:08.571 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.571
2025-07-01 17:49:08.571 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.571 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.571
2025-07-01 17:49:08.571 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.571 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.571 alo = 411, ahi = 1101
2025-07-01 17:49:08.571 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.571 blo = 411, bhi = 1101
2025-07-01 17:49:08.572
2025-07-01 17:49:08.572 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.572 g = []
2025-07-01 17:49:08.572 if alo < ahi:
2025-07-01 17:49:08.572 if blo < bhi:
2025-07-01 17:49:08.572 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.572 else:
2025-07-01 17:49:08.572 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.572 elif blo < bhi:
2025-07-01 17:49:08.572 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.572
2025-07-01 17:49:08.572 > yield from g
2025-07-01 17:49:08.572
2025-07-01 17:49:08.572 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.572 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.572
2025-07-01 17:49:08.572 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.572 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.572 alo = 411, ahi = 1101
2025-07-01 17:49:08.573 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.573 blo = 411, bhi = 1101
2025-07-01 17:49:08.573
2025-07-01 17:49:08.573 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.573 r"""
2025-07-01 17:49:08.573 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.573 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.573 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.573 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.573
2025-07-01 17:49:08.573 Example:
2025-07-01 17:49:08.573
2025-07-01 17:49:08.573 >>> d = Differ()
2025-07-01 17:49:08.573 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.573 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.573 >>> print(''.join(results), end="")
2025-07-01 17:49:08.573 - abcDefghiJkl
2025-07-01 17:49:08.573 + abcdefGhijkl
2025-07-01 17:49:08.573 """
2025-07-01 17:49:08.574
2025-07-01 17:49:08.574 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.574 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.574 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.574 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.574 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.574
2025-07-01 17:49:08.574 # search for the pair that matches best without being identical
2025-07-01 17:49:08.574 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.574 # on junk -- unless we have to)
2025-07-01 17:49:08.574 for j in range(blo, bhi):
2025-07-01 17:49:08.574 bj = b[j]
2025-07-01 17:49:08.574 cruncher.set_seq2(bj)
2025-07-01 17:49:08.574 for i in range(alo, ahi):
2025-07-01 17:49:08.574 ai = a[i]
2025-07-01 17:49:08.574 if ai == bj:
2025-07-01 17:49:08.574 if eqi is None:
2025-07-01 17:49:08.574 eqi, eqj = i, j
2025-07-01 17:49:08.574 continue
2025-07-01 17:49:08.574 cruncher.set_seq1(ai)
2025-07-01 17:49:08.574 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.575 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.575 # compares by a factor of 3.
2025-07-01 17:49:08.575 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.575 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.575 # of the computation is cached by cruncher
2025-07-01 17:49:08.575 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.575 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.575 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.575 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.575 if best_ratio < cutoff:
2025-07-01 17:49:08.575 # no non-identical "pretty close" pair
2025-07-01 17:49:08.575 if eqi is None:
2025-07-01 17:49:08.575 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.575 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.575 return
2025-07-01 17:49:08.575 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.575 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.575 else:
2025-07-01 17:49:08.575 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.575 eqi = None
2025-07-01 17:49:08.575
2025-07-01 17:49:08.576 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.581 # identical
2025-07-01 17:49:08.581
2025-07-01 17:49:08.581 # pump out diffs from before the synch point
2025-07-01 17:49:08.581 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.581
2025-07-01 17:49:08.581 # do intraline marking on the synch pair
2025-07-01 17:49:08.581 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.581 if eqi is None:
2025-07-01 17:49:08.581 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.581 atags = btags = ""
2025-07-01 17:49:08.581 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.581 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.581 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.581 if tag == 'replace':
2025-07-01 17:49:08.581 atags += '^' * la
2025-07-01 17:49:08.581 btags += '^' * lb
2025-07-01 17:49:08.581 elif tag == 'delete':
2025-07-01 17:49:08.581 atags += '-' * la
2025-07-01 17:49:08.581 elif tag == 'insert':
2025-07-01 17:49:08.582 btags += '+' * lb
2025-07-01 17:49:08.582 elif tag == 'equal':
2025-07-01 17:49:08.582 atags += ' ' * la
2025-07-01 17:49:08.582 btags += ' ' * lb
2025-07-01 17:49:08.582 else:
2025-07-01 17:49:08.582 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.582 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.582 else:
2025-07-01 17:49:08.582 # the synch pair is identical
2025-07-01 17:49:08.582 yield ' ' + aelt
2025-07-01 17:49:08.582
2025-07-01 17:49:08.582 # pump out diffs from after the synch point
2025-07-01 17:49:08.582 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.582
2025-07-01 17:49:08.582 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.582 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.582
2025-07-01 17:49:08.582 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.582 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.582 alo = 412, ahi = 1101
2025-07-01 17:49:08.582 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.583 blo = 412, bhi = 1101
2025-07-01 17:49:08.583
2025-07-01 17:49:08.583 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.583 g = []
2025-07-01 17:49:08.583 if alo < ahi:
2025-07-01 17:49:08.583 if blo < bhi:
2025-07-01 17:49:08.583 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.583 else:
2025-07-01 17:49:08.583 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.583 elif blo < bhi:
2025-07-01 17:49:08.583 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.583
2025-07-01 17:49:08.583 > yield from g
2025-07-01 17:49:08.583
2025-07-01 17:49:08.583 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.583 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.583
2025-07-01 17:49:08.583 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.583 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.583 alo = 412, ahi = 1101
2025-07-01 17:49:08.583 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.584 blo = 412, bhi = 1101
2025-07-01 17:49:08.584
2025-07-01 17:49:08.584 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.584 r"""
2025-07-01 17:49:08.584 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.584 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.584 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.584 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.584
2025-07-01 17:49:08.584 Example:
2025-07-01 17:49:08.584
2025-07-01 17:49:08.584 >>> d = Differ()
2025-07-01 17:49:08.584 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.584 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.584 >>> print(''.join(results), end="")
2025-07-01 17:49:08.584 - abcDefghiJkl
2025-07-01 17:49:08.584 + abcdefGhijkl
2025-07-01 17:49:08.584 """
2025-07-01 17:49:08.584
2025-07-01 17:49:08.584 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.585 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.585 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.585 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.585 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.585
2025-07-01 17:49:08.585 # search for the pair that matches best without being identical
2025-07-01 17:49:08.585 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.585 # on junk -- unless we have to)
2025-07-01 17:49:08.585 for j in range(blo, bhi):
2025-07-01 17:49:08.585 bj = b[j]
2025-07-01 17:49:08.585 cruncher.set_seq2(bj)
2025-07-01 17:49:08.585 for i in range(alo, ahi):
2025-07-01 17:49:08.585 ai = a[i]
2025-07-01 17:49:08.585 if ai == bj:
2025-07-01 17:49:08.585 if eqi is None:
2025-07-01 17:49:08.585 eqi, eqj = i, j
2025-07-01 17:49:08.585 continue
2025-07-01 17:49:08.585 cruncher.set_seq1(ai)
2025-07-01 17:49:08.586 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.586 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.586 # compares by a factor of 3.
2025-07-01 17:49:08.586 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.586 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.586 # of the computation is cached by cruncher
2025-07-01 17:49:08.586 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.586 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.587 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.587 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.587 if best_ratio < cutoff:
2025-07-01 17:49:08.587 # no non-identical "pretty close" pair
2025-07-01 17:49:08.587 if eqi is None:
2025-07-01 17:49:08.587 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.587 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.587 return
2025-07-01 17:49:08.587 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.587 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.587 else:
2025-07-01 17:49:08.587 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.587 eqi = None
2025-07-01 17:49:08.587
2025-07-01 17:49:08.587 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.587 # identical
2025-07-01 17:49:08.587
2025-07-01 17:49:08.587 # pump out diffs from before the synch point
2025-07-01 17:49:08.587 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.587
2025-07-01 17:49:08.588 # do intraline marking on the synch pair
2025-07-01 17:49:08.588 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.588 if eqi is None:
2025-07-01 17:49:08.588 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.588 atags = btags = ""
2025-07-01 17:49:08.588 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.588 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.588 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.588 if tag == 'replace':
2025-07-01 17:49:08.588 atags += '^' * la
2025-07-01 17:49:08.588 btags += '^' * lb
2025-07-01 17:49:08.588 elif tag == 'delete':
2025-07-01 17:49:08.588 atags += '-' * la
2025-07-01 17:49:08.588 elif tag == 'insert':
2025-07-01 17:49:08.588 btags += '+' * lb
2025-07-01 17:49:08.588 elif tag == 'equal':
2025-07-01 17:49:08.588 atags += ' ' * la
2025-07-01 17:49:08.588 btags += ' ' * lb
2025-07-01 17:49:08.588 else:
2025-07-01 17:49:08.588 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.588 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.589 else:
2025-07-01 17:49:08.589 # the synch pair is identical
2025-07-01 17:49:08.589 yield ' ' + aelt
2025-07-01 17:49:08.589
2025-07-01 17:49:08.589 # pump out diffs from after the synch point
2025-07-01 17:49:08.589 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.589
2025-07-01 17:49:08.589 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.589 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.589
2025-07-01 17:49:08.589 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.589 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.589 alo = 413, ahi = 1101
2025-07-01 17:49:08.589 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.589 blo = 413, bhi = 1101
2025-07-01 17:49:08.589
2025-07-01 17:49:08.589 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.589 g = []
2025-07-01 17:49:08.589 if alo < ahi:
2025-07-01 17:49:08.589 if blo < bhi:
2025-07-01 17:49:08.589 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.590 else:
2025-07-01 17:49:08.590 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.590 elif blo < bhi:
2025-07-01 17:49:08.590 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.590
2025-07-01 17:49:08.590 > yield from g
2025-07-01 17:49:08.590
2025-07-01 17:49:08.590 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.590 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.590
2025-07-01 17:49:08.590 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.590 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.590 alo = 413, ahi = 1101
2025-07-01 17:49:08.590 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.590 blo = 413, bhi = 1101
2025-07-01 17:49:08.590
2025-07-01 17:49:08.590 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.590 r"""
2025-07-01 17:49:08.590 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.590 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.590 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.591 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.591
2025-07-01 17:49:08.591 Example:
2025-07-01 17:49:08.591
2025-07-01 17:49:08.591 >>> d = Differ()
2025-07-01 17:49:08.591 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.591 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.591 >>> print(''.join(results), end="")
2025-07-01 17:49:08.591 - abcDefghiJkl
2025-07-01 17:49:08.591 + abcdefGhijkl
2025-07-01 17:49:08.591 """
2025-07-01 17:49:08.591
2025-07-01 17:49:08.591 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.591 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.591 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.591 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.591 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.591
2025-07-01 17:49:08.591 # search for the pair that matches best without being identical
2025-07-01 17:49:08.595 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.595 # on junk -- unless we have to)
2025-07-01 17:49:08.595 for j in range(blo, bhi):
2025-07-01 17:49:08.595 bj = b[j]
2025-07-01 17:49:08.595 cruncher.set_seq2(bj)
2025-07-01 17:49:08.595 for i in range(alo, ahi):
2025-07-01 17:49:08.595 ai = a[i]
2025-07-01 17:49:08.595 if ai == bj:
2025-07-01 17:49:08.595 if eqi is None:
2025-07-01 17:49:08.595 eqi, eqj = i, j
2025-07-01 17:49:08.595 continue
2025-07-01 17:49:08.595 cruncher.set_seq1(ai)
2025-07-01 17:49:08.595 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.595 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.595 # compares by a factor of 3.
2025-07-01 17:49:08.595 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.595 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.595 # of the computation is cached by cruncher
2025-07-01 17:49:08.595 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.595 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.596 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.596 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.596 if best_ratio < cutoff:
2025-07-01 17:49:08.596 # no non-identical "pretty close" pair
2025-07-01 17:49:08.596 if eqi is None:
2025-07-01 17:49:08.596 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.596 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.596 return
2025-07-01 17:49:08.596 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.596 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.596 else:
2025-07-01 17:49:08.596 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.596 eqi = None
2025-07-01 17:49:08.596
2025-07-01 17:49:08.596 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.596 # identical
2025-07-01 17:49:08.596
2025-07-01 17:49:08.596 # pump out diffs from before the synch point
2025-07-01 17:49:08.596 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.596
2025-07-01 17:49:08.597 # do intraline marking on the synch pair
2025-07-01 17:49:08.597 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.597 if eqi is None:
2025-07-01 17:49:08.597 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.597 atags = btags = ""
2025-07-01 17:49:08.597 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.597 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.597 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.597 if tag == 'replace':
2025-07-01 17:49:08.597 atags += '^' * la
2025-07-01 17:49:08.597 btags += '^' * lb
2025-07-01 17:49:08.597 elif tag == 'delete':
2025-07-01 17:49:08.597 atags += '-' * la
2025-07-01 17:49:08.597 elif tag == 'insert':
2025-07-01 17:49:08.597 btags += '+' * lb
2025-07-01 17:49:08.597 elif tag == 'equal':
2025-07-01 17:49:08.597 atags += ' ' * la
2025-07-01 17:49:08.597 btags += ' ' * lb
2025-07-01 17:49:08.597 else:
2025-07-01 17:49:08.597 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.598 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.598 else:
2025-07-01 17:49:08.598 # the synch pair is identical
2025-07-01 17:49:08.598 yield ' ' + aelt
2025-07-01 17:49:08.598
2025-07-01 17:49:08.598 # pump out diffs from after the synch point
2025-07-01 17:49:08.598 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.598
2025-07-01 17:49:08.598 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.598 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.598
2025-07-01 17:49:08.598 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.598 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.598 alo = 414, ahi = 1101
2025-07-01 17:49:08.598 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.598 blo = 414, bhi = 1101
2025-07-01 17:49:08.598
2025-07-01 17:49:08.598 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.599 g = []
2025-07-01 17:49:08.599 if alo < ahi:
2025-07-01 17:49:08.599 if blo < bhi:
2025-07-01 17:49:08.599 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.599 else:
2025-07-01 17:49:08.599 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.599 elif blo < bhi:
2025-07-01 17:49:08.599 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.599
2025-07-01 17:49:08.599 > yield from g
2025-07-01 17:49:08.599
2025-07-01 17:49:08.599 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.599 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.599
2025-07-01 17:49:08.599 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.599 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.599 alo = 414, ahi = 1101
2025-07-01 17:49:08.599 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.599 blo = 414, bhi = 1101
2025-07-01 17:49:08.599
2025-07-01 17:49:08.599 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.600 r"""
2025-07-01 17:49:08.600 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.600 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.600 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.600 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.600
2025-07-01 17:49:08.600 Example:
2025-07-01 17:49:08.600
2025-07-01 17:49:08.600 >>> d = Differ()
2025-07-01 17:49:08.600 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.600 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.600 >>> print(''.join(results), end="")
2025-07-01 17:49:08.600 - abcDefghiJkl
2025-07-01 17:49:08.600 + abcdefGhijkl
2025-07-01 17:49:08.600 """
2025-07-01 17:49:08.600
2025-07-01 17:49:08.600 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.600 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.600 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.601 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.601 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.601
2025-07-01 17:49:08.601 # search for the pair that matches best without being identical
2025-07-01 17:49:08.601 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.601 # on junk -- unless we have to)
2025-07-01 17:49:08.601 for j in range(blo, bhi):
2025-07-01 17:49:08.601 bj = b[j]
2025-07-01 17:49:08.601 cruncher.set_seq2(bj)
2025-07-01 17:49:08.601 for i in range(alo, ahi):
2025-07-01 17:49:08.601 ai = a[i]
2025-07-01 17:49:08.601 if ai == bj:
2025-07-01 17:49:08.601 if eqi is None:
2025-07-01 17:49:08.601 eqi, eqj = i, j
2025-07-01 17:49:08.601 continue
2025-07-01 17:49:08.601 cruncher.set_seq1(ai)
2025-07-01 17:49:08.601 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.601 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.601 # compares by a factor of 3.
2025-07-01 17:49:08.601 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.601 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.601 # of the computation is cached by cruncher
2025-07-01 17:49:08.602 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.602 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.602 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.602 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.602 if best_ratio < cutoff:
2025-07-01 17:49:08.602 # no non-identical "pretty close" pair
2025-07-01 17:49:08.602 if eqi is None:
2025-07-01 17:49:08.602 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.602 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.602 return
2025-07-01 17:49:08.602 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.602 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.602 else:
2025-07-01 17:49:08.602 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.602 eqi = None
2025-07-01 17:49:08.602
2025-07-01 17:49:08.602 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.602 # identical
2025-07-01 17:49:08.602
2025-07-01 17:49:08.602 # pump out diffs from before the synch point
2025-07-01 17:49:08.602 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.602
2025-07-01 17:49:08.603 # do intraline marking on the synch pair
2025-07-01 17:49:08.603 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.603 if eqi is None:
2025-07-01 17:49:08.603 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.603 atags = btags = ""
2025-07-01 17:49:08.603 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.603 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.603 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.603 if tag == 'replace':
2025-07-01 17:49:08.603 atags += '^' * la
2025-07-01 17:49:08.603 btags += '^' * lb
2025-07-01 17:49:08.603 elif tag == 'delete':
2025-07-01 17:49:08.603 atags += '-' * la
2025-07-01 17:49:08.603 elif tag == 'insert':
2025-07-01 17:49:08.603 btags += '+' * lb
2025-07-01 17:49:08.603 elif tag == 'equal':
2025-07-01 17:49:08.603 atags += ' ' * la
2025-07-01 17:49:08.603 btags += ' ' * lb
2025-07-01 17:49:08.603 else:
2025-07-01 17:49:08.603 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.603 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.604 else:
2025-07-01 17:49:08.604 # the synch pair is identical
2025-07-01 17:49:08.604 yield ' ' + aelt
2025-07-01 17:49:08.604
2025-07-01 17:49:08.604 # pump out diffs from after the synch point
2025-07-01 17:49:08.604 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.604
2025-07-01 17:49:08.604 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.604 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.604
2025-07-01 17:49:08.604 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.604 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.604 alo = 415, ahi = 1101
2025-07-01 17:49:08.604 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.604 blo = 415, bhi = 1101
2025-07-01 17:49:08.604
2025-07-01 17:49:08.604 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.604 g = []
2025-07-01 17:49:08.604 if alo < ahi:
2025-07-01 17:49:08.604 if blo < bhi:
2025-07-01 17:49:08.604 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.604 else:
2025-07-01 17:49:08.605 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.605 elif blo < bhi:
2025-07-01 17:49:08.605 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.605
2025-07-01 17:49:08.605 > yield from g
2025-07-01 17:49:08.605
2025-07-01 17:49:08.605 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.605 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.605
2025-07-01 17:49:08.605 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.605 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.605 alo = 415, ahi = 1101
2025-07-01 17:49:08.605 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.605 blo = 415, bhi = 1101
2025-07-01 17:49:08.605
2025-07-01 17:49:08.605 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.605 r"""
2025-07-01 17:49:08.605 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.605 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.605 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.606 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.606
2025-07-01 17:49:08.606 Example:
2025-07-01 17:49:08.606
2025-07-01 17:49:08.606 >>> d = Differ()
2025-07-01 17:49:08.606 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.606 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.606 >>> print(''.join(results), end="")
2025-07-01 17:49:08.606 - abcDefghiJkl
2025-07-01 17:49:08.606 + abcdefGhijkl
2025-07-01 17:49:08.606 """
2025-07-01 17:49:08.606
2025-07-01 17:49:08.606 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.606 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.606 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.606 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.606 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.606
2025-07-01 17:49:08.606 # search for the pair that matches best without being identical
2025-07-01 17:49:08.607 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.607 # on junk -- unless we have to)
2025-07-01 17:49:08.607 for j in range(blo, bhi):
2025-07-01 17:49:08.607 bj = b[j]
2025-07-01 17:49:08.607 cruncher.set_seq2(bj)
2025-07-01 17:49:08.607 for i in range(alo, ahi):
2025-07-01 17:49:08.607 ai = a[i]
2025-07-01 17:49:08.607 if ai == bj:
2025-07-01 17:49:08.607 if eqi is None:
2025-07-01 17:49:08.607 eqi, eqj = i, j
2025-07-01 17:49:08.607 continue
2025-07-01 17:49:08.607 cruncher.set_seq1(ai)
2025-07-01 17:49:08.607 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.607 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.607 # compares by a factor of 3.
2025-07-01 17:49:08.607 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.607 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.607 # of the computation is cached by cruncher
2025-07-01 17:49:08.607 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.607 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.607 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.608 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.613 if best_ratio < cutoff:
2025-07-01 17:49:08.613 # no non-identical "pretty close" pair
2025-07-01 17:49:08.613 if eqi is None:
2025-07-01 17:49:08.613 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.613 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.613 return
2025-07-01 17:49:08.613 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.613 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.613 else:
2025-07-01 17:49:08.613 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.613 eqi = None
2025-07-01 17:49:08.613
2025-07-01 17:49:08.613 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.613 # identical
2025-07-01 17:49:08.613
2025-07-01 17:49:08.613 # pump out diffs from before the synch point
2025-07-01 17:49:08.613 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.613
2025-07-01 17:49:08.613 # do intraline marking on the synch pair
2025-07-01 17:49:08.614 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.614 if eqi is None:
2025-07-01 17:49:08.614 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.614 atags = btags = ""
2025-07-01 17:49:08.614 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.614 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.614 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.614 if tag == 'replace':
2025-07-01 17:49:08.614 atags += '^' * la
2025-07-01 17:49:08.614 btags += '^' * lb
2025-07-01 17:49:08.614 elif tag == 'delete':
2025-07-01 17:49:08.614 atags += '-' * la
2025-07-01 17:49:08.614 elif tag == 'insert':
2025-07-01 17:49:08.614 btags += '+' * lb
2025-07-01 17:49:08.614 elif tag == 'equal':
2025-07-01 17:49:08.614 atags += ' ' * la
2025-07-01 17:49:08.614 btags += ' ' * lb
2025-07-01 17:49:08.614 else:
2025-07-01 17:49:08.614 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.614 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.614 else:
2025-07-01 17:49:08.615 # the synch pair is identical
2025-07-01 17:49:08.615 yield ' ' + aelt
2025-07-01 17:49:08.615
2025-07-01 17:49:08.615 # pump out diffs from after the synch point
2025-07-01 17:49:08.615 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.615
2025-07-01 17:49:08.615 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.615 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.615
2025-07-01 17:49:08.615 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.615 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.615 alo = 416, ahi = 1101
2025-07-01 17:49:08.615 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.615 blo = 416, bhi = 1101
2025-07-01 17:49:08.615
2025-07-01 17:49:08.615 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.615 g = []
2025-07-01 17:49:08.615 if alo < ahi:
2025-07-01 17:49:08.615 if blo < bhi:
2025-07-01 17:49:08.615 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.615 else:
2025-07-01 17:49:08.615 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.616 elif blo < bhi:
2025-07-01 17:49:08.616 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.616
2025-07-01 17:49:08.616 > yield from g
2025-07-01 17:49:08.616
2025-07-01 17:49:08.616 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.616 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.616
2025-07-01 17:49:08.616 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.616 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.616 alo = 416, ahi = 1101
2025-07-01 17:49:08.616 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.616 blo = 416, bhi = 1101
2025-07-01 17:49:08.616
2025-07-01 17:49:08.616 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.616 r"""
2025-07-01 17:49:08.616 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.616 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.616 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.616 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.616
2025-07-01 17:49:08.617 Example:
2025-07-01 17:49:08.617
2025-07-01 17:49:08.617 >>> d = Differ()
2025-07-01 17:49:08.617 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.617 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.617 >>> print(''.join(results), end="")
2025-07-01 17:49:08.617 - abcDefghiJkl
2025-07-01 17:49:08.617 + abcdefGhijkl
2025-07-01 17:49:08.617 """
2025-07-01 17:49:08.617
2025-07-01 17:49:08.617 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.617 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.617 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.617 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.617 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.617
2025-07-01 17:49:08.617 # search for the pair that matches best without being identical
2025-07-01 17:49:08.617 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.617 # on junk -- unless we have to)
2025-07-01 17:49:08.617 for j in range(blo, bhi):
2025-07-01 17:49:08.618 bj = b[j]
2025-07-01 17:49:08.618 cruncher.set_seq2(bj)
2025-07-01 17:49:08.618 for i in range(alo, ahi):
2025-07-01 17:49:08.618 ai = a[i]
2025-07-01 17:49:08.618 if ai == bj:
2025-07-01 17:49:08.618 if eqi is None:
2025-07-01 17:49:08.618 eqi, eqj = i, j
2025-07-01 17:49:08.618 continue
2025-07-01 17:49:08.618 cruncher.set_seq1(ai)
2025-07-01 17:49:08.618 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.618 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.618 # compares by a factor of 3.
2025-07-01 17:49:08.618 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.618 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.618 # of the computation is cached by cruncher
2025-07-01 17:49:08.618 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.618 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.618 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.618 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.618 if best_ratio < cutoff:
2025-07-01 17:49:08.618 # no non-identical "pretty close" pair
2025-07-01 17:49:08.619 if eqi is None:
2025-07-01 17:49:08.619 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.619 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.619 return
2025-07-01 17:49:08.619 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.619 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.619 else:
2025-07-01 17:49:08.619 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.619 eqi = None
2025-07-01 17:49:08.619
2025-07-01 17:49:08.619 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.619 # identical
2025-07-01 17:49:08.619
2025-07-01 17:49:08.619 # pump out diffs from before the synch point
2025-07-01 17:49:08.619 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.619
2025-07-01 17:49:08.619 # do intraline marking on the synch pair
2025-07-01 17:49:08.619 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.619 if eqi is None:
2025-07-01 17:49:08.620 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.620 atags = btags = ""
2025-07-01 17:49:08.620 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.620 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.620 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.620 if tag == 'replace':
2025-07-01 17:49:08.620 atags += '^' * la
2025-07-01 17:49:08.620 btags += '^' * lb
2025-07-01 17:49:08.620 elif tag == 'delete':
2025-07-01 17:49:08.620 atags += '-' * la
2025-07-01 17:49:08.620 elif tag == 'insert':
2025-07-01 17:49:08.620 btags += '+' * lb
2025-07-01 17:49:08.620 elif tag == 'equal':
2025-07-01 17:49:08.620 atags += ' ' * la
2025-07-01 17:49:08.620 btags += ' ' * lb
2025-07-01 17:49:08.620 else:
2025-07-01 17:49:08.620 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.620 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.620 else:
2025-07-01 17:49:08.620 # the synch pair is identical
2025-07-01 17:49:08.621 yield ' ' + aelt
2025-07-01 17:49:08.621
2025-07-01 17:49:08.621 # pump out diffs from after the synch point
2025-07-01 17:49:08.621 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.621
2025-07-01 17:49:08.621 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.621 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.621
2025-07-01 17:49:08.621 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.621 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.621 alo = 417, ahi = 1101
2025-07-01 17:49:08.621 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.621 blo = 417, bhi = 1101
2025-07-01 17:49:08.621
2025-07-01 17:49:08.621 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.621 g = []
2025-07-01 17:49:08.621 if alo < ahi:
2025-07-01 17:49:08.621 if blo < bhi:
2025-07-01 17:49:08.621 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.621 else:
2025-07-01 17:49:08.621 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.622 elif blo < bhi:
2025-07-01 17:49:08.622 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.622
2025-07-01 17:49:08.622 > yield from g
2025-07-01 17:49:08.622
2025-07-01 17:49:08.622 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.622 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.622
2025-07-01 17:49:08.622 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.622 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.622 alo = 417, ahi = 1101
2025-07-01 17:49:08.622 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.622 blo = 417, bhi = 1101
2025-07-01 17:49:08.622
2025-07-01 17:49:08.622 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.622 r"""
2025-07-01 17:49:08.622 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.622 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.622 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.622 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.623
2025-07-01 17:49:08.625 Example:
2025-07-01 17:49:08.626
2025-07-01 17:49:08.626 >>> d = Differ()
2025-07-01 17:49:08.626 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.626 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.626 >>> print(''.join(results), end="")
2025-07-01 17:49:08.626 - abcDefghiJkl
2025-07-01 17:49:08.626 + abcdefGhijkl
2025-07-01 17:49:08.626 """
2025-07-01 17:49:08.626
2025-07-01 17:49:08.626 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.626 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.626 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.626 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.626 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.626
2025-07-01 17:49:08.626 # search for the pair that matches best without being identical
2025-07-01 17:49:08.626 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.626 # on junk -- unless we have to)
2025-07-01 17:49:08.626 for j in range(blo, bhi):
2025-07-01 17:49:08.627 bj = b[j]
2025-07-01 17:49:08.627 cruncher.set_seq2(bj)
2025-07-01 17:49:08.627 for i in range(alo, ahi):
2025-07-01 17:49:08.627 ai = a[i]
2025-07-01 17:49:08.627 if ai == bj:
2025-07-01 17:49:08.627 if eqi is None:
2025-07-01 17:49:08.627 eqi, eqj = i, j
2025-07-01 17:49:08.627 continue
2025-07-01 17:49:08.627 cruncher.set_seq1(ai)
2025-07-01 17:49:08.627 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.627 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.627 # compares by a factor of 3.
2025-07-01 17:49:08.627 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.627 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.627 # of the computation is cached by cruncher
2025-07-01 17:49:08.627 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.627 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.627 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.627 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.627 if best_ratio < cutoff:
2025-07-01 17:49:08.627 # no non-identical "pretty close" pair
2025-07-01 17:49:08.627 if eqi is None:
2025-07-01 17:49:08.628 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.628 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.628 return
2025-07-01 17:49:08.628 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.628 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.628 else:
2025-07-01 17:49:08.628 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.628 eqi = None
2025-07-01 17:49:08.628
2025-07-01 17:49:08.628 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.628 # identical
2025-07-01 17:49:08.628
2025-07-01 17:49:08.628 # pump out diffs from before the synch point
2025-07-01 17:49:08.628 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.628
2025-07-01 17:49:08.628 # do intraline marking on the synch pair
2025-07-01 17:49:08.628 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.628 if eqi is None:
2025-07-01 17:49:08.628 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.628 atags = btags = ""
2025-07-01 17:49:08.628 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.628 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.629 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.629 if tag == 'replace':
2025-07-01 17:49:08.629 atags += '^' * la
2025-07-01 17:49:08.629 btags += '^' * lb
2025-07-01 17:49:08.629 elif tag == 'delete':
2025-07-01 17:49:08.629 atags += '-' * la
2025-07-01 17:49:08.629 elif tag == 'insert':
2025-07-01 17:49:08.629 btags += '+' * lb
2025-07-01 17:49:08.629 elif tag == 'equal':
2025-07-01 17:49:08.629 atags += ' ' * la
2025-07-01 17:49:08.629 btags += ' ' * lb
2025-07-01 17:49:08.629 else:
2025-07-01 17:49:08.629 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.629 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.629 else:
2025-07-01 17:49:08.629 # the synch pair is identical
2025-07-01 17:49:08.629 yield ' ' + aelt
2025-07-01 17:49:08.629
2025-07-01 17:49:08.629 # pump out diffs from after the synch point
2025-07-01 17:49:08.629 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.630
2025-07-01 17:49:08.630 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.630 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.630
2025-07-01 17:49:08.630 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.630 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.630 alo = 418, ahi = 1101
2025-07-01 17:49:08.630 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.630 blo = 418, bhi = 1101
2025-07-01 17:49:08.630
2025-07-01 17:49:08.630 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.630 g = []
2025-07-01 17:49:08.630 if alo < ahi:
2025-07-01 17:49:08.630 if blo < bhi:
2025-07-01 17:49:08.630 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.630 else:
2025-07-01 17:49:08.630 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.630 elif blo < bhi:
2025-07-01 17:49:08.631 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.631
2025-07-01 17:49:08.631 > yield from g
2025-07-01 17:49:08.631
2025-07-01 17:49:08.631 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.631 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.631
2025-07-01 17:49:08.631 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.631 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.631 alo = 418, ahi = 1101
2025-07-01 17:49:08.631 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.631 blo = 418, bhi = 1101
2025-07-01 17:49:08.631
2025-07-01 17:49:08.631 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.631 r"""
2025-07-01 17:49:08.631 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.631 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.631 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.631 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.631
2025-07-01 17:49:08.632 Example:
2025-07-01 17:49:08.632
2025-07-01 17:49:08.632 >>> d = Differ()
2025-07-01 17:49:08.632 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.632 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.632 >>> print(''.join(results), end="")
2025-07-01 17:49:08.632 - abcDefghiJkl
2025-07-01 17:49:08.632 + abcdefGhijkl
2025-07-01 17:49:08.632 """
2025-07-01 17:49:08.632
2025-07-01 17:49:08.632 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.632 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.632 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.632 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.632 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.632
2025-07-01 17:49:08.632 # search for the pair that matches best without being identical
2025-07-01 17:49:08.632 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.632 # on junk -- unless we have to)
2025-07-01 17:49:08.633 for j in range(blo, bhi):
2025-07-01 17:49:08.633 bj = b[j]
2025-07-01 17:49:08.633 cruncher.set_seq2(bj)
2025-07-01 17:49:08.633 for i in range(alo, ahi):
2025-07-01 17:49:08.633 ai = a[i]
2025-07-01 17:49:08.633 if ai == bj:
2025-07-01 17:49:08.633 if eqi is None:
2025-07-01 17:49:08.633 eqi, eqj = i, j
2025-07-01 17:49:08.633 continue
2025-07-01 17:49:08.633 cruncher.set_seq1(ai)
2025-07-01 17:49:08.633 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.633 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.633 # compares by a factor of 3.
2025-07-01 17:49:08.633 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.633 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.633 # of the computation is cached by cruncher
2025-07-01 17:49:08.633 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.633 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.633 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.633 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.633 if best_ratio < cutoff:
2025-07-01 17:49:08.634 # no non-identical "pretty close" pair
2025-07-01 17:49:08.634 if eqi is None:
2025-07-01 17:49:08.634 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.634 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.634 return
2025-07-01 17:49:08.634 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.634 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.634 else:
2025-07-01 17:49:08.634 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.634 eqi = None
2025-07-01 17:49:08.634
2025-07-01 17:49:08.634 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.634 # identical
2025-07-01 17:49:08.634
2025-07-01 17:49:08.634 # pump out diffs from before the synch point
2025-07-01 17:49:08.634 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.634
2025-07-01 17:49:08.634 # do intraline marking on the synch pair
2025-07-01 17:49:08.634 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.634 if eqi is None:
2025-07-01 17:49:08.635 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.635 atags = btags = ""
2025-07-01 17:49:08.635 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.635 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.635 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.635 if tag == 'replace':
2025-07-01 17:49:08.635 atags += '^' * la
2025-07-01 17:49:08.635 btags += '^' * lb
2025-07-01 17:49:08.635 elif tag == 'delete':
2025-07-01 17:49:08.635 atags += '-' * la
2025-07-01 17:49:08.635 elif tag == 'insert':
2025-07-01 17:49:08.635 btags += '+' * lb
2025-07-01 17:49:08.635 elif tag == 'equal':
2025-07-01 17:49:08.635 atags += ' ' * la
2025-07-01 17:49:08.635 btags += ' ' * lb
2025-07-01 17:49:08.635 else:
2025-07-01 17:49:08.635 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.635 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.635 else:
2025-07-01 17:49:08.635 # the synch pair is identical
2025-07-01 17:49:08.635 yield ' ' + aelt
2025-07-01 17:49:08.636
2025-07-01 17:49:08.636 # pump out diffs from after the synch point
2025-07-01 17:49:08.636 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.636
2025-07-01 17:49:08.636 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.636 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.636
2025-07-01 17:49:08.636 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.636 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.636 alo = 419, ahi = 1101
2025-07-01 17:49:08.636 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.636 blo = 419, bhi = 1101
2025-07-01 17:49:08.636
2025-07-01 17:49:08.636 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.636 g = []
2025-07-01 17:49:08.636 if alo < ahi:
2025-07-01 17:49:08.636 if blo < bhi:
2025-07-01 17:49:08.636 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.636 else:
2025-07-01 17:49:08.636 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.636 elif blo < bhi:
2025-07-01 17:49:08.637 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.637
2025-07-01 17:49:08.637 > yield from g
2025-07-01 17:49:08.637
2025-07-01 17:49:08.637 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.637 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.637
2025-07-01 17:49:08.637 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.637 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.637 alo = 419, ahi = 1101
2025-07-01 17:49:08.637 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.637 blo = 419, bhi = 1101
2025-07-01 17:49:08.637
2025-07-01 17:49:08.637 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.637 r"""
2025-07-01 17:49:08.637 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.637 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.637 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.637 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.637
2025-07-01 17:49:08.638 Example:
2025-07-01 17:49:08.638
2025-07-01 17:49:08.638 >>> d = Differ()
2025-07-01 17:49:08.638 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.638 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.638 >>> print(''.join(results), end="")
2025-07-01 17:49:08.638 - abcDefghiJkl
2025-07-01 17:49:08.638 + abcdefGhijkl
2025-07-01 17:49:08.638 """
2025-07-01 17:49:08.638
2025-07-01 17:49:08.638 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.638 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.638 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.638 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.638 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.638
2025-07-01 17:49:08.638 # search for the pair that matches best without being identical
2025-07-01 17:49:08.638 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.639 # on junk -- unless we have to)
2025-07-01 17:49:08.644 for j in range(blo, bhi):
2025-07-01 17:49:08.644 bj = b[j]
2025-07-01 17:49:08.644 cruncher.set_seq2(bj)
2025-07-01 17:49:08.644 for i in range(alo, ahi):
2025-07-01 17:49:08.644 ai = a[i]
2025-07-01 17:49:08.644 if ai == bj:
2025-07-01 17:49:08.644 if eqi is None:
2025-07-01 17:49:08.644 eqi, eqj = i, j
2025-07-01 17:49:08.644 continue
2025-07-01 17:49:08.644 cruncher.set_seq1(ai)
2025-07-01 17:49:08.644 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.644 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.644 # compares by a factor of 3.
2025-07-01 17:49:08.644 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.644 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.644 # of the computation is cached by cruncher
2025-07-01 17:49:08.644 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.644 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.644 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.645 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.645 if best_ratio < cutoff:
2025-07-01 17:49:08.645 # no non-identical "pretty close" pair
2025-07-01 17:49:08.645 if eqi is None:
2025-07-01 17:49:08.645 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.645 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.645 return
2025-07-01 17:49:08.645 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.645 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.645 else:
2025-07-01 17:49:08.645 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.645 eqi = None
2025-07-01 17:49:08.645
2025-07-01 17:49:08.645 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.645 # identical
2025-07-01 17:49:08.645
2025-07-01 17:49:08.645 # pump out diffs from before the synch point
2025-07-01 17:49:08.645 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.645
2025-07-01 17:49:08.645 # do intraline marking on the synch pair
2025-07-01 17:49:08.645 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.646 if eqi is None:
2025-07-01 17:49:08.646 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.646 atags = btags = ""
2025-07-01 17:49:08.646 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.646 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.646 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.646 if tag == 'replace':
2025-07-01 17:49:08.646 atags += '^' * la
2025-07-01 17:49:08.646 btags += '^' * lb
2025-07-01 17:49:08.646 elif tag == 'delete':
2025-07-01 17:49:08.646 atags += '-' * la
2025-07-01 17:49:08.646 elif tag == 'insert':
2025-07-01 17:49:08.646 btags += '+' * lb
2025-07-01 17:49:08.646 elif tag == 'equal':
2025-07-01 17:49:08.646 atags += ' ' * la
2025-07-01 17:49:08.646 btags += ' ' * lb
2025-07-01 17:49:08.646 else:
2025-07-01 17:49:08.646 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.646 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.647 else:
2025-07-01 17:49:08.647 # the synch pair is identical
2025-07-01 17:49:08.647 yield ' ' + aelt
2025-07-01 17:49:08.647
2025-07-01 17:49:08.647 # pump out diffs from after the synch point
2025-07-01 17:49:08.647 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.647
2025-07-01 17:49:08.647 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.647 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.647
2025-07-01 17:49:08.647 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.647 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.647 alo = 422, ahi = 1101
2025-07-01 17:49:08.647 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.647 blo = 422, bhi = 1101
2025-07-01 17:49:08.647
2025-07-01 17:49:08.647 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.647 g = []
2025-07-01 17:49:08.647 if alo < ahi:
2025-07-01 17:49:08.647 if blo < bhi:
2025-07-01 17:49:08.648 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.648 else:
2025-07-01 17:49:08.648 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.648 elif blo < bhi:
2025-07-01 17:49:08.648 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.648
2025-07-01 17:49:08.648 > yield from g
2025-07-01 17:49:08.648
2025-07-01 17:49:08.648 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.648 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.648
2025-07-01 17:49:08.648 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.648 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.648 alo = 422, ahi = 1101
2025-07-01 17:49:08.648 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.648 blo = 422, bhi = 1101
2025-07-01 17:49:08.648
2025-07-01 17:49:08.648 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.648 r"""
2025-07-01 17:49:08.648 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.648 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.649 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.649 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.649
2025-07-01 17:49:08.649 Example:
2025-07-01 17:49:08.649
2025-07-01 17:49:08.649 >>> d = Differ()
2025-07-01 17:49:08.649 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.649 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.649 >>> print(''.join(results), end="")
2025-07-01 17:49:08.649 - abcDefghiJkl
2025-07-01 17:49:08.649 + abcdefGhijkl
2025-07-01 17:49:08.649 """
2025-07-01 17:49:08.649
2025-07-01 17:49:08.649 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.649 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.649 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.649 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.649 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.649
2025-07-01 17:49:08.650 # search for the pair that matches best without being identical
2025-07-01 17:49:08.650 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.650 # on junk -- unless we have to)
2025-07-01 17:49:08.650 for j in range(blo, bhi):
2025-07-01 17:49:08.650 bj = b[j]
2025-07-01 17:49:08.650 cruncher.set_seq2(bj)
2025-07-01 17:49:08.650 for i in range(alo, ahi):
2025-07-01 17:49:08.650 ai = a[i]
2025-07-01 17:49:08.650 if ai == bj:
2025-07-01 17:49:08.650 if eqi is None:
2025-07-01 17:49:08.650 eqi, eqj = i, j
2025-07-01 17:49:08.650 continue
2025-07-01 17:49:08.650 cruncher.set_seq1(ai)
2025-07-01 17:49:08.650 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.650 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.650 # compares by a factor of 3.
2025-07-01 17:49:08.650 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.650 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.650 # of the computation is cached by cruncher
2025-07-01 17:49:08.650 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.650 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.650 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.651 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.651 if best_ratio < cutoff:
2025-07-01 17:49:08.651 # no non-identical "pretty close" pair
2025-07-01 17:49:08.651 if eqi is None:
2025-07-01 17:49:08.651 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.651 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.651 return
2025-07-01 17:49:08.651 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.651 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.651 else:
2025-07-01 17:49:08.651 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.651 eqi = None
2025-07-01 17:49:08.651
2025-07-01 17:49:08.651 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.651 # identical
2025-07-01 17:49:08.651
2025-07-01 17:49:08.651 # pump out diffs from before the synch point
2025-07-01 17:49:08.651 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.651
2025-07-01 17:49:08.651 # do intraline marking on the synch pair
2025-07-01 17:49:08.652 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.652 if eqi is None:
2025-07-01 17:49:08.652 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.652 atags = btags = ""
2025-07-01 17:49:08.652 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.652 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.652 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.652 if tag == 'replace':
2025-07-01 17:49:08.652 atags += '^' * la
2025-07-01 17:49:08.652 btags += '^' * lb
2025-07-01 17:49:08.652 elif tag == 'delete':
2025-07-01 17:49:08.652 atags += '-' * la
2025-07-01 17:49:08.652 elif tag == 'insert':
2025-07-01 17:49:08.652 btags += '+' * lb
2025-07-01 17:49:08.652 elif tag == 'equal':
2025-07-01 17:49:08.652 atags += ' ' * la
2025-07-01 17:49:08.652 btags += ' ' * lb
2025-07-01 17:49:08.652 else:
2025-07-01 17:49:08.652 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.653 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.653 else:
2025-07-01 17:49:08.653 # the synch pair is identical
2025-07-01 17:49:08.653 yield ' ' + aelt
2025-07-01 17:49:08.653
2025-07-01 17:49:08.653 # pump out diffs from after the synch point
2025-07-01 17:49:08.653 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.653
2025-07-01 17:49:08.653 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.653 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.653
2025-07-01 17:49:08.653 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.653 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.653 alo = 423, ahi = 1101
2025-07-01 17:49:08.653 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.653 blo = 423, bhi = 1101
2025-07-01 17:49:08.653
2025-07-01 17:49:08.653 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.653 g = []
2025-07-01 17:49:08.653 if alo < ahi:
2025-07-01 17:49:08.657 if blo < bhi:
2025-07-01 17:49:08.657 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.657 else:
2025-07-01 17:49:08.657 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.657 elif blo < bhi:
2025-07-01 17:49:08.657 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.657
2025-07-01 17:49:08.657 > yield from g
2025-07-01 17:49:08.657
2025-07-01 17:49:08.657 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.657 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.657
2025-07-01 17:49:08.657 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.657 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.657 alo = 423, ahi = 1101
2025-07-01 17:49:08.657 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.658 blo = 423, bhi = 1101
2025-07-01 17:49:08.658
2025-07-01 17:49:08.658 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.658 r"""
2025-07-01 17:49:08.658 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.658 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.658 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.658 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.658
2025-07-01 17:49:08.658 Example:
2025-07-01 17:49:08.658
2025-07-01 17:49:08.658 >>> d = Differ()
2025-07-01 17:49:08.658 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.658 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.658 >>> print(''.join(results), end="")
2025-07-01 17:49:08.658 - abcDefghiJkl
2025-07-01 17:49:08.658 + abcdefGhijkl
2025-07-01 17:49:08.658 """
2025-07-01 17:49:08.659
2025-07-01 17:49:08.659 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.659 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.659 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.659 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.659 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.659
2025-07-01 17:49:08.659 # search for the pair that matches best without being identical
2025-07-01 17:49:08.659 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.659 # on junk -- unless we have to)
2025-07-01 17:49:08.659 for j in range(blo, bhi):
2025-07-01 17:49:08.659 bj = b[j]
2025-07-01 17:49:08.659 cruncher.set_seq2(bj)
2025-07-01 17:49:08.659 for i in range(alo, ahi):
2025-07-01 17:49:08.659 ai = a[i]
2025-07-01 17:49:08.659 if ai == bj:
2025-07-01 17:49:08.659 if eqi is None:
2025-07-01 17:49:08.659 eqi, eqj = i, j
2025-07-01 17:49:08.659 continue
2025-07-01 17:49:08.659 cruncher.set_seq1(ai)
2025-07-01 17:49:08.659 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.660 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.660 # compares by a factor of 3.
2025-07-01 17:49:08.660 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.660 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.660 # of the computation is cached by cruncher
2025-07-01 17:49:08.660 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.660 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.660 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.660 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.660 if best_ratio < cutoff:
2025-07-01 17:49:08.660 # no non-identical "pretty close" pair
2025-07-01 17:49:08.660 if eqi is None:
2025-07-01 17:49:08.660 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.660 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.660 return
2025-07-01 17:49:08.660 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.660 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.660 else:
2025-07-01 17:49:08.660 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.660 eqi = None
2025-07-01 17:49:08.661
2025-07-01 17:49:08.661 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.661 # identical
2025-07-01 17:49:08.661
2025-07-01 17:49:08.661 # pump out diffs from before the synch point
2025-07-01 17:49:08.661 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.661
2025-07-01 17:49:08.661 # do intraline marking on the synch pair
2025-07-01 17:49:08.661 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.661 if eqi is None:
2025-07-01 17:49:08.661 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.661 atags = btags = ""
2025-07-01 17:49:08.661 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.661 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.661 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.661 if tag == 'replace':
2025-07-01 17:49:08.661 atags += '^' * la
2025-07-01 17:49:08.661 btags += '^' * lb
2025-07-01 17:49:08.661 elif tag == 'delete':
2025-07-01 17:49:08.661 atags += '-' * la
2025-07-01 17:49:08.662 elif tag == 'insert':
2025-07-01 17:49:08.662 btags += '+' * lb
2025-07-01 17:49:08.662 elif tag == 'equal':
2025-07-01 17:49:08.662 atags += ' ' * la
2025-07-01 17:49:08.662 btags += ' ' * lb
2025-07-01 17:49:08.662 else:
2025-07-01 17:49:08.662 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.662 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.662 else:
2025-07-01 17:49:08.662 # the synch pair is identical
2025-07-01 17:49:08.662 yield ' ' + aelt
2025-07-01 17:49:08.662
2025-07-01 17:49:08.662 # pump out diffs from after the synch point
2025-07-01 17:49:08.662 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.662
2025-07-01 17:49:08.662 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.662 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.662
2025-07-01 17:49:08.662 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.662 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.662 alo = 424, ahi = 1101
2025-07-01 17:49:08.663 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.663 blo = 424, bhi = 1101
2025-07-01 17:49:08.663
2025-07-01 17:49:08.663 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.663 g = []
2025-07-01 17:49:08.663 if alo < ahi:
2025-07-01 17:49:08.663 if blo < bhi:
2025-07-01 17:49:08.663 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.663 else:
2025-07-01 17:49:08.663 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.663 elif blo < bhi:
2025-07-01 17:49:08.663 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.663
2025-07-01 17:49:08.663 > yield from g
2025-07-01 17:49:08.663
2025-07-01 17:49:08.663 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.663 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.663
2025-07-01 17:49:08.663 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.663 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.664 alo = 424, ahi = 1101
2025-07-01 17:49:08.664 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.664 blo = 424, bhi = 1101
2025-07-01 17:49:08.664
2025-07-01 17:49:08.664 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.664 r"""
2025-07-01 17:49:08.664 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.664 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.664 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.664 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.664
2025-07-01 17:49:08.664 Example:
2025-07-01 17:49:08.664
2025-07-01 17:49:08.664 >>> d = Differ()
2025-07-01 17:49:08.664 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.664 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.664 >>> print(''.join(results), end="")
2025-07-01 17:49:08.664 - abcDefghiJkl
2025-07-01 17:49:08.665 + abcdefGhijkl
2025-07-01 17:49:08.665 """
2025-07-01 17:49:08.665
2025-07-01 17:49:08.665 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.665 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.665 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.665 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.665 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.665
2025-07-01 17:49:08.665 # search for the pair that matches best without being identical
2025-07-01 17:49:08.665 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.665 # on junk -- unless we have to)
2025-07-01 17:49:08.665 for j in range(blo, bhi):
2025-07-01 17:49:08.665 bj = b[j]
2025-07-01 17:49:08.665 cruncher.set_seq2(bj)
2025-07-01 17:49:08.665 for i in range(alo, ahi):
2025-07-01 17:49:08.665 ai = a[i]
2025-07-01 17:49:08.665 if ai == bj:
2025-07-01 17:49:08.665 if eqi is None:
2025-07-01 17:49:08.666 eqi, eqj = i, j
2025-07-01 17:49:08.666 continue
2025-07-01 17:49:08.666 cruncher.set_seq1(ai)
2025-07-01 17:49:08.666 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.666 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.666 # compares by a factor of 3.
2025-07-01 17:49:08.666 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.666 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.666 # of the computation is cached by cruncher
2025-07-01 17:49:08.666 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.666 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.666 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.666 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.666 if best_ratio < cutoff:
2025-07-01 17:49:08.666 # no non-identical "pretty close" pair
2025-07-01 17:49:08.666 if eqi is None:
2025-07-01 17:49:08.666 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.666 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.666 return
2025-07-01 17:49:08.666 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.667 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.667 else:
2025-07-01 17:49:08.667 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.667 eqi = None
2025-07-01 17:49:08.667
2025-07-01 17:49:08.667 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.667 # identical
2025-07-01 17:49:08.667
2025-07-01 17:49:08.667 # pump out diffs from before the synch point
2025-07-01 17:49:08.667 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.667
2025-07-01 17:49:08.667 # do intraline marking on the synch pair
2025-07-01 17:49:08.667 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.667 if eqi is None:
2025-07-01 17:49:08.667 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.667 atags = btags = ""
2025-07-01 17:49:08.667 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.667 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.667 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.667 if tag == 'replace':
2025-07-01 17:49:08.667 atags += '^' * la
2025-07-01 17:49:08.668 btags += '^' * lb
2025-07-01 17:49:08.668 elif tag == 'delete':
2025-07-01 17:49:08.668 atags += '-' * la
2025-07-01 17:49:08.668 elif tag == 'insert':
2025-07-01 17:49:08.668 btags += '+' * lb
2025-07-01 17:49:08.668 elif tag == 'equal':
2025-07-01 17:49:08.668 atags += ' ' * la
2025-07-01 17:49:08.668 btags += ' ' * lb
2025-07-01 17:49:08.668 else:
2025-07-01 17:49:08.668 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.668 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.668 else:
2025-07-01 17:49:08.668 # the synch pair is identical
2025-07-01 17:49:08.668 yield ' ' + aelt
2025-07-01 17:49:08.668
2025-07-01 17:49:08.668 # pump out diffs from after the synch point
2025-07-01 17:49:08.668 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.668
2025-07-01 17:49:08.668 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.668 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.668
2025-07-01 17:49:08.673 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.673 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.673 alo = 425, ahi = 1101
2025-07-01 17:49:08.674 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.674 blo = 425, bhi = 1101
2025-07-01 17:49:08.674
2025-07-01 17:49:08.674 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.674 g = []
2025-07-01 17:49:08.674 if alo < ahi:
2025-07-01 17:49:08.674 if blo < bhi:
2025-07-01 17:49:08.674 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.674 else:
2025-07-01 17:49:08.674 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.674 elif blo < bhi:
2025-07-01 17:49:08.674 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.674
2025-07-01 17:49:08.674 > yield from g
2025-07-01 17:49:08.674
2025-07-01 17:49:08.674 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.674 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.674
2025-07-01 17:49:08.674 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.674 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.674 alo = 425, ahi = 1101
2025-07-01 17:49:08.675 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.675 blo = 425, bhi = 1101
2025-07-01 17:49:08.675
2025-07-01 17:49:08.675 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.675 r"""
2025-07-01 17:49:08.675 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.675 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.675 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.675 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.675
2025-07-01 17:49:08.675 Example:
2025-07-01 17:49:08.675
2025-07-01 17:49:08.675 >>> d = Differ()
2025-07-01 17:49:08.675 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.675 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.675 >>> print(''.join(results), end="")
2025-07-01 17:49:08.675 - abcDefghiJkl
2025-07-01 17:49:08.675 + abcdefGhijkl
2025-07-01 17:49:08.675 """
2025-07-01 17:49:08.675
2025-07-01 17:49:08.676 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.676 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.676 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.676 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.676 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.676
2025-07-01 17:49:08.676 # search for the pair that matches best without being identical
2025-07-01 17:49:08.676 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.676 # on junk -- unless we have to)
2025-07-01 17:49:08.676 for j in range(blo, bhi):
2025-07-01 17:49:08.676 bj = b[j]
2025-07-01 17:49:08.676 cruncher.set_seq2(bj)
2025-07-01 17:49:08.676 for i in range(alo, ahi):
2025-07-01 17:49:08.676 ai = a[i]
2025-07-01 17:49:08.676 if ai == bj:
2025-07-01 17:49:08.676 if eqi is None:
2025-07-01 17:49:08.676 eqi, eqj = i, j
2025-07-01 17:49:08.676 continue
2025-07-01 17:49:08.676 cruncher.set_seq1(ai)
2025-07-01 17:49:08.676 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.676 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.676 # compares by a factor of 3.
2025-07-01 17:49:08.677 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.677 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.677 # of the computation is cached by cruncher
2025-07-01 17:49:08.677 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.677 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.677 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.677 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.677 if best_ratio < cutoff:
2025-07-01 17:49:08.677 # no non-identical "pretty close" pair
2025-07-01 17:49:08.677 if eqi is None:
2025-07-01 17:49:08.677 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.677 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.677 return
2025-07-01 17:49:08.677 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.677 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.677 else:
2025-07-01 17:49:08.677 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.677 eqi = None
2025-07-01 17:49:08.677
2025-07-01 17:49:08.677 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.678 # identical
2025-07-01 17:49:08.678
2025-07-01 17:49:08.678 # pump out diffs from before the synch point
2025-07-01 17:49:08.678 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.678
2025-07-01 17:49:08.678 # do intraline marking on the synch pair
2025-07-01 17:49:08.678 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.678 if eqi is None:
2025-07-01 17:49:08.678 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.678 atags = btags = ""
2025-07-01 17:49:08.678 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.678 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.678 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.678 if tag == 'replace':
2025-07-01 17:49:08.678 atags += '^' * la
2025-07-01 17:49:08.678 btags += '^' * lb
2025-07-01 17:49:08.678 elif tag == 'delete':
2025-07-01 17:49:08.678 atags += '-' * la
2025-07-01 17:49:08.678 elif tag == 'insert':
2025-07-01 17:49:08.678 btags += '+' * lb
2025-07-01 17:49:08.679 elif tag == 'equal':
2025-07-01 17:49:08.679 atags += ' ' * la
2025-07-01 17:49:08.679 btags += ' ' * lb
2025-07-01 17:49:08.679 else:
2025-07-01 17:49:08.679 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.679 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.679 else:
2025-07-01 17:49:08.679 # the synch pair is identical
2025-07-01 17:49:08.679 yield ' ' + aelt
2025-07-01 17:49:08.679
2025-07-01 17:49:08.679 # pump out diffs from after the synch point
2025-07-01 17:49:08.679 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.679
2025-07-01 17:49:08.679 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.679 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.679
2025-07-01 17:49:08.679 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.679 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.679 alo = 426, ahi = 1101
2025-07-01 17:49:08.679 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.679 blo = 426, bhi = 1101
2025-07-01 17:49:08.679
2025-07-01 17:49:08.680 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.680 g = []
2025-07-01 17:49:08.680 if alo < ahi:
2025-07-01 17:49:08.680 if blo < bhi:
2025-07-01 17:49:08.680 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.680 else:
2025-07-01 17:49:08.680 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.680 elif blo < bhi:
2025-07-01 17:49:08.680 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.680
2025-07-01 17:49:08.680 > yield from g
2025-07-01 17:49:08.680
2025-07-01 17:49:08.680 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.680 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.680
2025-07-01 17:49:08.680 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.680 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.680 alo = 426, ahi = 1101
2025-07-01 17:49:08.680 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.680 blo = 426, bhi = 1101
2025-07-01 17:49:08.680
2025-07-01 17:49:08.680 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.681 r"""
2025-07-01 17:49:08.681 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.681 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.681 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.681 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.681
2025-07-01 17:49:08.681 Example:
2025-07-01 17:49:08.681
2025-07-01 17:49:08.681 >>> d = Differ()
2025-07-01 17:49:08.681 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.681 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.681 >>> print(''.join(results), end="")
2025-07-01 17:49:08.681 - abcDefghiJkl
2025-07-01 17:49:08.681 + abcdefGhijkl
2025-07-01 17:49:08.681 """
2025-07-01 17:49:08.681
2025-07-01 17:49:08.681 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.681 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.681 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.682 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.682 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.682
2025-07-01 17:49:08.682 # search for the pair that matches best without being identical
2025-07-01 17:49:08.682 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.682 # on junk -- unless we have to)
2025-07-01 17:49:08.682 for j in range(blo, bhi):
2025-07-01 17:49:08.682 bj = b[j]
2025-07-01 17:49:08.682 cruncher.set_seq2(bj)
2025-07-01 17:49:08.682 for i in range(alo, ahi):
2025-07-01 17:49:08.682 ai = a[i]
2025-07-01 17:49:08.682 if ai == bj:
2025-07-01 17:49:08.682 if eqi is None:
2025-07-01 17:49:08.682 eqi, eqj = i, j
2025-07-01 17:49:08.682 continue
2025-07-01 17:49:08.682 cruncher.set_seq1(ai)
2025-07-01 17:49:08.682 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.682 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.682 # compares by a factor of 3.
2025-07-01 17:49:08.682 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.682 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.682 # of the computation is cached by cruncher
2025-07-01 17:49:08.683 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.683 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.683 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.683 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.683 if best_ratio < cutoff:
2025-07-01 17:49:08.683 # no non-identical "pretty close" pair
2025-07-01 17:49:08.683 if eqi is None:
2025-07-01 17:49:08.683 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.683 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.683 return
2025-07-01 17:49:08.683 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.683 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.683 else:
2025-07-01 17:49:08.683 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.683 eqi = None
2025-07-01 17:49:08.683
2025-07-01 17:49:08.683 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.683 # identical
2025-07-01 17:49:08.683
2025-07-01 17:49:08.683 # pump out diffs from before the synch point
2025-07-01 17:49:08.683 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.684
2025-07-01 17:49:08.687 # do intraline marking on the synch pair
2025-07-01 17:49:08.687 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.687 if eqi is None:
2025-07-01 17:49:08.687 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.687 atags = btags = ""
2025-07-01 17:49:08.687 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.687 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.687 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.687 if tag == 'replace':
2025-07-01 17:49:08.687 atags += '^' * la
2025-07-01 17:49:08.687 btags += '^' * lb
2025-07-01 17:49:08.687 elif tag == 'delete':
2025-07-01 17:49:08.687 atags += '-' * la
2025-07-01 17:49:08.687 elif tag == 'insert':
2025-07-01 17:49:08.687 btags += '+' * lb
2025-07-01 17:49:08.687 elif tag == 'equal':
2025-07-01 17:49:08.687 atags += ' ' * la
2025-07-01 17:49:08.687 btags += ' ' * lb
2025-07-01 17:49:08.687 else:
2025-07-01 17:49:08.687 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.688 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.688 else:
2025-07-01 17:49:08.688 # the synch pair is identical
2025-07-01 17:49:08.688 yield ' ' + aelt
2025-07-01 17:49:08.688
2025-07-01 17:49:08.688 # pump out diffs from after the synch point
2025-07-01 17:49:08.688 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.688
2025-07-01 17:49:08.688 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.688 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.688
2025-07-01 17:49:08.688 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.688 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.688 alo = 427, ahi = 1101
2025-07-01 17:49:08.688 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.688 blo = 427, bhi = 1101
2025-07-01 17:49:08.688
2025-07-01 17:49:08.688 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.689 g = []
2025-07-01 17:49:08.689 if alo < ahi:
2025-07-01 17:49:08.689 if blo < bhi:
2025-07-01 17:49:08.689 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.689 else:
2025-07-01 17:49:08.689 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.689 elif blo < bhi:
2025-07-01 17:49:08.689 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.689
2025-07-01 17:49:08.689 > yield from g
2025-07-01 17:49:08.689
2025-07-01 17:49:08.689 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.689 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.689
2025-07-01 17:49:08.689 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.689 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.689 alo = 427, ahi = 1101
2025-07-01 17:49:08.689 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.689 blo = 427, bhi = 1101
2025-07-01 17:49:08.689
2025-07-01 17:49:08.689 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.690 r"""
2025-07-01 17:49:08.690 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.690 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.690 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.690 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.690
2025-07-01 17:49:08.690 Example:
2025-07-01 17:49:08.691
2025-07-01 17:49:08.691 >>> d = Differ()
2025-07-01 17:49:08.691 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.691 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.691 >>> print(''.join(results), end="")
2025-07-01 17:49:08.691 - abcDefghiJkl
2025-07-01 17:49:08.691 + abcdefGhijkl
2025-07-01 17:49:08.691 """
2025-07-01 17:49:08.691
2025-07-01 17:49:08.691 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.691 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.691 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.691 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.691 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.691
2025-07-01 17:49:08.691 # search for the pair that matches best without being identical
2025-07-01 17:49:08.691 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.691 # on junk -- unless we have to)
2025-07-01 17:49:08.692 for j in range(blo, bhi):
2025-07-01 17:49:08.692 bj = b[j]
2025-07-01 17:49:08.692 cruncher.set_seq2(bj)
2025-07-01 17:49:08.692 for i in range(alo, ahi):
2025-07-01 17:49:08.692 ai = a[i]
2025-07-01 17:49:08.692 if ai == bj:
2025-07-01 17:49:08.692 if eqi is None:
2025-07-01 17:49:08.692 eqi, eqj = i, j
2025-07-01 17:49:08.692 continue
2025-07-01 17:49:08.692 cruncher.set_seq1(ai)
2025-07-01 17:49:08.692 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.692 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.692 # compares by a factor of 3.
2025-07-01 17:49:08.692 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.692 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.692 # of the computation is cached by cruncher
2025-07-01 17:49:08.692 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.692 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.692 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.692 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.692 if best_ratio < cutoff:
2025-07-01 17:49:08.693 # no non-identical "pretty close" pair
2025-07-01 17:49:08.693 if eqi is None:
2025-07-01 17:49:08.693 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.693 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.693 return
2025-07-01 17:49:08.693 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.693 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.693 else:
2025-07-01 17:49:08.693 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.693 eqi = None
2025-07-01 17:49:08.693
2025-07-01 17:49:08.693 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.693 # identical
2025-07-01 17:49:08.693
2025-07-01 17:49:08.693 # pump out diffs from before the synch point
2025-07-01 17:49:08.693 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.693
2025-07-01 17:49:08.693 # do intraline marking on the synch pair
2025-07-01 17:49:08.693 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.693 if eqi is None:
2025-07-01 17:49:08.693 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.694 atags = btags = ""
2025-07-01 17:49:08.694 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.694 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.694 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.694 if tag == 'replace':
2025-07-01 17:49:08.694 atags += '^' * la
2025-07-01 17:49:08.694 btags += '^' * lb
2025-07-01 17:49:08.694 elif tag == 'delete':
2025-07-01 17:49:08.694 atags += '-' * la
2025-07-01 17:49:08.694 elif tag == 'insert':
2025-07-01 17:49:08.694 btags += '+' * lb
2025-07-01 17:49:08.694 elif tag == 'equal':
2025-07-01 17:49:08.694 atags += ' ' * la
2025-07-01 17:49:08.694 btags += ' ' * lb
2025-07-01 17:49:08.694 else:
2025-07-01 17:49:08.694 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.694 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.694 else:
2025-07-01 17:49:08.694 # the synch pair is identical
2025-07-01 17:49:08.694 yield ' ' + aelt
2025-07-01 17:49:08.694
2025-07-01 17:49:08.694 # pump out diffs from after the synch point
2025-07-01 17:49:08.695 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.695
2025-07-01 17:49:08.695 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.695 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.695
2025-07-01 17:49:08.695 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.695 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.695 alo = 428, ahi = 1101
2025-07-01 17:49:08.695 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.695 blo = 428, bhi = 1101
2025-07-01 17:49:08.695
2025-07-01 17:49:08.695 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.695 g = []
2025-07-01 17:49:08.695 if alo < ahi:
2025-07-01 17:49:08.695 if blo < bhi:
2025-07-01 17:49:08.695 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.695 else:
2025-07-01 17:49:08.695 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.695 elif blo < bhi:
2025-07-01 17:49:08.695 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.695
2025-07-01 17:49:08.696 > yield from g
2025-07-01 17:49:08.696
2025-07-01 17:49:08.696 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.696 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.696
2025-07-01 17:49:08.696 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.696 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.696 alo = 428, ahi = 1101
2025-07-01 17:49:08.696 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.696 blo = 428, bhi = 1101
2025-07-01 17:49:08.696
2025-07-01 17:49:08.696 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.696 r"""
2025-07-01 17:49:08.696 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.696 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.696 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.696 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.696
2025-07-01 17:49:08.696 Example:
2025-07-01 17:49:08.696
2025-07-01 17:49:08.696 >>> d = Differ()
2025-07-01 17:49:08.697 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.697 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.697 >>> print(''.join(results), end="")
2025-07-01 17:49:08.697 - abcDefghiJkl
2025-07-01 17:49:08.697 + abcdefGhijkl
2025-07-01 17:49:08.697 """
2025-07-01 17:49:08.697
2025-07-01 17:49:08.697 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.697 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.697 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.697 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.697 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.697
2025-07-01 17:49:08.697 # search for the pair that matches best without being identical
2025-07-01 17:49:08.697 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.697 # on junk -- unless we have to)
2025-07-01 17:49:08.697 for j in range(blo, bhi):
2025-07-01 17:49:08.698 bj = b[j]
2025-07-01 17:49:08.698 cruncher.set_seq2(bj)
2025-07-01 17:49:08.698 for i in range(alo, ahi):
2025-07-01 17:49:08.698 ai = a[i]
2025-07-01 17:49:08.698 if ai == bj:
2025-07-01 17:49:08.698 if eqi is None:
2025-07-01 17:49:08.698 eqi, eqj = i, j
2025-07-01 17:49:08.698 continue
2025-07-01 17:49:08.698 cruncher.set_seq1(ai)
2025-07-01 17:49:08.698 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.698 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.698 # compares by a factor of 3.
2025-07-01 17:49:08.698 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.698 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.698 # of the computation is cached by cruncher
2025-07-01 17:49:08.698 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.698 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.698 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.698 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.698 if best_ratio < cutoff:
2025-07-01 17:49:08.699 # no non-identical "pretty close" pair
2025-07-01 17:49:08.699 if eqi is None:
2025-07-01 17:49:08.699 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.699 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.699 return
2025-07-01 17:49:08.699 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.699 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.699 else:
2025-07-01 17:49:08.699 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.699 eqi = None
2025-07-01 17:49:08.699
2025-07-01 17:49:08.699 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.699 # identical
2025-07-01 17:49:08.699
2025-07-01 17:49:08.699 # pump out diffs from before the synch point
2025-07-01 17:49:08.699 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.699
2025-07-01 17:49:08.699 # do intraline marking on the synch pair
2025-07-01 17:49:08.699 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.699 if eqi is None:
2025-07-01 17:49:08.699 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.700 atags = btags = ""
2025-07-01 17:49:08.704 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.705 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.705 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.705 if tag == 'replace':
2025-07-01 17:49:08.705 atags += '^' * la
2025-07-01 17:49:08.705 btags += '^' * lb
2025-07-01 17:49:08.705 elif tag == 'delete':
2025-07-01 17:49:08.705 atags += '-' * la
2025-07-01 17:49:08.705 elif tag == 'insert':
2025-07-01 17:49:08.705 btags += '+' * lb
2025-07-01 17:49:08.705 elif tag == 'equal':
2025-07-01 17:49:08.705 atags += ' ' * la
2025-07-01 17:49:08.705 btags += ' ' * lb
2025-07-01 17:49:08.705 else:
2025-07-01 17:49:08.705 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.705 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.705 else:
2025-07-01 17:49:08.705 # the synch pair is identical
2025-07-01 17:49:08.705 yield ' ' + aelt
2025-07-01 17:49:08.705
2025-07-01 17:49:08.705 # pump out diffs from after the synch point
2025-07-01 17:49:08.706 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.706
2025-07-01 17:49:08.706 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.706 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.706
2025-07-01 17:49:08.706 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.706 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.706 alo = 429, ahi = 1101
2025-07-01 17:49:08.706 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.706 blo = 429, bhi = 1101
2025-07-01 17:49:08.706
2025-07-01 17:49:08.706 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.706 g = []
2025-07-01 17:49:08.706 if alo < ahi:
2025-07-01 17:49:08.706 if blo < bhi:
2025-07-01 17:49:08.706 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.706 else:
2025-07-01 17:49:08.706 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.706 elif blo < bhi:
2025-07-01 17:49:08.706 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.706
2025-07-01 17:49:08.707 > yield from g
2025-07-01 17:49:08.707
2025-07-01 17:49:08.707 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.707 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.707
2025-07-01 17:49:08.707 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.707 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.707 alo = 429, ahi = 1101
2025-07-01 17:49:08.707 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.707 blo = 429, bhi = 1101
2025-07-01 17:49:08.707
2025-07-01 17:49:08.707 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.707 r"""
2025-07-01 17:49:08.707 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.707 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.707 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.707 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.707
2025-07-01 17:49:08.708 Example:
2025-07-01 17:49:08.708
2025-07-01 17:49:08.708 >>> d = Differ()
2025-07-01 17:49:08.708 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.708 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.708 >>> print(''.join(results), end="")
2025-07-01 17:49:08.708 - abcDefghiJkl
2025-07-01 17:49:08.708 + abcdefGhijkl
2025-07-01 17:49:08.708 """
2025-07-01 17:49:08.708
2025-07-01 17:49:08.708 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.708 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.708 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.708 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.708 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.708
2025-07-01 17:49:08.708 # search for the pair that matches best without being identical
2025-07-01 17:49:08.708 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.708 # on junk -- unless we have to)
2025-07-01 17:49:08.709 for j in range(blo, bhi):
2025-07-01 17:49:08.709 bj = b[j]
2025-07-01 17:49:08.709 cruncher.set_seq2(bj)
2025-07-01 17:49:08.709 for i in range(alo, ahi):
2025-07-01 17:49:08.709 ai = a[i]
2025-07-01 17:49:08.709 if ai == bj:
2025-07-01 17:49:08.709 if eqi is None:
2025-07-01 17:49:08.709 eqi, eqj = i, j
2025-07-01 17:49:08.709 continue
2025-07-01 17:49:08.709 cruncher.set_seq1(ai)
2025-07-01 17:49:08.709 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.709 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.709 # compares by a factor of 3.
2025-07-01 17:49:08.709 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.709 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.709 # of the computation is cached by cruncher
2025-07-01 17:49:08.709 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.709 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.709 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.709 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.709 if best_ratio < cutoff:
2025-07-01 17:49:08.710 # no non-identical "pretty close" pair
2025-07-01 17:49:08.710 if eqi is None:
2025-07-01 17:49:08.710 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.710 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.710 return
2025-07-01 17:49:08.710 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.710 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.710 else:
2025-07-01 17:49:08.710 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.710 eqi = None
2025-07-01 17:49:08.710
2025-07-01 17:49:08.710 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.710 # identical
2025-07-01 17:49:08.710
2025-07-01 17:49:08.710 # pump out diffs from before the synch point
2025-07-01 17:49:08.710 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.710
2025-07-01 17:49:08.710 # do intraline marking on the synch pair
2025-07-01 17:49:08.711 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.711 if eqi is None:
2025-07-01 17:49:08.711 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.711 atags = btags = ""
2025-07-01 17:49:08.711 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.711 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.711 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.711 if tag == 'replace':
2025-07-01 17:49:08.711 atags += '^' * la
2025-07-01 17:49:08.711 btags += '^' * lb
2025-07-01 17:49:08.711 elif tag == 'delete':
2025-07-01 17:49:08.711 atags += '-' * la
2025-07-01 17:49:08.711 elif tag == 'insert':
2025-07-01 17:49:08.711 btags += '+' * lb
2025-07-01 17:49:08.711 elif tag == 'equal':
2025-07-01 17:49:08.711 atags += ' ' * la
2025-07-01 17:49:08.711 btags += ' ' * lb
2025-07-01 17:49:08.711 else:
2025-07-01 17:49:08.711 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.711 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.711 else:
2025-07-01 17:49:08.712 # the synch pair is identical
2025-07-01 17:49:08.712 yield ' ' + aelt
2025-07-01 17:49:08.712
2025-07-01 17:49:08.712 # pump out diffs from after the synch point
2025-07-01 17:49:08.712 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.712
2025-07-01 17:49:08.712 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.712 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.712
2025-07-01 17:49:08.712 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.712 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.712 alo = 430, ahi = 1101
2025-07-01 17:49:08.712 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.712 blo = 430, bhi = 1101
2025-07-01 17:49:08.712
2025-07-01 17:49:08.712 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.712 g = []
2025-07-01 17:49:08.712 if alo < ahi:
2025-07-01 17:49:08.712 if blo < bhi:
2025-07-01 17:49:08.712 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.713 else:
2025-07-01 17:49:08.713 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.713 elif blo < bhi:
2025-07-01 17:49:08.713 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.713
2025-07-01 17:49:08.713 > yield from g
2025-07-01 17:49:08.713
2025-07-01 17:49:08.713 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.713 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.713
2025-07-01 17:49:08.713 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.713 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.713 alo = 430, ahi = 1101
2025-07-01 17:49:08.713 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.713 blo = 430, bhi = 1101
2025-07-01 17:49:08.713
2025-07-01 17:49:08.713 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.713 r"""
2025-07-01 17:49:08.713 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.713 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.714 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.714 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.714
2025-07-01 17:49:08.714 Example:
2025-07-01 17:49:08.714
2025-07-01 17:49:08.714 >>> d = Differ()
2025-07-01 17:49:08.714 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.714 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.714 >>> print(''.join(results), end="")
2025-07-01 17:49:08.714 - abcDefghiJkl
2025-07-01 17:49:08.714 + abcdefGhijkl
2025-07-01 17:49:08.714 """
2025-07-01 17:49:08.714
2025-07-01 17:49:08.714 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.714 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.714 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.714 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.714 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.714
2025-07-01 17:49:08.715 # search for the pair that matches best without being identical
2025-07-01 17:49:08.717 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.717 # on junk -- unless we have to)
2025-07-01 17:49:08.718 for j in range(blo, bhi):
2025-07-01 17:49:08.718 bj = b[j]
2025-07-01 17:49:08.718 cruncher.set_seq2(bj)
2025-07-01 17:49:08.718 for i in range(alo, ahi):
2025-07-01 17:49:08.718 ai = a[i]
2025-07-01 17:49:08.718 if ai == bj:
2025-07-01 17:49:08.718 if eqi is None:
2025-07-01 17:49:08.718 eqi, eqj = i, j
2025-07-01 17:49:08.718 continue
2025-07-01 17:49:08.718 cruncher.set_seq1(ai)
2025-07-01 17:49:08.718 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.718 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.718 # compares by a factor of 3.
2025-07-01 17:49:08.718 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.718 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.718 # of the computation is cached by cruncher
2025-07-01 17:49:08.718 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.718 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.718 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.718 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.719 if best_ratio < cutoff:
2025-07-01 17:49:08.719 # no non-identical "pretty close" pair
2025-07-01 17:49:08.719 if eqi is None:
2025-07-01 17:49:08.719 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.719 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.719 return
2025-07-01 17:49:08.719 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.719 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.719 else:
2025-07-01 17:49:08.719 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.719 eqi = None
2025-07-01 17:49:08.719
2025-07-01 17:49:08.719 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.719 # identical
2025-07-01 17:49:08.719
2025-07-01 17:49:08.719 # pump out diffs from before the synch point
2025-07-01 17:49:08.719 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.719
2025-07-01 17:49:08.719 # do intraline marking on the synch pair
2025-07-01 17:49:08.719 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.720 if eqi is None:
2025-07-01 17:49:08.720 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.720 atags = btags = ""
2025-07-01 17:49:08.720 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.720 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.720 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.720 if tag == 'replace':
2025-07-01 17:49:08.720 atags += '^' * la
2025-07-01 17:49:08.720 btags += '^' * lb
2025-07-01 17:49:08.720 elif tag == 'delete':
2025-07-01 17:49:08.720 atags += '-' * la
2025-07-01 17:49:08.720 elif tag == 'insert':
2025-07-01 17:49:08.720 btags += '+' * lb
2025-07-01 17:49:08.720 elif tag == 'equal':
2025-07-01 17:49:08.720 atags += ' ' * la
2025-07-01 17:49:08.720 btags += ' ' * lb
2025-07-01 17:49:08.720 else:
2025-07-01 17:49:08.720 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.720 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.720 else:
2025-07-01 17:49:08.721 # the synch pair is identical
2025-07-01 17:49:08.721 yield ' ' + aelt
2025-07-01 17:49:08.721
2025-07-01 17:49:08.721 # pump out diffs from after the synch point
2025-07-01 17:49:08.721 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.721
2025-07-01 17:49:08.721 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.721 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.721
2025-07-01 17:49:08.721 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.721 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.721 alo = 431, ahi = 1101
2025-07-01 17:49:08.721 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.721 blo = 431, bhi = 1101
2025-07-01 17:49:08.721
2025-07-01 17:49:08.721 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.722 g = []
2025-07-01 17:49:08.722 if alo < ahi:
2025-07-01 17:49:08.722 if blo < bhi:
2025-07-01 17:49:08.722 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.722 else:
2025-07-01 17:49:08.722 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.722 elif blo < bhi:
2025-07-01 17:49:08.722 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.722
2025-07-01 17:49:08.722 > yield from g
2025-07-01 17:49:08.722
2025-07-01 17:49:08.722 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.722 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.722
2025-07-01 17:49:08.722 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.722 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.722 alo = 431, ahi = 1101
2025-07-01 17:49:08.722 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.722 blo = 431, bhi = 1101
2025-07-01 17:49:08.722
2025-07-01 17:49:08.723 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.723 r"""
2025-07-01 17:49:08.723 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.723 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.723 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.723 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.723
2025-07-01 17:49:08.723 Example:
2025-07-01 17:49:08.723
2025-07-01 17:49:08.723 >>> d = Differ()
2025-07-01 17:49:08.723 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.723 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.723 >>> print(''.join(results), end="")
2025-07-01 17:49:08.723 - abcDefghiJkl
2025-07-01 17:49:08.723 + abcdefGhijkl
2025-07-01 17:49:08.723 """
2025-07-01 17:49:08.723
2025-07-01 17:49:08.723 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.724 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.724 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.724 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.724 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.724
2025-07-01 17:49:08.724 # search for the pair that matches best without being identical
2025-07-01 17:49:08.724 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.724 # on junk -- unless we have to)
2025-07-01 17:49:08.724 for j in range(blo, bhi):
2025-07-01 17:49:08.724 bj = b[j]
2025-07-01 17:49:08.724 cruncher.set_seq2(bj)
2025-07-01 17:49:08.724 for i in range(alo, ahi):
2025-07-01 17:49:08.724 ai = a[i]
2025-07-01 17:49:08.724 if ai == bj:
2025-07-01 17:49:08.724 if eqi is None:
2025-07-01 17:49:08.724 eqi, eqj = i, j
2025-07-01 17:49:08.724 continue
2025-07-01 17:49:08.724 cruncher.set_seq1(ai)
2025-07-01 17:49:08.724 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.724 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.724 # compares by a factor of 3.
2025-07-01 17:49:08.725 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.725 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.725 # of the computation is cached by cruncher
2025-07-01 17:49:08.725 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.725 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.725 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.725 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.725 if best_ratio < cutoff:
2025-07-01 17:49:08.725 # no non-identical "pretty close" pair
2025-07-01 17:49:08.725 if eqi is None:
2025-07-01 17:49:08.725 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.725 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.725 return
2025-07-01 17:49:08.725 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.725 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.725 else:
2025-07-01 17:49:08.725 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.725 eqi = None
2025-07-01 17:49:08.725
2025-07-01 17:49:08.725 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.726 # identical
2025-07-01 17:49:08.726
2025-07-01 17:49:08.726 # pump out diffs from before the synch point
2025-07-01 17:49:08.726 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.726
2025-07-01 17:49:08.726 # do intraline marking on the synch pair
2025-07-01 17:49:08.726 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.726 if eqi is None:
2025-07-01 17:49:08.726 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.726 atags = btags = ""
2025-07-01 17:49:08.726 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.726 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.726 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.726 if tag == 'replace':
2025-07-01 17:49:08.726 atags += '^' * la
2025-07-01 17:49:08.726 btags += '^' * lb
2025-07-01 17:49:08.726 elif tag == 'delete':
2025-07-01 17:49:08.726 atags += '-' * la
2025-07-01 17:49:08.726 elif tag == 'insert':
2025-07-01 17:49:08.726 btags += '+' * lb
2025-07-01 17:49:08.726 elif tag == 'equal':
2025-07-01 17:49:08.727 atags += ' ' * la
2025-07-01 17:49:08.727 btags += ' ' * lb
2025-07-01 17:49:08.727 else:
2025-07-01 17:49:08.727 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.727 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.727 else:
2025-07-01 17:49:08.727 # the synch pair is identical
2025-07-01 17:49:08.727 yield ' ' + aelt
2025-07-01 17:49:08.727
2025-07-01 17:49:08.727 # pump out diffs from after the synch point
2025-07-01 17:49:08.727 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.727
2025-07-01 17:49:08.727 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.727 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.727
2025-07-01 17:49:08.727 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.727 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.727 alo = 432, ahi = 1101
2025-07-01 17:49:08.727 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.727 blo = 432, bhi = 1101
2025-07-01 17:49:08.727
2025-07-01 17:49:08.728 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.728 g = []
2025-07-01 17:49:08.728 if alo < ahi:
2025-07-01 17:49:08.728 if blo < bhi:
2025-07-01 17:49:08.728 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.728 else:
2025-07-01 17:49:08.728 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.728 elif blo < bhi:
2025-07-01 17:49:08.728 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.728
2025-07-01 17:49:08.728 > yield from g
2025-07-01 17:49:08.728
2025-07-01 17:49:08.728 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.728 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.728
2025-07-01 17:49:08.728 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.728 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.728 alo = 432, ahi = 1101
2025-07-01 17:49:08.728 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.728 blo = 432, bhi = 1101
2025-07-01 17:49:08.729
2025-07-01 17:49:08.729 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.729 r"""
2025-07-01 17:49:08.729 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.729 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.729 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.729 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.729
2025-07-01 17:49:08.729 Example:
2025-07-01 17:49:08.729
2025-07-01 17:49:08.729 >>> d = Differ()
2025-07-01 17:49:08.729 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.729 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.729 >>> print(''.join(results), end="")
2025-07-01 17:49:08.729 - abcDefghiJkl
2025-07-01 17:49:08.729 + abcdefGhijkl
2025-07-01 17:49:08.729 """
2025-07-01 17:49:08.729
2025-07-01 17:49:08.729 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.730 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.730 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.730 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.730 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.730
2025-07-01 17:49:08.730 # search for the pair that matches best without being identical
2025-07-01 17:49:08.730 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.730 # on junk -- unless we have to)
2025-07-01 17:49:08.730 for j in range(blo, bhi):
2025-07-01 17:49:08.730 bj = b[j]
2025-07-01 17:49:08.730 cruncher.set_seq2(bj)
2025-07-01 17:49:08.730 for i in range(alo, ahi):
2025-07-01 17:49:08.730 ai = a[i]
2025-07-01 17:49:08.730 if ai == bj:
2025-07-01 17:49:08.730 if eqi is None:
2025-07-01 17:49:08.730 eqi, eqj = i, j
2025-07-01 17:49:08.730 continue
2025-07-01 17:49:08.730 cruncher.set_seq1(ai)
2025-07-01 17:49:08.730 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.730 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.731 # compares by a factor of 3.
2025-07-01 17:49:08.736 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.736 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.736 # of the computation is cached by cruncher
2025-07-01 17:49:08.736 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.736 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.736 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.736 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.736 if best_ratio < cutoff:
2025-07-01 17:49:08.736 # no non-identical "pretty close" pair
2025-07-01 17:49:08.736 if eqi is None:
2025-07-01 17:49:08.736 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.736 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.736 return
2025-07-01 17:49:08.736 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.736 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.736 else:
2025-07-01 17:49:08.736 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.736 eqi = None
2025-07-01 17:49:08.736
2025-07-01 17:49:08.737 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.737 # identical
2025-07-01 17:49:08.737
2025-07-01 17:49:08.737 # pump out diffs from before the synch point
2025-07-01 17:49:08.737 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.737
2025-07-01 17:49:08.737 # do intraline marking on the synch pair
2025-07-01 17:49:08.737 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.737 if eqi is None:
2025-07-01 17:49:08.737 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.737 atags = btags = ""
2025-07-01 17:49:08.737 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.737 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.737 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.737 if tag == 'replace':
2025-07-01 17:49:08.737 atags += '^' * la
2025-07-01 17:49:08.737 btags += '^' * lb
2025-07-01 17:49:08.737 elif tag == 'delete':
2025-07-01 17:49:08.737 atags += '-' * la
2025-07-01 17:49:08.737 elif tag == 'insert':
2025-07-01 17:49:08.737 btags += '+' * lb
2025-07-01 17:49:08.737 elif tag == 'equal':
2025-07-01 17:49:08.738 atags += ' ' * la
2025-07-01 17:49:08.738 btags += ' ' * lb
2025-07-01 17:49:08.738 else:
2025-07-01 17:49:08.738 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.738 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.738 else:
2025-07-01 17:49:08.738 # the synch pair is identical
2025-07-01 17:49:08.738 yield ' ' + aelt
2025-07-01 17:49:08.738
2025-07-01 17:49:08.738 # pump out diffs from after the synch point
2025-07-01 17:49:08.738 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.738
2025-07-01 17:49:08.738 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.738 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.738
2025-07-01 17:49:08.738 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.738 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.738 alo = 433, ahi = 1101
2025-07-01 17:49:08.738 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.738 blo = 433, bhi = 1101
2025-07-01 17:49:08.739
2025-07-01 17:49:08.739 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.739 g = []
2025-07-01 17:49:08.739 if alo < ahi:
2025-07-01 17:49:08.739 if blo < bhi:
2025-07-01 17:49:08.739 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.739 else:
2025-07-01 17:49:08.739 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.739 elif blo < bhi:
2025-07-01 17:49:08.739 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.739
2025-07-01 17:49:08.739 > yield from g
2025-07-01 17:49:08.739
2025-07-01 17:49:08.739 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.739 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.739
2025-07-01 17:49:08.739 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.739 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.739 alo = 433, ahi = 1101
2025-07-01 17:49:08.739 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.739 blo = 433, bhi = 1101
2025-07-01 17:49:08.739
2025-07-01 17:49:08.740 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.740 r"""
2025-07-01 17:49:08.740 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.740 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.740 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.740 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.740
2025-07-01 17:49:08.740 Example:
2025-07-01 17:49:08.740
2025-07-01 17:49:08.740 >>> d = Differ()
2025-07-01 17:49:08.740 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.740 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.740 >>> print(''.join(results), end="")
2025-07-01 17:49:08.740 - abcDefghiJkl
2025-07-01 17:49:08.740 + abcdefGhijkl
2025-07-01 17:49:08.740 """
2025-07-01 17:49:08.740
2025-07-01 17:49:08.740 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.740 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.741 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.741 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.741 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.741
2025-07-01 17:49:08.741 # search for the pair that matches best without being identical
2025-07-01 17:49:08.741 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.741 # on junk -- unless we have to)
2025-07-01 17:49:08.741 for j in range(blo, bhi):
2025-07-01 17:49:08.741 bj = b[j]
2025-07-01 17:49:08.741 cruncher.set_seq2(bj)
2025-07-01 17:49:08.741 for i in range(alo, ahi):
2025-07-01 17:49:08.741 ai = a[i]
2025-07-01 17:49:08.741 if ai == bj:
2025-07-01 17:49:08.741 if eqi is None:
2025-07-01 17:49:08.741 eqi, eqj = i, j
2025-07-01 17:49:08.741 continue
2025-07-01 17:49:08.741 cruncher.set_seq1(ai)
2025-07-01 17:49:08.741 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.741 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.741 # compares by a factor of 3.
2025-07-01 17:49:08.741 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.742 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.742 # of the computation is cached by cruncher
2025-07-01 17:49:08.742 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.742 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.742 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.742 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.742 if best_ratio < cutoff:
2025-07-01 17:49:08.742 # no non-identical "pretty close" pair
2025-07-01 17:49:08.742 if eqi is None:
2025-07-01 17:49:08.742 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.742 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.742 return
2025-07-01 17:49:08.742 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.742 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.742 else:
2025-07-01 17:49:08.742 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.742 eqi = None
2025-07-01 17:49:08.742
2025-07-01 17:49:08.742 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.742 # identical
2025-07-01 17:49:08.742
2025-07-01 17:49:08.743 # pump out diffs from before the synch point
2025-07-01 17:49:08.743 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.743
2025-07-01 17:49:08.743 # do intraline marking on the synch pair
2025-07-01 17:49:08.743 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.743 if eqi is None:
2025-07-01 17:49:08.743 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.743 atags = btags = ""
2025-07-01 17:49:08.743 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.743 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.743 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.743 if tag == 'replace':
2025-07-01 17:49:08.743 atags += '^' * la
2025-07-01 17:49:08.743 btags += '^' * lb
2025-07-01 17:49:08.743 elif tag == 'delete':
2025-07-01 17:49:08.743 atags += '-' * la
2025-07-01 17:49:08.743 elif tag == 'insert':
2025-07-01 17:49:08.743 btags += '+' * lb
2025-07-01 17:49:08.743 elif tag == 'equal':
2025-07-01 17:49:08.744 atags += ' ' * la
2025-07-01 17:49:08.744 btags += ' ' * lb
2025-07-01 17:49:08.744 else:
2025-07-01 17:49:08.744 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.744 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.744 else:
2025-07-01 17:49:08.744 # the synch pair is identical
2025-07-01 17:49:08.744 yield ' ' + aelt
2025-07-01 17:49:08.744
2025-07-01 17:49:08.744 # pump out diffs from after the synch point
2025-07-01 17:49:08.744 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.744
2025-07-01 17:49:08.744 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.744 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.744
2025-07-01 17:49:08.744 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.744 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.744 alo = 434, ahi = 1101
2025-07-01 17:49:08.744 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.744 blo = 434, bhi = 1101
2025-07-01 17:49:08.745
2025-07-01 17:49:08.745 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.745 g = []
2025-07-01 17:49:08.745 if alo < ahi:
2025-07-01 17:49:08.745 if blo < bhi:
2025-07-01 17:49:08.745 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.745 else:
2025-07-01 17:49:08.745 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.745 elif blo < bhi:
2025-07-01 17:49:08.745 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.745
2025-07-01 17:49:08.745 > yield from g
2025-07-01 17:49:08.745
2025-07-01 17:49:08.745 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.745 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.745
2025-07-01 17:49:08.745 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.745 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.745 alo = 434, ahi = 1101
2025-07-01 17:49:08.745 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.746 blo = 434, bhi = 1101
2025-07-01 17:49:08.749
2025-07-01 17:49:08.749 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.749 r"""
2025-07-01 17:49:08.749 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.749 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.749 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.749 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.749
2025-07-01 17:49:08.749 Example:
2025-07-01 17:49:08.749
2025-07-01 17:49:08.749 >>> d = Differ()
2025-07-01 17:49:08.749 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.749 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.749 >>> print(''.join(results), end="")
2025-07-01 17:49:08.749 - abcDefghiJkl
2025-07-01 17:49:08.749 + abcdefGhijkl
2025-07-01 17:49:08.749 """
2025-07-01 17:49:08.749
2025-07-01 17:49:08.750 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.750 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.750 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.750 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.750 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.750
2025-07-01 17:49:08.750 # search for the pair that matches best without being identical
2025-07-01 17:49:08.750 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.750 # on junk -- unless we have to)
2025-07-01 17:49:08.750 for j in range(blo, bhi):
2025-07-01 17:49:08.750 bj = b[j]
2025-07-01 17:49:08.750 cruncher.set_seq2(bj)
2025-07-01 17:49:08.750 for i in range(alo, ahi):
2025-07-01 17:49:08.750 ai = a[i]
2025-07-01 17:49:08.750 if ai == bj:
2025-07-01 17:49:08.750 if eqi is None:
2025-07-01 17:49:08.750 eqi, eqj = i, j
2025-07-01 17:49:08.750 continue
2025-07-01 17:49:08.750 cruncher.set_seq1(ai)
2025-07-01 17:49:08.750 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.751 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.751 # compares by a factor of 3.
2025-07-01 17:49:08.751 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.751 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.751 # of the computation is cached by cruncher
2025-07-01 17:49:08.751 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.751 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.751 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.751 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.751 if best_ratio < cutoff:
2025-07-01 17:49:08.751 # no non-identical "pretty close" pair
2025-07-01 17:49:08.751 if eqi is None:
2025-07-01 17:49:08.751 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.751 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.751 return
2025-07-01 17:49:08.751 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.751 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.751 else:
2025-07-01 17:49:08.751 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.751 eqi = None
2025-07-01 17:49:08.751
2025-07-01 17:49:08.752 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.752 # identical
2025-07-01 17:49:08.752
2025-07-01 17:49:08.752 # pump out diffs from before the synch point
2025-07-01 17:49:08.752 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.752
2025-07-01 17:49:08.752 # do intraline marking on the synch pair
2025-07-01 17:49:08.753 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.753 if eqi is None:
2025-07-01 17:49:08.753 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.753 atags = btags = ""
2025-07-01 17:49:08.753 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.753 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.753 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.753 if tag == 'replace':
2025-07-01 17:49:08.753 atags += '^' * la
2025-07-01 17:49:08.753 btags += '^' * lb
2025-07-01 17:49:08.753 elif tag == 'delete':
2025-07-01 17:49:08.753 atags += '-' * la
2025-07-01 17:49:08.753 elif tag == 'insert':
2025-07-01 17:49:08.753 btags += '+' * lb
2025-07-01 17:49:08.753 elif tag == 'equal':
2025-07-01 17:49:08.753 atags += ' ' * la
2025-07-01 17:49:08.753 btags += ' ' * lb
2025-07-01 17:49:08.753 else:
2025-07-01 17:49:08.753 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.753 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.754 else:
2025-07-01 17:49:08.754 # the synch pair is identical
2025-07-01 17:49:08.754 yield ' ' + aelt
2025-07-01 17:49:08.754
2025-07-01 17:49:08.754 # pump out diffs from after the synch point
2025-07-01 17:49:08.754 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.754
2025-07-01 17:49:08.754 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.754 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.754
2025-07-01 17:49:08.754 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.754 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.754 alo = 435, ahi = 1101
2025-07-01 17:49:08.754 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.754 blo = 435, bhi = 1101
2025-07-01 17:49:08.754
2025-07-01 17:49:08.754 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.754 g = []
2025-07-01 17:49:08.754 if alo < ahi:
2025-07-01 17:49:08.754 if blo < bhi:
2025-07-01 17:49:08.754 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.755 else:
2025-07-01 17:49:08.755 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.755 elif blo < bhi:
2025-07-01 17:49:08.755 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.755
2025-07-01 17:49:08.755 > yield from g
2025-07-01 17:49:08.755
2025-07-01 17:49:08.755 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.755 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.755
2025-07-01 17:49:08.755 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.755 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.755 alo = 435, ahi = 1101
2025-07-01 17:49:08.755 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.755 blo = 435, bhi = 1101
2025-07-01 17:49:08.755
2025-07-01 17:49:08.755 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.755 r"""
2025-07-01 17:49:08.755 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.756 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.756 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.756 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.756
2025-07-01 17:49:08.756 Example:
2025-07-01 17:49:08.756
2025-07-01 17:49:08.756 >>> d = Differ()
2025-07-01 17:49:08.756 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.756 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.756 >>> print(''.join(results), end="")
2025-07-01 17:49:08.756 - abcDefghiJkl
2025-07-01 17:49:08.756 + abcdefGhijkl
2025-07-01 17:49:08.756 """
2025-07-01 17:49:08.756
2025-07-01 17:49:08.756 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.756 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.756 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.756 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.757 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.757
2025-07-01 17:49:08.757 # search for the pair that matches best without being identical
2025-07-01 17:49:08.757 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.757 # on junk -- unless we have to)
2025-07-01 17:49:08.757 for j in range(blo, bhi):
2025-07-01 17:49:08.757 bj = b[j]
2025-07-01 17:49:08.757 cruncher.set_seq2(bj)
2025-07-01 17:49:08.757 for i in range(alo, ahi):
2025-07-01 17:49:08.757 ai = a[i]
2025-07-01 17:49:08.757 if ai == bj:
2025-07-01 17:49:08.757 if eqi is None:
2025-07-01 17:49:08.757 eqi, eqj = i, j
2025-07-01 17:49:08.757 continue
2025-07-01 17:49:08.757 cruncher.set_seq1(ai)
2025-07-01 17:49:08.757 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.757 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.757 # compares by a factor of 3.
2025-07-01 17:49:08.757 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.757 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.758 # of the computation is cached by cruncher
2025-07-01 17:49:08.758 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.758 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.758 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.758 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.758 if best_ratio < cutoff:
2025-07-01 17:49:08.758 # no non-identical "pretty close" pair
2025-07-01 17:49:08.758 if eqi is None:
2025-07-01 17:49:08.758 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.758 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.758 return
2025-07-01 17:49:08.758 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.758 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.758 else:
2025-07-01 17:49:08.758 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.758 eqi = None
2025-07-01 17:49:08.758
2025-07-01 17:49:08.758 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.758 # identical
2025-07-01 17:49:08.758
2025-07-01 17:49:08.758 # pump out diffs from before the synch point
2025-07-01 17:49:08.759 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.759
2025-07-01 17:49:08.759 # do intraline marking on the synch pair
2025-07-01 17:49:08.759 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.759 if eqi is None:
2025-07-01 17:49:08.759 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.759 atags = btags = ""
2025-07-01 17:49:08.759 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.759 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.759 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.759 if tag == 'replace':
2025-07-01 17:49:08.759 atags += '^' * la
2025-07-01 17:49:08.759 btags += '^' * lb
2025-07-01 17:49:08.759 elif tag == 'delete':
2025-07-01 17:49:08.759 atags += '-' * la
2025-07-01 17:49:08.759 elif tag == 'insert':
2025-07-01 17:49:08.759 btags += '+' * lb
2025-07-01 17:49:08.759 elif tag == 'equal':
2025-07-01 17:49:08.759 atags += ' ' * la
2025-07-01 17:49:08.759 btags += ' ' * lb
2025-07-01 17:49:08.759 else:
2025-07-01 17:49:08.760 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.760 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.760 else:
2025-07-01 17:49:08.760 # the synch pair is identical
2025-07-01 17:49:08.760 yield ' ' + aelt
2025-07-01 17:49:08.760
2025-07-01 17:49:08.760 # pump out diffs from after the synch point
2025-07-01 17:49:08.760 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.760
2025-07-01 17:49:08.760 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.760 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.760
2025-07-01 17:49:08.760 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.760 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.760 alo = 436, ahi = 1101
2025-07-01 17:49:08.760 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.760 blo = 436, bhi = 1101
2025-07-01 17:49:08.760
2025-07-01 17:49:08.760 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.760 g = []
2025-07-01 17:49:08.761 if alo < ahi:
2025-07-01 17:49:08.761 if blo < bhi:
2025-07-01 17:49:08.761 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.761 else:
2025-07-01 17:49:08.761 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.761 elif blo < bhi:
2025-07-01 17:49:08.761 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.761
2025-07-01 17:49:08.761 > yield from g
2025-07-01 17:49:08.761
2025-07-01 17:49:08.761 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.761 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.761
2025-07-01 17:49:08.761 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.761 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.761 alo = 436, ahi = 1101
2025-07-01 17:49:08.761 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.761 blo = 436, bhi = 1101
2025-07-01 17:49:08.761
2025-07-01 17:49:08.761 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.761 r"""
2025-07-01 17:49:08.767 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.767 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.767 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.767 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.767
2025-07-01 17:49:08.767 Example:
2025-07-01 17:49:08.767
2025-07-01 17:49:08.767 >>> d = Differ()
2025-07-01 17:49:08.767 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.767 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.767 >>> print(''.join(results), end="")
2025-07-01 17:49:08.767 - abcDefghiJkl
2025-07-01 17:49:08.767 + abcdefGhijkl
2025-07-01 17:49:08.767 """
2025-07-01 17:49:08.767
2025-07-01 17:49:08.767 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.767 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.767 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.768 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.768 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.768
2025-07-01 17:49:08.768 # search for the pair that matches best without being identical
2025-07-01 17:49:08.768 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.768 # on junk -- unless we have to)
2025-07-01 17:49:08.768 for j in range(blo, bhi):
2025-07-01 17:49:08.768 bj = b[j]
2025-07-01 17:49:08.768 cruncher.set_seq2(bj)
2025-07-01 17:49:08.768 for i in range(alo, ahi):
2025-07-01 17:49:08.768 ai = a[i]
2025-07-01 17:49:08.768 if ai == bj:
2025-07-01 17:49:08.768 if eqi is None:
2025-07-01 17:49:08.768 eqi, eqj = i, j
2025-07-01 17:49:08.768 continue
2025-07-01 17:49:08.768 cruncher.set_seq1(ai)
2025-07-01 17:49:08.768 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.768 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.768 # compares by a factor of 3.
2025-07-01 17:49:08.768 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.768 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.769 # of the computation is cached by cruncher
2025-07-01 17:49:08.769 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.769 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.769 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.769 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.769 if best_ratio < cutoff:
2025-07-01 17:49:08.769 # no non-identical "pretty close" pair
2025-07-01 17:49:08.769 if eqi is None:
2025-07-01 17:49:08.769 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.769 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.769 return
2025-07-01 17:49:08.769 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.769 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.769 else:
2025-07-01 17:49:08.769 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.769 eqi = None
2025-07-01 17:49:08.769
2025-07-01 17:49:08.770 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.770 # identical
2025-07-01 17:49:08.770
2025-07-01 17:49:08.770 # pump out diffs from before the synch point
2025-07-01 17:49:08.770 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.770
2025-07-01 17:49:08.770 # do intraline marking on the synch pair
2025-07-01 17:49:08.770 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.770 if eqi is None:
2025-07-01 17:49:08.770 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.770 atags = btags = ""
2025-07-01 17:49:08.770 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.770 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.770 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.770 if tag == 'replace':
2025-07-01 17:49:08.770 atags += '^' * la
2025-07-01 17:49:08.770 btags += '^' * lb
2025-07-01 17:49:08.770 elif tag == 'delete':
2025-07-01 17:49:08.770 atags += '-' * la
2025-07-01 17:49:08.770 elif tag == 'insert':
2025-07-01 17:49:08.771 btags += '+' * lb
2025-07-01 17:49:08.771 elif tag == 'equal':
2025-07-01 17:49:08.771 atags += ' ' * la
2025-07-01 17:49:08.771 btags += ' ' * lb
2025-07-01 17:49:08.771 else:
2025-07-01 17:49:08.771 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.771 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.771 else:
2025-07-01 17:49:08.771 # the synch pair is identical
2025-07-01 17:49:08.771 yield ' ' + aelt
2025-07-01 17:49:08.771
2025-07-01 17:49:08.771 # pump out diffs from after the synch point
2025-07-01 17:49:08.771 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.771
2025-07-01 17:49:08.771 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.771 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.771
2025-07-01 17:49:08.771 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.771 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.771 alo = 437, ahi = 1101
2025-07-01 17:49:08.771 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.772 blo = 437, bhi = 1101
2025-07-01 17:49:08.772
2025-07-01 17:49:08.772 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.772 g = []
2025-07-01 17:49:08.772 if alo < ahi:
2025-07-01 17:49:08.772 if blo < bhi:
2025-07-01 17:49:08.772 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.772 else:
2025-07-01 17:49:08.772 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.772 elif blo < bhi:
2025-07-01 17:49:08.772 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.772
2025-07-01 17:49:08.772 > yield from g
2025-07-01 17:49:08.772
2025-07-01 17:49:08.772 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.772 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.772
2025-07-01 17:49:08.772 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.772 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.772 alo = 437, ahi = 1101
2025-07-01 17:49:08.773 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.773 blo = 437, bhi = 1101
2025-07-01 17:49:08.773
2025-07-01 17:49:08.773 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.773 r"""
2025-07-01 17:49:08.773 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.773 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.773 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.773 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.773
2025-07-01 17:49:08.773 Example:
2025-07-01 17:49:08.773
2025-07-01 17:49:08.773 >>> d = Differ()
2025-07-01 17:49:08.773 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.773 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.773 >>> print(''.join(results), end="")
2025-07-01 17:49:08.773 - abcDefghiJkl
2025-07-01 17:49:08.773 + abcdefGhijkl
2025-07-01 17:49:08.773 """
2025-07-01 17:49:08.773
2025-07-01 17:49:08.774 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.774 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.774 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.774 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.774 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.774
2025-07-01 17:49:08.774 # search for the pair that matches best without being identical
2025-07-01 17:49:08.774 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.774 # on junk -- unless we have to)
2025-07-01 17:49:08.774 for j in range(blo, bhi):
2025-07-01 17:49:08.774 bj = b[j]
2025-07-01 17:49:08.774 cruncher.set_seq2(bj)
2025-07-01 17:49:08.774 for i in range(alo, ahi):
2025-07-01 17:49:08.774 ai = a[i]
2025-07-01 17:49:08.774 if ai == bj:
2025-07-01 17:49:08.774 if eqi is None:
2025-07-01 17:49:08.774 eqi, eqj = i, j
2025-07-01 17:49:08.774 continue
2025-07-01 17:49:08.774 cruncher.set_seq1(ai)
2025-07-01 17:49:08.774 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.774 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.775 # compares by a factor of 3.
2025-07-01 17:49:08.775 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.775 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.775 # of the computation is cached by cruncher
2025-07-01 17:49:08.775 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.775 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.775 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.775 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.775 if best_ratio < cutoff:
2025-07-01 17:49:08.775 # no non-identical "pretty close" pair
2025-07-01 17:49:08.775 if eqi is None:
2025-07-01 17:49:08.775 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.775 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.775 return
2025-07-01 17:49:08.775 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.775 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.775 else:
2025-07-01 17:49:08.775 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.775 eqi = None
2025-07-01 17:49:08.775
2025-07-01 17:49:08.775 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.776 # identical
2025-07-01 17:49:08.776
2025-07-01 17:49:08.776 # pump out diffs from before the synch point
2025-07-01 17:49:08.776 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.776
2025-07-01 17:49:08.776 # do intraline marking on the synch pair
2025-07-01 17:49:08.776 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.776 if eqi is None:
2025-07-01 17:49:08.776 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.776 atags = btags = ""
2025-07-01 17:49:08.776 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.776 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.776 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.776 if tag == 'replace':
2025-07-01 17:49:08.776 atags += '^' * la
2025-07-01 17:49:08.776 btags += '^' * lb
2025-07-01 17:49:08.776 elif tag == 'delete':
2025-07-01 17:49:08.776 atags += '-' * la
2025-07-01 17:49:08.776 elif tag == 'insert':
2025-07-01 17:49:08.776 btags += '+' * lb
2025-07-01 17:49:08.776 elif tag == 'equal':
2025-07-01 17:49:08.777 atags += ' ' * la
2025-07-01 17:49:08.779 btags += ' ' * lb
2025-07-01 17:49:08.780 else:
2025-07-01 17:49:08.780 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.780 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.780 else:
2025-07-01 17:49:08.780 # the synch pair is identical
2025-07-01 17:49:08.780 yield ' ' + aelt
2025-07-01 17:49:08.780
2025-07-01 17:49:08.780 # pump out diffs from after the synch point
2025-07-01 17:49:08.780 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.780
2025-07-01 17:49:08.780 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.780 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.780
2025-07-01 17:49:08.780 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.780 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.780 alo = 438, ahi = 1101
2025-07-01 17:49:08.780 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.781 blo = 438, bhi = 1101
2025-07-01 17:49:08.781
2025-07-01 17:49:08.781 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.781 g = []
2025-07-01 17:49:08.781 if alo < ahi:
2025-07-01 17:49:08.781 if blo < bhi:
2025-07-01 17:49:08.781 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.781 else:
2025-07-01 17:49:08.781 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.781 elif blo < bhi:
2025-07-01 17:49:08.781 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.781
2025-07-01 17:49:08.781 > yield from g
2025-07-01 17:49:08.781
2025-07-01 17:49:08.781 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.781 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.781
2025-07-01 17:49:08.781 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.781 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.781 alo = 438, ahi = 1101
2025-07-01 17:49:08.782 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.782 blo = 438, bhi = 1101
2025-07-01 17:49:08.782
2025-07-01 17:49:08.782 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.782 r"""
2025-07-01 17:49:08.782 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.782 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.782 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.782 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.782
2025-07-01 17:49:08.782 Example:
2025-07-01 17:49:08.782
2025-07-01 17:49:08.782 >>> d = Differ()
2025-07-01 17:49:08.782 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.782 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.782 >>> print(''.join(results), end="")
2025-07-01 17:49:08.782 - abcDefghiJkl
2025-07-01 17:49:08.782 + abcdefGhijkl
2025-07-01 17:49:08.783 """
2025-07-01 17:49:08.783
2025-07-01 17:49:08.783 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.783 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.783 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.783 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.783 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.783
2025-07-01 17:49:08.783 # search for the pair that matches best without being identical
2025-07-01 17:49:08.783 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.783 # on junk -- unless we have to)
2025-07-01 17:49:08.783 for j in range(blo, bhi):
2025-07-01 17:49:08.783 bj = b[j]
2025-07-01 17:49:08.783 cruncher.set_seq2(bj)
2025-07-01 17:49:08.783 for i in range(alo, ahi):
2025-07-01 17:49:08.783 ai = a[i]
2025-07-01 17:49:08.783 if ai == bj:
2025-07-01 17:49:08.783 if eqi is None:
2025-07-01 17:49:08.783 eqi, eqj = i, j
2025-07-01 17:49:08.783 continue
2025-07-01 17:49:08.784 cruncher.set_seq1(ai)
2025-07-01 17:49:08.784 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.784 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.784 # compares by a factor of 3.
2025-07-01 17:49:08.784 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.784 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.784 # of the computation is cached by cruncher
2025-07-01 17:49:08.784 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.784 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.784 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.784 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.784 if best_ratio < cutoff:
2025-07-01 17:49:08.784 # no non-identical "pretty close" pair
2025-07-01 17:49:08.784 if eqi is None:
2025-07-01 17:49:08.784 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.784 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.784 return
2025-07-01 17:49:08.784 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.784 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.784 else:
2025-07-01 17:49:08.785 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.785 eqi = None
2025-07-01 17:49:08.785
2025-07-01 17:49:08.785 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.785 # identical
2025-07-01 17:49:08.785
2025-07-01 17:49:08.785 # pump out diffs from before the synch point
2025-07-01 17:49:08.785 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.785
2025-07-01 17:49:08.785 # do intraline marking on the synch pair
2025-07-01 17:49:08.785 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.785 if eqi is None:
2025-07-01 17:49:08.785 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.785 atags = btags = ""
2025-07-01 17:49:08.785 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.785 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.785 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.785 if tag == 'replace':
2025-07-01 17:49:08.785 atags += '^' * la
2025-07-01 17:49:08.785 btags += '^' * lb
2025-07-01 17:49:08.785 elif tag == 'delete':
2025-07-01 17:49:08.785 atags += '-' * la
2025-07-01 17:49:08.786 elif tag == 'insert':
2025-07-01 17:49:08.786 btags += '+' * lb
2025-07-01 17:49:08.786 elif tag == 'equal':
2025-07-01 17:49:08.786 atags += ' ' * la
2025-07-01 17:49:08.786 btags += ' ' * lb
2025-07-01 17:49:08.786 else:
2025-07-01 17:49:08.786 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.786 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.786 else:
2025-07-01 17:49:08.786 # the synch pair is identical
2025-07-01 17:49:08.786 yield ' ' + aelt
2025-07-01 17:49:08.786
2025-07-01 17:49:08.786 # pump out diffs from after the synch point
2025-07-01 17:49:08.786 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.786
2025-07-01 17:49:08.786 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.786 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.786
2025-07-01 17:49:08.786 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.786 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.787 alo = 439, ahi = 1101
2025-07-01 17:49:08.787 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.787 blo = 439, bhi = 1101
2025-07-01 17:49:08.787
2025-07-01 17:49:08.787 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.787 g = []
2025-07-01 17:49:08.787 if alo < ahi:
2025-07-01 17:49:08.787 if blo < bhi:
2025-07-01 17:49:08.787 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.787 else:
2025-07-01 17:49:08.787 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.787 elif blo < bhi:
2025-07-01 17:49:08.787 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.787
2025-07-01 17:49:08.787 > yield from g
2025-07-01 17:49:08.787
2025-07-01 17:49:08.787 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.787 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.787
2025-07-01 17:49:08.787 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.787 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.787 alo = 439, ahi = 1101
2025-07-01 17:49:08.788 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.788 blo = 439, bhi = 1101
2025-07-01 17:49:08.788
2025-07-01 17:49:08.788 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.788 r"""
2025-07-01 17:49:08.788 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.788 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.788 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.788 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.788
2025-07-01 17:49:08.788 Example:
2025-07-01 17:49:08.788
2025-07-01 17:49:08.788 >>> d = Differ()
2025-07-01 17:49:08.788 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.788 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.788 >>> print(''.join(results), end="")
2025-07-01 17:49:08.788 - abcDefghiJkl
2025-07-01 17:49:08.788 + abcdefGhijkl
2025-07-01 17:49:08.788 """
2025-07-01 17:49:08.789
2025-07-01 17:49:08.789 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.789 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.789 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.789 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.789 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.789
2025-07-01 17:49:08.789 # search for the pair that matches best without being identical
2025-07-01 17:49:08.789 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.789 # on junk -- unless we have to)
2025-07-01 17:49:08.789 for j in range(blo, bhi):
2025-07-01 17:49:08.789 bj = b[j]
2025-07-01 17:49:08.789 cruncher.set_seq2(bj)
2025-07-01 17:49:08.789 for i in range(alo, ahi):
2025-07-01 17:49:08.789 ai = a[i]
2025-07-01 17:49:08.789 if ai == bj:
2025-07-01 17:49:08.789 if eqi is None:
2025-07-01 17:49:08.789 eqi, eqj = i, j
2025-07-01 17:49:08.790 continue
2025-07-01 17:49:08.790 cruncher.set_seq1(ai)
2025-07-01 17:49:08.790 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.790 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.790 # compares by a factor of 3.
2025-07-01 17:49:08.790 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.790 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.790 # of the computation is cached by cruncher
2025-07-01 17:49:08.790 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.790 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.790 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.790 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.790 if best_ratio < cutoff:
2025-07-01 17:49:08.790 # no non-identical "pretty close" pair
2025-07-01 17:49:08.790 if eqi is None:
2025-07-01 17:49:08.790 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.790 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.790 return
2025-07-01 17:49:08.790 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.790 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.791 else:
2025-07-01 17:49:08.791 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.791 eqi = None
2025-07-01 17:49:08.791
2025-07-01 17:49:08.791 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.791 # identical
2025-07-01 17:49:08.791
2025-07-01 17:49:08.791 # pump out diffs from before the synch point
2025-07-01 17:49:08.791 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.791
2025-07-01 17:49:08.791 # do intraline marking on the synch pair
2025-07-01 17:49:08.791 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.791 if eqi is None:
2025-07-01 17:49:08.791 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.791 atags = btags = ""
2025-07-01 17:49:08.791 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.791 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.791 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.791 if tag == 'replace':
2025-07-01 17:49:08.791 atags += '^' * la
2025-07-01 17:49:08.791 btags += '^' * lb
2025-07-01 17:49:08.791 elif tag == 'delete':
2025-07-01 17:49:08.792 atags += '-' * la
2025-07-01 17:49:08.792 elif tag == 'insert':
2025-07-01 17:49:08.792 btags += '+' * lb
2025-07-01 17:49:08.792 elif tag == 'equal':
2025-07-01 17:49:08.792 atags += ' ' * la
2025-07-01 17:49:08.792 btags += ' ' * lb
2025-07-01 17:49:08.792 else:
2025-07-01 17:49:08.792 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.792 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.792 else:
2025-07-01 17:49:08.792 # the synch pair is identical
2025-07-01 17:49:08.792 yield ' ' + aelt
2025-07-01 17:49:08.792
2025-07-01 17:49:08.792 # pump out diffs from after the synch point
2025-07-01 17:49:08.792 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.792
2025-07-01 17:49:08.792 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.792 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.792
2025-07-01 17:49:08.792 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.793 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.798 alo = 440, ahi = 1101
2025-07-01 17:49:08.798 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.798 blo = 440, bhi = 1101
2025-07-01 17:49:08.798
2025-07-01 17:49:08.798 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.798 g = []
2025-07-01 17:49:08.798 if alo < ahi:
2025-07-01 17:49:08.798 if blo < bhi:
2025-07-01 17:49:08.798 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.798 else:
2025-07-01 17:49:08.798 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.798 elif blo < bhi:
2025-07-01 17:49:08.798 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.798
2025-07-01 17:49:08.798 > yield from g
2025-07-01 17:49:08.798
2025-07-01 17:49:08.798 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.798 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.798
2025-07-01 17:49:08.799 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.799 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.799 alo = 440, ahi = 1101
2025-07-01 17:49:08.799 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.799 blo = 440, bhi = 1101
2025-07-01 17:49:08.799
2025-07-01 17:49:08.799 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.799 r"""
2025-07-01 17:49:08.799 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.799 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.799 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.799 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.799
2025-07-01 17:49:08.799 Example:
2025-07-01 17:49:08.799
2025-07-01 17:49:08.799 >>> d = Differ()
2025-07-01 17:49:08.799 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.799 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.799 >>> print(''.join(results), end="")
2025-07-01 17:49:08.799 - abcDefghiJkl
2025-07-01 17:49:08.800 + abcdefGhijkl
2025-07-01 17:49:08.800 """
2025-07-01 17:49:08.800
2025-07-01 17:49:08.800 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.800 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.800 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.800 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.800 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.800
2025-07-01 17:49:08.800 # search for the pair that matches best without being identical
2025-07-01 17:49:08.800 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.800 # on junk -- unless we have to)
2025-07-01 17:49:08.800 for j in range(blo, bhi):
2025-07-01 17:49:08.800 bj = b[j]
2025-07-01 17:49:08.800 cruncher.set_seq2(bj)
2025-07-01 17:49:08.800 for i in range(alo, ahi):
2025-07-01 17:49:08.800 ai = a[i]
2025-07-01 17:49:08.800 if ai == bj:
2025-07-01 17:49:08.800 if eqi is None:
2025-07-01 17:49:08.800 eqi, eqj = i, j
2025-07-01 17:49:08.801 continue
2025-07-01 17:49:08.801 cruncher.set_seq1(ai)
2025-07-01 17:49:08.801 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.801 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.801 # compares by a factor of 3.
2025-07-01 17:49:08.801 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.801 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.801 # of the computation is cached by cruncher
2025-07-01 17:49:08.801 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.801 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.801 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.801 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.801 if best_ratio < cutoff:
2025-07-01 17:49:08.801 # no non-identical "pretty close" pair
2025-07-01 17:49:08.801 if eqi is None:
2025-07-01 17:49:08.801 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.801 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.801 return
2025-07-01 17:49:08.801 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.801 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.801 else:
2025-07-01 17:49:08.802 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.802 eqi = None
2025-07-01 17:49:08.802
2025-07-01 17:49:08.802 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.802 # identical
2025-07-01 17:49:08.802
2025-07-01 17:49:08.802 # pump out diffs from before the synch point
2025-07-01 17:49:08.802 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.802
2025-07-01 17:49:08.802 # do intraline marking on the synch pair
2025-07-01 17:49:08.802 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.802 if eqi is None:
2025-07-01 17:49:08.802 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.802 atags = btags = ""
2025-07-01 17:49:08.802 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.802 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.802 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.802 if tag == 'replace':
2025-07-01 17:49:08.802 atags += '^' * la
2025-07-01 17:49:08.803 btags += '^' * lb
2025-07-01 17:49:08.803 elif tag == 'delete':
2025-07-01 17:49:08.803 atags += '-' * la
2025-07-01 17:49:08.803 elif tag == 'insert':
2025-07-01 17:49:08.803 btags += '+' * lb
2025-07-01 17:49:08.803 elif tag == 'equal':
2025-07-01 17:49:08.803 atags += ' ' * la
2025-07-01 17:49:08.803 btags += ' ' * lb
2025-07-01 17:49:08.803 else:
2025-07-01 17:49:08.803 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.803 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.803 else:
2025-07-01 17:49:08.803 # the synch pair is identical
2025-07-01 17:49:08.803 yield ' ' + aelt
2025-07-01 17:49:08.803
2025-07-01 17:49:08.803 # pump out diffs from after the synch point
2025-07-01 17:49:08.803 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.803
2025-07-01 17:49:08.803 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.803 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.803
2025-07-01 17:49:08.804 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.804 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.804 alo = 441, ahi = 1101
2025-07-01 17:49:08.804 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.804 blo = 441, bhi = 1101
2025-07-01 17:49:08.804
2025-07-01 17:49:08.804 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.804 g = []
2025-07-01 17:49:08.804 if alo < ahi:
2025-07-01 17:49:08.804 if blo < bhi:
2025-07-01 17:49:08.804 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.804 else:
2025-07-01 17:49:08.804 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.804 elif blo < bhi:
2025-07-01 17:49:08.804 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.804
2025-07-01 17:49:08.804 > yield from g
2025-07-01 17:49:08.804
2025-07-01 17:49:08.804 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.804 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.805
2025-07-01 17:49:08.805 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.805 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.805 alo = 441, ahi = 1101
2025-07-01 17:49:08.805 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.805 blo = 441, bhi = 1101
2025-07-01 17:49:08.805
2025-07-01 17:49:08.805 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.805 r"""
2025-07-01 17:49:08.805 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.805 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.805 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.805 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.805
2025-07-01 17:49:08.805 Example:
2025-07-01 17:49:08.805
2025-07-01 17:49:08.805 >>> d = Differ()
2025-07-01 17:49:08.805 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.805 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.805 >>> print(''.join(results), end="")
2025-07-01 17:49:08.805 - abcDefghiJkl
2025-07-01 17:49:08.806 + abcdefGhijkl
2025-07-01 17:49:08.806 """
2025-07-01 17:49:08.806
2025-07-01 17:49:08.806 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.806 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.806 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.806 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.806 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.806
2025-07-01 17:49:08.806 # search for the pair that matches best without being identical
2025-07-01 17:49:08.806 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.806 # on junk -- unless we have to)
2025-07-01 17:49:08.806 for j in range(blo, bhi):
2025-07-01 17:49:08.806 bj = b[j]
2025-07-01 17:49:08.806 cruncher.set_seq2(bj)
2025-07-01 17:49:08.806 for i in range(alo, ahi):
2025-07-01 17:49:08.806 ai = a[i]
2025-07-01 17:49:08.806 if ai == bj:
2025-07-01 17:49:08.806 if eqi is None:
2025-07-01 17:49:08.806 eqi, eqj = i, j
2025-07-01 17:49:08.807 continue
2025-07-01 17:49:08.807 cruncher.set_seq1(ai)
2025-07-01 17:49:08.807 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.807 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.807 # compares by a factor of 3.
2025-07-01 17:49:08.807 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.807 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.807 # of the computation is cached by cruncher
2025-07-01 17:49:08.807 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.807 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.807 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.807 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.807 if best_ratio < cutoff:
2025-07-01 17:49:08.807 # no non-identical "pretty close" pair
2025-07-01 17:49:08.807 if eqi is None:
2025-07-01 17:49:08.807 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.807 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.807 return
2025-07-01 17:49:08.807 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.807 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.807 else:
2025-07-01 17:49:08.808 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.811 eqi = None
2025-07-01 17:49:08.811
2025-07-01 17:49:08.811 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.811 # identical
2025-07-01 17:49:08.811
2025-07-01 17:49:08.811 # pump out diffs from before the synch point
2025-07-01 17:49:08.811 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.812
2025-07-01 17:49:08.812 # do intraline marking on the synch pair
2025-07-01 17:49:08.812 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.812 if eqi is None:
2025-07-01 17:49:08.812 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.812 atags = btags = ""
2025-07-01 17:49:08.812 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.812 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.812 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.812 if tag == 'replace':
2025-07-01 17:49:08.812 atags += '^' * la
2025-07-01 17:49:08.812 btags += '^' * lb
2025-07-01 17:49:08.812 elif tag == 'delete':
2025-07-01 17:49:08.812 atags += '-' * la
2025-07-01 17:49:08.812 elif tag == 'insert':
2025-07-01 17:49:08.812 btags += '+' * lb
2025-07-01 17:49:08.812 elif tag == 'equal':
2025-07-01 17:49:08.812 atags += ' ' * la
2025-07-01 17:49:08.812 btags += ' ' * lb
2025-07-01 17:49:08.812 else:
2025-07-01 17:49:08.813 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.813 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.813 else:
2025-07-01 17:49:08.813 # the synch pair is identical
2025-07-01 17:49:08.813 yield ' ' + aelt
2025-07-01 17:49:08.813
2025-07-01 17:49:08.813 # pump out diffs from after the synch point
2025-07-01 17:49:08.813 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.813
2025-07-01 17:49:08.813 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.813 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.813
2025-07-01 17:49:08.813 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.813 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.813 alo = 444, ahi = 1101
2025-07-01 17:49:08.813 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.813 blo = 444, bhi = 1101
2025-07-01 17:49:08.813
2025-07-01 17:49:08.813 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.813 g = []
2025-07-01 17:49:08.814 if alo < ahi:
2025-07-01 17:49:08.814 if blo < bhi:
2025-07-01 17:49:08.814 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.814 else:
2025-07-01 17:49:08.814 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.814 elif blo < bhi:
2025-07-01 17:49:08.814 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.814
2025-07-01 17:49:08.814 > yield from g
2025-07-01 17:49:08.814
2025-07-01 17:49:08.814 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.814 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.814
2025-07-01 17:49:08.814 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.814 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.814 alo = 444, ahi = 1101
2025-07-01 17:49:08.814 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.814 blo = 444, bhi = 1101
2025-07-01 17:49:08.815
2025-07-01 17:49:08.815 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.815 r"""
2025-07-01 17:49:08.815 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.815 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.815 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.815 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.815
2025-07-01 17:49:08.815 Example:
2025-07-01 17:49:08.815
2025-07-01 17:49:08.815 >>> d = Differ()
2025-07-01 17:49:08.815 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.815 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.815 >>> print(''.join(results), end="")
2025-07-01 17:49:08.815 - abcDefghiJkl
2025-07-01 17:49:08.815 + abcdefGhijkl
2025-07-01 17:49:08.815 """
2025-07-01 17:49:08.815
2025-07-01 17:49:08.816 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.816 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.816 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.816 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.816 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.816
2025-07-01 17:49:08.816 # search for the pair that matches best without being identical
2025-07-01 17:49:08.816 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.816 # on junk -- unless we have to)
2025-07-01 17:49:08.816 for j in range(blo, bhi):
2025-07-01 17:49:08.816 bj = b[j]
2025-07-01 17:49:08.816 cruncher.set_seq2(bj)
2025-07-01 17:49:08.816 for i in range(alo, ahi):
2025-07-01 17:49:08.816 ai = a[i]
2025-07-01 17:49:08.816 if ai == bj:
2025-07-01 17:49:08.816 if eqi is None:
2025-07-01 17:49:08.816 eqi, eqj = i, j
2025-07-01 17:49:08.816 continue
2025-07-01 17:49:08.816 cruncher.set_seq1(ai)
2025-07-01 17:49:08.816 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.817 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.817 # compares by a factor of 3.
2025-07-01 17:49:08.817 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.817 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.817 # of the computation is cached by cruncher
2025-07-01 17:49:08.817 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.817 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.817 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.817 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.817 if best_ratio < cutoff:
2025-07-01 17:49:08.817 # no non-identical "pretty close" pair
2025-07-01 17:49:08.817 if eqi is None:
2025-07-01 17:49:08.817 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.817 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.817 return
2025-07-01 17:49:08.817 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.817 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.817 else:
2025-07-01 17:49:08.817 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.817 eqi = None
2025-07-01 17:49:08.817
2025-07-01 17:49:08.818 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.818 # identical
2025-07-01 17:49:08.818
2025-07-01 17:49:08.818 # pump out diffs from before the synch point
2025-07-01 17:49:08.818 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.818
2025-07-01 17:49:08.818 # do intraline marking on the synch pair
2025-07-01 17:49:08.818 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.818 if eqi is None:
2025-07-01 17:49:08.818 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.818 atags = btags = ""
2025-07-01 17:49:08.818 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.818 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.818 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.818 if tag == 'replace':
2025-07-01 17:49:08.818 atags += '^' * la
2025-07-01 17:49:08.818 btags += '^' * lb
2025-07-01 17:49:08.818 elif tag == 'delete':
2025-07-01 17:49:08.818 atags += '-' * la
2025-07-01 17:49:08.818 elif tag == 'insert':
2025-07-01 17:49:08.818 btags += '+' * lb
2025-07-01 17:49:08.819 elif tag == 'equal':
2025-07-01 17:49:08.819 atags += ' ' * la
2025-07-01 17:49:08.819 btags += ' ' * lb
2025-07-01 17:49:08.819 else:
2025-07-01 17:49:08.819 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.819 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.819 else:
2025-07-01 17:49:08.819 # the synch pair is identical
2025-07-01 17:49:08.819 yield ' ' + aelt
2025-07-01 17:49:08.819
2025-07-01 17:49:08.819 # pump out diffs from after the synch point
2025-07-01 17:49:08.819 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.819
2025-07-01 17:49:08.819 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.819 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.819
2025-07-01 17:49:08.819 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.819 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.819 alo = 445, ahi = 1101
2025-07-01 17:49:08.819 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.819 blo = 445, bhi = 1101
2025-07-01 17:49:08.820
2025-07-01 17:49:08.820 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.820 g = []
2025-07-01 17:49:08.820 if alo < ahi:
2025-07-01 17:49:08.820 if blo < bhi:
2025-07-01 17:49:08.820 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.820 else:
2025-07-01 17:49:08.820 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.820 elif blo < bhi:
2025-07-01 17:49:08.820 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.820
2025-07-01 17:49:08.820 > yield from g
2025-07-01 17:49:08.820
2025-07-01 17:49:08.820 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.820 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.820
2025-07-01 17:49:08.820 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.820 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.820 alo = 445, ahi = 1101
2025-07-01 17:49:08.820 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.820 blo = 445, bhi = 1101
2025-07-01 17:49:08.820
2025-07-01 17:49:08.821 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.821 r"""
2025-07-01 17:49:08.821 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.821 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.821 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.821 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.821
2025-07-01 17:49:08.821 Example:
2025-07-01 17:49:08.821
2025-07-01 17:49:08.821 >>> d = Differ()
2025-07-01 17:49:08.821 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.821 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.821 >>> print(''.join(results), end="")
2025-07-01 17:49:08.821 - abcDefghiJkl
2025-07-01 17:49:08.821 + abcdefGhijkl
2025-07-01 17:49:08.821 """
2025-07-01 17:49:08.821
2025-07-01 17:49:08.821 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.821 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.822 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.822 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.822 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.822
2025-07-01 17:49:08.822 # search for the pair that matches best without being identical
2025-07-01 17:49:08.822 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.822 # on junk -- unless we have to)
2025-07-01 17:49:08.822 for j in range(blo, bhi):
2025-07-01 17:49:08.822 bj = b[j]
2025-07-01 17:49:08.822 cruncher.set_seq2(bj)
2025-07-01 17:49:08.822 for i in range(alo, ahi):
2025-07-01 17:49:08.822 ai = a[i]
2025-07-01 17:49:08.822 if ai == bj:
2025-07-01 17:49:08.822 if eqi is None:
2025-07-01 17:49:08.822 eqi, eqj = i, j
2025-07-01 17:49:08.822 continue
2025-07-01 17:49:08.822 cruncher.set_seq1(ai)
2025-07-01 17:49:08.822 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.822 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.822 # compares by a factor of 3.
2025-07-01 17:49:08.822 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.823 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.823 # of the computation is cached by cruncher
2025-07-01 17:49:08.823 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.823 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.823 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.823 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.823 if best_ratio < cutoff:
2025-07-01 17:49:08.823 # no non-identical "pretty close" pair
2025-07-01 17:49:08.823 if eqi is None:
2025-07-01 17:49:08.823 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.823 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.823 return
2025-07-01 17:49:08.823 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.823 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.823 else:
2025-07-01 17:49:08.823 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.823 eqi = None
2025-07-01 17:49:08.823
2025-07-01 17:49:08.823 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.824 # identical
2025-07-01 17:49:08.828
2025-07-01 17:49:08.829 # pump out diffs from before the synch point
2025-07-01 17:49:08.829 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.829
2025-07-01 17:49:08.829 # do intraline marking on the synch pair
2025-07-01 17:49:08.829 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.829 if eqi is None:
2025-07-01 17:49:08.829 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.829 atags = btags = ""
2025-07-01 17:49:08.829 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.829 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.829 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.829 if tag == 'replace':
2025-07-01 17:49:08.829 atags += '^' * la
2025-07-01 17:49:08.829 btags += '^' * lb
2025-07-01 17:49:08.829 elif tag == 'delete':
2025-07-01 17:49:08.829 atags += '-' * la
2025-07-01 17:49:08.829 elif tag == 'insert':
2025-07-01 17:49:08.829 btags += '+' * lb
2025-07-01 17:49:08.829 elif tag == 'equal':
2025-07-01 17:49:08.829 atags += ' ' * la
2025-07-01 17:49:08.830 btags += ' ' * lb
2025-07-01 17:49:08.830 else:
2025-07-01 17:49:08.830 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.830 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.830 else:
2025-07-01 17:49:08.830 # the synch pair is identical
2025-07-01 17:49:08.830 yield ' ' + aelt
2025-07-01 17:49:08.830
2025-07-01 17:49:08.830 # pump out diffs from after the synch point
2025-07-01 17:49:08.830 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.830
2025-07-01 17:49:08.830 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.830 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.830
2025-07-01 17:49:08.830 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.830 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.830 alo = 446, ahi = 1101
2025-07-01 17:49:08.830 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.830 blo = 446, bhi = 1101
2025-07-01 17:49:08.830
2025-07-01 17:49:08.830 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.831 g = []
2025-07-01 17:49:08.831 if alo < ahi:
2025-07-01 17:49:08.831 if blo < bhi:
2025-07-01 17:49:08.831 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.831 else:
2025-07-01 17:49:08.831 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.831 elif blo < bhi:
2025-07-01 17:49:08.831 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.831
2025-07-01 17:49:08.831 > yield from g
2025-07-01 17:49:08.831
2025-07-01 17:49:08.831 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.831 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.831
2025-07-01 17:49:08.831 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.831 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.831 alo = 446, ahi = 1101
2025-07-01 17:49:08.831 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.831 blo = 446, bhi = 1101
2025-07-01 17:49:08.831
2025-07-01 17:49:08.831 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.832 r"""
2025-07-01 17:49:08.832 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.832 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.832 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.832 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.832
2025-07-01 17:49:08.832 Example:
2025-07-01 17:49:08.832
2025-07-01 17:49:08.832 >>> d = Differ()
2025-07-01 17:49:08.832 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.832 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.832 >>> print(''.join(results), end="")
2025-07-01 17:49:08.832 - abcDefghiJkl
2025-07-01 17:49:08.832 + abcdefGhijkl
2025-07-01 17:49:08.832 """
2025-07-01 17:49:08.832
2025-07-01 17:49:08.832 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.832 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.832 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.832 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.833 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.833
2025-07-01 17:49:08.833 # search for the pair that matches best without being identical
2025-07-01 17:49:08.833 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.833 # on junk -- unless we have to)
2025-07-01 17:49:08.833 for j in range(blo, bhi):
2025-07-01 17:49:08.833 bj = b[j]
2025-07-01 17:49:08.833 cruncher.set_seq2(bj)
2025-07-01 17:49:08.833 for i in range(alo, ahi):
2025-07-01 17:49:08.833 ai = a[i]
2025-07-01 17:49:08.833 if ai == bj:
2025-07-01 17:49:08.833 if eqi is None:
2025-07-01 17:49:08.833 eqi, eqj = i, j
2025-07-01 17:49:08.833 continue
2025-07-01 17:49:08.833 cruncher.set_seq1(ai)
2025-07-01 17:49:08.833 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.833 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.833 # compares by a factor of 3.
2025-07-01 17:49:08.833 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.833 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.833 # of the computation is cached by cruncher
2025-07-01 17:49:08.834 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.834 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.834 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.834 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.834 if best_ratio < cutoff:
2025-07-01 17:49:08.834 # no non-identical "pretty close" pair
2025-07-01 17:49:08.834 if eqi is None:
2025-07-01 17:49:08.834 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.834 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.834 return
2025-07-01 17:49:08.834 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.834 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.834 else:
2025-07-01 17:49:08.834 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.834 eqi = None
2025-07-01 17:49:08.834
2025-07-01 17:49:08.834 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.834 # identical
2025-07-01 17:49:08.834
2025-07-01 17:49:08.834 # pump out diffs from before the synch point
2025-07-01 17:49:08.834 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.835
2025-07-01 17:49:08.835 # do intraline marking on the synch pair
2025-07-01 17:49:08.835 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.835 if eqi is None:
2025-07-01 17:49:08.835 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.835 atags = btags = ""
2025-07-01 17:49:08.835 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.835 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.835 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.835 if tag == 'replace':
2025-07-01 17:49:08.835 atags += '^' * la
2025-07-01 17:49:08.835 btags += '^' * lb
2025-07-01 17:49:08.835 elif tag == 'delete':
2025-07-01 17:49:08.835 atags += '-' * la
2025-07-01 17:49:08.835 elif tag == 'insert':
2025-07-01 17:49:08.835 btags += '+' * lb
2025-07-01 17:49:08.835 elif tag == 'equal':
2025-07-01 17:49:08.835 atags += ' ' * la
2025-07-01 17:49:08.835 btags += ' ' * lb
2025-07-01 17:49:08.835 else:
2025-07-01 17:49:08.835 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.835 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.836 else:
2025-07-01 17:49:08.836 # the synch pair is identical
2025-07-01 17:49:08.836 yield ' ' + aelt
2025-07-01 17:49:08.836
2025-07-01 17:49:08.836 # pump out diffs from after the synch point
2025-07-01 17:49:08.836 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.836
2025-07-01 17:49:08.836 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.836 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.836
2025-07-01 17:49:08.836 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.836 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.836 alo = 447, ahi = 1101
2025-07-01 17:49:08.836 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.836 blo = 447, bhi = 1101
2025-07-01 17:49:08.836
2025-07-01 17:49:08.836 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.836 g = []
2025-07-01 17:49:08.837 if alo < ahi:
2025-07-01 17:49:08.837 if blo < bhi:
2025-07-01 17:49:08.837 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.837 else:
2025-07-01 17:49:08.837 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.837 elif blo < bhi:
2025-07-01 17:49:08.837 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.837
2025-07-01 17:49:08.837 > yield from g
2025-07-01 17:49:08.837
2025-07-01 17:49:08.837 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.837 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.837
2025-07-01 17:49:08.837 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.837 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.837 alo = 447, ahi = 1101
2025-07-01 17:49:08.837 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.837 blo = 447, bhi = 1101
2025-07-01 17:49:08.837
2025-07-01 17:49:08.837 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.837 r"""
2025-07-01 17:49:08.837 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.838 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.838 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.838 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.838
2025-07-01 17:49:08.838 Example:
2025-07-01 17:49:08.838
2025-07-01 17:49:08.838 >>> d = Differ()
2025-07-01 17:49:08.838 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.838 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.838 >>> print(''.join(results), end="")
2025-07-01 17:49:08.838 - abcDefghiJkl
2025-07-01 17:49:08.838 + abcdefGhijkl
2025-07-01 17:49:08.838 """
2025-07-01 17:49:08.838
2025-07-01 17:49:08.838 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.838 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.838 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.838 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.838 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.838
2025-07-01 17:49:08.842 # search for the pair that matches best without being identical
2025-07-01 17:49:08.842 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.842 # on junk -- unless we have to)
2025-07-01 17:49:08.842 for j in range(blo, bhi):
2025-07-01 17:49:08.842 bj = b[j]
2025-07-01 17:49:08.842 cruncher.set_seq2(bj)
2025-07-01 17:49:08.842 for i in range(alo, ahi):
2025-07-01 17:49:08.842 ai = a[i]
2025-07-01 17:49:08.842 if ai == bj:
2025-07-01 17:49:08.842 if eqi is None:
2025-07-01 17:49:08.842 eqi, eqj = i, j
2025-07-01 17:49:08.842 continue
2025-07-01 17:49:08.842 cruncher.set_seq1(ai)
2025-07-01 17:49:08.842 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.842 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.842 # compares by a factor of 3.
2025-07-01 17:49:08.842 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.842 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.842 # of the computation is cached by cruncher
2025-07-01 17:49:08.842 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.843 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.843 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.843 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.843 if best_ratio < cutoff:
2025-07-01 17:49:08.843 # no non-identical "pretty close" pair
2025-07-01 17:49:08.843 if eqi is None:
2025-07-01 17:49:08.843 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.843 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.843 return
2025-07-01 17:49:08.843 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.843 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.843 else:
2025-07-01 17:49:08.843 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.843 eqi = None
2025-07-01 17:49:08.843
2025-07-01 17:49:08.843 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.843 # identical
2025-07-01 17:49:08.843
2025-07-01 17:49:08.843 # pump out diffs from before the synch point
2025-07-01 17:49:08.843 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.843
2025-07-01 17:49:08.844 # do intraline marking on the synch pair
2025-07-01 17:49:08.844 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.844 if eqi is None:
2025-07-01 17:49:08.844 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.844 atags = btags = ""
2025-07-01 17:49:08.844 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.844 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.844 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.844 if tag == 'replace':
2025-07-01 17:49:08.844 atags += '^' * la
2025-07-01 17:49:08.844 btags += '^' * lb
2025-07-01 17:49:08.844 elif tag == 'delete':
2025-07-01 17:49:08.844 atags += '-' * la
2025-07-01 17:49:08.844 elif tag == 'insert':
2025-07-01 17:49:08.844 btags += '+' * lb
2025-07-01 17:49:08.844 elif tag == 'equal':
2025-07-01 17:49:08.844 atags += ' ' * la
2025-07-01 17:49:08.844 btags += ' ' * lb
2025-07-01 17:49:08.844 else:
2025-07-01 17:49:08.844 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.844 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.845 else:
2025-07-01 17:49:08.845 # the synch pair is identical
2025-07-01 17:49:08.845 yield ' ' + aelt
2025-07-01 17:49:08.845
2025-07-01 17:49:08.845 # pump out diffs from after the synch point
2025-07-01 17:49:08.845 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.845
2025-07-01 17:49:08.845 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.845 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.845
2025-07-01 17:49:08.845 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.845 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.845 alo = 448, ahi = 1101
2025-07-01 17:49:08.845 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.845 blo = 448, bhi = 1101
2025-07-01 17:49:08.845
2025-07-01 17:49:08.845 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.845 g = []
2025-07-01 17:49:08.845 if alo < ahi:
2025-07-01 17:49:08.845 if blo < bhi:
2025-07-01 17:49:08.845 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.846 else:
2025-07-01 17:49:08.846 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.846 elif blo < bhi:
2025-07-01 17:49:08.846 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.846
2025-07-01 17:49:08.846 > yield from g
2025-07-01 17:49:08.846
2025-07-01 17:49:08.846 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.846 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.846
2025-07-01 17:49:08.846 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.846 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.846 alo = 448, ahi = 1101
2025-07-01 17:49:08.846 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.846 blo = 448, bhi = 1101
2025-07-01 17:49:08.846
2025-07-01 17:49:08.846 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.846 r"""
2025-07-01 17:49:08.846 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.846 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.847 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.847 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.847
2025-07-01 17:49:08.847 Example:
2025-07-01 17:49:08.847
2025-07-01 17:49:08.847 >>> d = Differ()
2025-07-01 17:49:08.847 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.847 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.847 >>> print(''.join(results), end="")
2025-07-01 17:49:08.847 - abcDefghiJkl
2025-07-01 17:49:08.847 + abcdefGhijkl
2025-07-01 17:49:08.847 """
2025-07-01 17:49:08.847
2025-07-01 17:49:08.847 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.847 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.847 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.847 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.848 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.848
2025-07-01 17:49:08.848 # search for the pair that matches best without being identical
2025-07-01 17:49:08.848 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.848 # on junk -- unless we have to)
2025-07-01 17:49:08.848 for j in range(blo, bhi):
2025-07-01 17:49:08.848 bj = b[j]
2025-07-01 17:49:08.848 cruncher.set_seq2(bj)
2025-07-01 17:49:08.848 for i in range(alo, ahi):
2025-07-01 17:49:08.848 ai = a[i]
2025-07-01 17:49:08.848 if ai == bj:
2025-07-01 17:49:08.848 if eqi is None:
2025-07-01 17:49:08.848 eqi, eqj = i, j
2025-07-01 17:49:08.848 continue
2025-07-01 17:49:08.848 cruncher.set_seq1(ai)
2025-07-01 17:49:08.848 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.848 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.848 # compares by a factor of 3.
2025-07-01 17:49:08.848 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.848 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.849 # of the computation is cached by cruncher
2025-07-01 17:49:08.849 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.849 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.849 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.849 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.849 if best_ratio < cutoff:
2025-07-01 17:49:08.849 # no non-identical "pretty close" pair
2025-07-01 17:49:08.849 if eqi is None:
2025-07-01 17:49:08.849 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.849 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.849 return
2025-07-01 17:49:08.849 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.849 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.849 else:
2025-07-01 17:49:08.849 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.849 eqi = None
2025-07-01 17:49:08.849
2025-07-01 17:49:08.849 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.849 # identical
2025-07-01 17:49:08.849
2025-07-01 17:49:08.850 # pump out diffs from before the synch point
2025-07-01 17:49:08.850 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.850
2025-07-01 17:49:08.850 # do intraline marking on the synch pair
2025-07-01 17:49:08.850 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.850 if eqi is None:
2025-07-01 17:49:08.850 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.850 atags = btags = ""
2025-07-01 17:49:08.850 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.850 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.850 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.850 if tag == 'replace':
2025-07-01 17:49:08.850 atags += '^' * la
2025-07-01 17:49:08.850 btags += '^' * lb
2025-07-01 17:49:08.850 elif tag == 'delete':
2025-07-01 17:49:08.850 atags += '-' * la
2025-07-01 17:49:08.850 elif tag == 'insert':
2025-07-01 17:49:08.850 btags += '+' * lb
2025-07-01 17:49:08.850 elif tag == 'equal':
2025-07-01 17:49:08.850 atags += ' ' * la
2025-07-01 17:49:08.851 btags += ' ' * lb
2025-07-01 17:49:08.851 else:
2025-07-01 17:49:08.851 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.851 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.851 else:
2025-07-01 17:49:08.851 # the synch pair is identical
2025-07-01 17:49:08.851 yield ' ' + aelt
2025-07-01 17:49:08.851
2025-07-01 17:49:08.851 # pump out diffs from after the synch point
2025-07-01 17:49:08.851 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.851
2025-07-01 17:49:08.851 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.851 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.851
2025-07-01 17:49:08.851 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.851 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.851 alo = 449, ahi = 1101
2025-07-01 17:49:08.851 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.851 blo = 449, bhi = 1101
2025-07-01 17:49:08.851
2025-07-01 17:49:08.852 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.852 g = []
2025-07-01 17:49:08.852 if alo < ahi:
2025-07-01 17:49:08.852 if blo < bhi:
2025-07-01 17:49:08.852 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.852 else:
2025-07-01 17:49:08.852 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.852 elif blo < bhi:
2025-07-01 17:49:08.852 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.852
2025-07-01 17:49:08.852 > yield from g
2025-07-01 17:49:08.852
2025-07-01 17:49:08.852 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.852 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.852
2025-07-01 17:49:08.852 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.852 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.852 alo = 449, ahi = 1101
2025-07-01 17:49:08.852 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.852 blo = 449, bhi = 1101
2025-07-01 17:49:08.852
2025-07-01 17:49:08.852 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.853 r"""
2025-07-01 17:49:08.853 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.853 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.853 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.853 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.853
2025-07-01 17:49:08.853 Example:
2025-07-01 17:49:08.853
2025-07-01 17:49:08.853 >>> d = Differ()
2025-07-01 17:49:08.853 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.853 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.853 >>> print(''.join(results), end="")
2025-07-01 17:49:08.853 - abcDefghiJkl
2025-07-01 17:49:08.853 + abcdefGhijkl
2025-07-01 17:49:08.853 """
2025-07-01 17:49:08.853
2025-07-01 17:49:08.853 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.853 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.853 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.854 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.854 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.854
2025-07-01 17:49:08.854 # search for the pair that matches best without being identical
2025-07-01 17:49:08.854 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.854 # on junk -- unless we have to)
2025-07-01 17:49:08.854 for j in range(blo, bhi):
2025-07-01 17:49:08.854 bj = b[j]
2025-07-01 17:49:08.854 cruncher.set_seq2(bj)
2025-07-01 17:49:08.854 for i in range(alo, ahi):
2025-07-01 17:49:08.854 ai = a[i]
2025-07-01 17:49:08.854 if ai == bj:
2025-07-01 17:49:08.854 if eqi is None:
2025-07-01 17:49:08.854 eqi, eqj = i, j
2025-07-01 17:49:08.854 continue
2025-07-01 17:49:08.854 cruncher.set_seq1(ai)
2025-07-01 17:49:08.854 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.854 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.854 # compares by a factor of 3.
2025-07-01 17:49:08.854 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.854 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.854 # of the computation is cached by cruncher
2025-07-01 17:49:08.860 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.860 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.860 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.861 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.861 if best_ratio < cutoff:
2025-07-01 17:49:08.861 # no non-identical "pretty close" pair
2025-07-01 17:49:08.861 if eqi is None:
2025-07-01 17:49:08.861 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.861 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.861 return
2025-07-01 17:49:08.861 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.861 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.861 else:
2025-07-01 17:49:08.861 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.861 eqi = None
2025-07-01 17:49:08.861
2025-07-01 17:49:08.861 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.861 # identical
2025-07-01 17:49:08.861
2025-07-01 17:49:08.861 # pump out diffs from before the synch point
2025-07-01 17:49:08.861 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.862
2025-07-01 17:49:08.862 # do intraline marking on the synch pair
2025-07-01 17:49:08.862 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.862 if eqi is None:
2025-07-01 17:49:08.862 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.862 atags = btags = ""
2025-07-01 17:49:08.862 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.862 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.862 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.862 if tag == 'replace':
2025-07-01 17:49:08.862 atags += '^' * la
2025-07-01 17:49:08.862 btags += '^' * lb
2025-07-01 17:49:08.862 elif tag == 'delete':
2025-07-01 17:49:08.862 atags += '-' * la
2025-07-01 17:49:08.862 elif tag == 'insert':
2025-07-01 17:49:08.862 btags += '+' * lb
2025-07-01 17:49:08.862 elif tag == 'equal':
2025-07-01 17:49:08.862 atags += ' ' * la
2025-07-01 17:49:08.862 btags += ' ' * lb
2025-07-01 17:49:08.862 else:
2025-07-01 17:49:08.863 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.863 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.863 else:
2025-07-01 17:49:08.863 # the synch pair is identical
2025-07-01 17:49:08.863 yield ' ' + aelt
2025-07-01 17:49:08.863
2025-07-01 17:49:08.863 # pump out diffs from after the synch point
2025-07-01 17:49:08.863 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.863
2025-07-01 17:49:08.863 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.863 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.863
2025-07-01 17:49:08.863 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.863 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.863 alo = 450, ahi = 1101
2025-07-01 17:49:08.863 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.863 blo = 450, bhi = 1101
2025-07-01 17:49:08.863
2025-07-01 17:49:08.863 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.863 g = []
2025-07-01 17:49:08.864 if alo < ahi:
2025-07-01 17:49:08.864 if blo < bhi:
2025-07-01 17:49:08.864 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.864 else:
2025-07-01 17:49:08.864 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.864 elif blo < bhi:
2025-07-01 17:49:08.864 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.864
2025-07-01 17:49:08.864 > yield from g
2025-07-01 17:49:08.864
2025-07-01 17:49:08.864 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.864 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.864
2025-07-01 17:49:08.864 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.864 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.864 alo = 450, ahi = 1101
2025-07-01 17:49:08.864 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.864 blo = 450, bhi = 1101
2025-07-01 17:49:08.864
2025-07-01 17:49:08.864 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.864 r"""
2025-07-01 17:49:08.865 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.865 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.865 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.865 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.865
2025-07-01 17:49:08.865 Example:
2025-07-01 17:49:08.865
2025-07-01 17:49:08.865 >>> d = Differ()
2025-07-01 17:49:08.865 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.865 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.865 >>> print(''.join(results), end="")
2025-07-01 17:49:08.865 - abcDefghiJkl
2025-07-01 17:49:08.865 + abcdefGhijkl
2025-07-01 17:49:08.865 """
2025-07-01 17:49:08.865
2025-07-01 17:49:08.865 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.865 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.865 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.866 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.866 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.866
2025-07-01 17:49:08.866 # search for the pair that matches best without being identical
2025-07-01 17:49:08.866 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.866 # on junk -- unless we have to)
2025-07-01 17:49:08.866 for j in range(blo, bhi):
2025-07-01 17:49:08.866 bj = b[j]
2025-07-01 17:49:08.866 cruncher.set_seq2(bj)
2025-07-01 17:49:08.866 for i in range(alo, ahi):
2025-07-01 17:49:08.866 ai = a[i]
2025-07-01 17:49:08.866 if ai == bj:
2025-07-01 17:49:08.866 if eqi is None:
2025-07-01 17:49:08.866 eqi, eqj = i, j
2025-07-01 17:49:08.866 continue
2025-07-01 17:49:08.866 cruncher.set_seq1(ai)
2025-07-01 17:49:08.866 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.866 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.866 # compares by a factor of 3.
2025-07-01 17:49:08.866 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.866 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.867 # of the computation is cached by cruncher
2025-07-01 17:49:08.867 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.867 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.867 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.867 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.867 if best_ratio < cutoff:
2025-07-01 17:49:08.867 # no non-identical "pretty close" pair
2025-07-01 17:49:08.867 if eqi is None:
2025-07-01 17:49:08.867 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.867 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.867 return
2025-07-01 17:49:08.867 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.867 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.867 else:
2025-07-01 17:49:08.867 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.867 eqi = None
2025-07-01 17:49:08.867
2025-07-01 17:49:08.867 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.867 # identical
2025-07-01 17:49:08.867
2025-07-01 17:49:08.868 # pump out diffs from before the synch point
2025-07-01 17:49:08.868 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.868
2025-07-01 17:49:08.868 # do intraline marking on the synch pair
2025-07-01 17:49:08.868 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.868 if eqi is None:
2025-07-01 17:49:08.868 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.868 atags = btags = ""
2025-07-01 17:49:08.868 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.868 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.868 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.868 if tag == 'replace':
2025-07-01 17:49:08.868 atags += '^' * la
2025-07-01 17:49:08.868 btags += '^' * lb
2025-07-01 17:49:08.868 elif tag == 'delete':
2025-07-01 17:49:08.868 atags += '-' * la
2025-07-01 17:49:08.868 elif tag == 'insert':
2025-07-01 17:49:08.868 btags += '+' * lb
2025-07-01 17:49:08.868 elif tag == 'equal':
2025-07-01 17:49:08.868 atags += ' ' * la
2025-07-01 17:49:08.868 btags += ' ' * lb
2025-07-01 17:49:08.869 else:
2025-07-01 17:49:08.869 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.869 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.869 else:
2025-07-01 17:49:08.869 # the synch pair is identical
2025-07-01 17:49:08.869 yield ' ' + aelt
2025-07-01 17:49:08.869
2025-07-01 17:49:08.869 # pump out diffs from after the synch point
2025-07-01 17:49:08.869 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.869
2025-07-01 17:49:08.869 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.869 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.869
2025-07-01 17:49:08.869 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.869 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.869 alo = 451, ahi = 1101
2025-07-01 17:49:08.869 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.869 blo = 451, bhi = 1101
2025-07-01 17:49:08.869
2025-07-01 17:49:08.869 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.869 g = []
2025-07-01 17:49:08.870 if alo < ahi:
2025-07-01 17:49:08.870 if blo < bhi:
2025-07-01 17:49:08.870 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.870 else:
2025-07-01 17:49:08.870 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.870 elif blo < bhi:
2025-07-01 17:49:08.870 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.870
2025-07-01 17:49:08.870 > yield from g
2025-07-01 17:49:08.870
2025-07-01 17:49:08.870 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.870 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.870
2025-07-01 17:49:08.870 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.870 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.870 alo = 451, ahi = 1101
2025-07-01 17:49:08.870 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.870 blo = 451, bhi = 1101
2025-07-01 17:49:08.873
2025-07-01 17:49:08.873 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.873 r"""
2025-07-01 17:49:08.874 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.874 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.874 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.874 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.874
2025-07-01 17:49:08.874 Example:
2025-07-01 17:49:08.874
2025-07-01 17:49:08.874 >>> d = Differ()
2025-07-01 17:49:08.874 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.874 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.874 >>> print(''.join(results), end="")
2025-07-01 17:49:08.874 - abcDefghiJkl
2025-07-01 17:49:08.874 + abcdefGhijkl
2025-07-01 17:49:08.874 """
2025-07-01 17:49:08.874
2025-07-01 17:49:08.874 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.874 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.874 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.874 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.875 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.875
2025-07-01 17:49:08.875 # search for the pair that matches best without being identical
2025-07-01 17:49:08.875 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.875 # on junk -- unless we have to)
2025-07-01 17:49:08.875 for j in range(blo, bhi):
2025-07-01 17:49:08.875 bj = b[j]
2025-07-01 17:49:08.875 cruncher.set_seq2(bj)
2025-07-01 17:49:08.875 for i in range(alo, ahi):
2025-07-01 17:49:08.875 ai = a[i]
2025-07-01 17:49:08.875 if ai == bj:
2025-07-01 17:49:08.875 if eqi is None:
2025-07-01 17:49:08.875 eqi, eqj = i, j
2025-07-01 17:49:08.875 continue
2025-07-01 17:49:08.875 cruncher.set_seq1(ai)
2025-07-01 17:49:08.875 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.875 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.875 # compares by a factor of 3.
2025-07-01 17:49:08.875 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.875 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.875 # of the computation is cached by cruncher
2025-07-01 17:49:08.875 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.876 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.876 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.876 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.876 if best_ratio < cutoff:
2025-07-01 17:49:08.876 # no non-identical "pretty close" pair
2025-07-01 17:49:08.876 if eqi is None:
2025-07-01 17:49:08.876 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.876 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.876 return
2025-07-01 17:49:08.876 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.876 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.876 else:
2025-07-01 17:49:08.876 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.876 eqi = None
2025-07-01 17:49:08.876
2025-07-01 17:49:08.876 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.876 # identical
2025-07-01 17:49:08.876
2025-07-01 17:49:08.876 # pump out diffs from before the synch point
2025-07-01 17:49:08.876 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.876
2025-07-01 17:49:08.877 # do intraline marking on the synch pair
2025-07-01 17:49:08.877 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.877 if eqi is None:
2025-07-01 17:49:08.877 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.877 atags = btags = ""
2025-07-01 17:49:08.877 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.877 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.877 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.877 if tag == 'replace':
2025-07-01 17:49:08.877 atags += '^' * la
2025-07-01 17:49:08.877 btags += '^' * lb
2025-07-01 17:49:08.877 elif tag == 'delete':
2025-07-01 17:49:08.877 atags += '-' * la
2025-07-01 17:49:08.877 elif tag == 'insert':
2025-07-01 17:49:08.877 btags += '+' * lb
2025-07-01 17:49:08.877 elif tag == 'equal':
2025-07-01 17:49:08.877 atags += ' ' * la
2025-07-01 17:49:08.877 btags += ' ' * lb
2025-07-01 17:49:08.877 else:
2025-07-01 17:49:08.877 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.878 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.878 else:
2025-07-01 17:49:08.878 # the synch pair is identical
2025-07-01 17:49:08.878 yield ' ' + aelt
2025-07-01 17:49:08.878
2025-07-01 17:49:08.878 # pump out diffs from after the synch point
2025-07-01 17:49:08.878 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.878
2025-07-01 17:49:08.878 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.878 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.878
2025-07-01 17:49:08.878 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.878 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.878 alo = 452, ahi = 1101
2025-07-01 17:49:08.878 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.878 blo = 452, bhi = 1101
2025-07-01 17:49:08.878
2025-07-01 17:49:08.878 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.878 g = []
2025-07-01 17:49:08.878 if alo < ahi:
2025-07-01 17:49:08.878 if blo < bhi:
2025-07-01 17:49:08.879 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.879 else:
2025-07-01 17:49:08.879 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.879 elif blo < bhi:
2025-07-01 17:49:08.879 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.879
2025-07-01 17:49:08.879 > yield from g
2025-07-01 17:49:08.879
2025-07-01 17:49:08.879 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.879 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.879
2025-07-01 17:49:08.879 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.879 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.879 alo = 452, ahi = 1101
2025-07-01 17:49:08.879 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.879 blo = 452, bhi = 1101
2025-07-01 17:49:08.879
2025-07-01 17:49:08.879 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.879 r"""
2025-07-01 17:49:08.879 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.879 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.880 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.880 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.880
2025-07-01 17:49:08.880 Example:
2025-07-01 17:49:08.880
2025-07-01 17:49:08.880 >>> d = Differ()
2025-07-01 17:49:08.880 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.880 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.880 >>> print(''.join(results), end="")
2025-07-01 17:49:08.880 - abcDefghiJkl
2025-07-01 17:49:08.880 + abcdefGhijkl
2025-07-01 17:49:08.880 """
2025-07-01 17:49:08.880
2025-07-01 17:49:08.880 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.880 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.880 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.880 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.880 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.880
2025-07-01 17:49:08.880 # search for the pair that matches best without being identical
2025-07-01 17:49:08.881 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.881 # on junk -- unless we have to)
2025-07-01 17:49:08.881 for j in range(blo, bhi):
2025-07-01 17:49:08.881 bj = b[j]
2025-07-01 17:49:08.881 cruncher.set_seq2(bj)
2025-07-01 17:49:08.881 for i in range(alo, ahi):
2025-07-01 17:49:08.881 ai = a[i]
2025-07-01 17:49:08.881 if ai == bj:
2025-07-01 17:49:08.881 if eqi is None:
2025-07-01 17:49:08.881 eqi, eqj = i, j
2025-07-01 17:49:08.881 continue
2025-07-01 17:49:08.881 cruncher.set_seq1(ai)
2025-07-01 17:49:08.881 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.881 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.881 # compares by a factor of 3.
2025-07-01 17:49:08.881 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.881 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.881 # of the computation is cached by cruncher
2025-07-01 17:49:08.881 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.881 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.882 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.882 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.882 if best_ratio < cutoff:
2025-07-01 17:49:08.882 # no non-identical "pretty close" pair
2025-07-01 17:49:08.882 if eqi is None:
2025-07-01 17:49:08.882 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.882 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.882 return
2025-07-01 17:49:08.882 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.882 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.882 else:
2025-07-01 17:49:08.882 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.882 eqi = None
2025-07-01 17:49:08.882
2025-07-01 17:49:08.882 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.882 # identical
2025-07-01 17:49:08.882
2025-07-01 17:49:08.882 # pump out diffs from before the synch point
2025-07-01 17:49:08.882 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.882
2025-07-01 17:49:08.882 # do intraline marking on the synch pair
2025-07-01 17:49:08.883 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.883 if eqi is None:
2025-07-01 17:49:08.883 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.883 atags = btags = ""
2025-07-01 17:49:08.883 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.883 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.883 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.883 if tag == 'replace':
2025-07-01 17:49:08.883 atags += '^' * la
2025-07-01 17:49:08.883 btags += '^' * lb
2025-07-01 17:49:08.883 elif tag == 'delete':
2025-07-01 17:49:08.883 atags += '-' * la
2025-07-01 17:49:08.883 elif tag == 'insert':
2025-07-01 17:49:08.883 btags += '+' * lb
2025-07-01 17:49:08.883 elif tag == 'equal':
2025-07-01 17:49:08.883 atags += ' ' * la
2025-07-01 17:49:08.883 btags += ' ' * lb
2025-07-01 17:49:08.883 else:
2025-07-01 17:49:08.883 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.883 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.883 else:
2025-07-01 17:49:08.883 # the synch pair is identical
2025-07-01 17:49:08.884 yield ' ' + aelt
2025-07-01 17:49:08.884
2025-07-01 17:49:08.884 # pump out diffs from after the synch point
2025-07-01 17:49:08.884 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.884
2025-07-01 17:49:08.884 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.884 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.884
2025-07-01 17:49:08.884 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.884 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.884 alo = 453, ahi = 1101
2025-07-01 17:49:08.884 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.884 blo = 453, bhi = 1101
2025-07-01 17:49:08.884
2025-07-01 17:49:08.884 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.884 g = []
2025-07-01 17:49:08.884 if alo < ahi:
2025-07-01 17:49:08.884 if blo < bhi:
2025-07-01 17:49:08.884 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.884 else:
2025-07-01 17:49:08.884 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.885 elif blo < bhi:
2025-07-01 17:49:08.885 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.885
2025-07-01 17:49:08.885 > yield from g
2025-07-01 17:49:08.885
2025-07-01 17:49:08.885 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.885 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.885
2025-07-01 17:49:08.885 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.885 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.885 alo = 453, ahi = 1101
2025-07-01 17:49:08.885 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.885 blo = 453, bhi = 1101
2025-07-01 17:49:08.885
2025-07-01 17:49:08.885 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.885 r"""
2025-07-01 17:49:08.885 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.885 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.885 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.885 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.885
2025-07-01 17:49:08.885 Example:
2025-07-01 17:49:08.886
2025-07-01 17:49:08.886 >>> d = Differ()
2025-07-01 17:49:08.886 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.886 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.886 >>> print(''.join(results), end="")
2025-07-01 17:49:08.886 - abcDefghiJkl
2025-07-01 17:49:08.886 + abcdefGhijkl
2025-07-01 17:49:08.886 """
2025-07-01 17:49:08.886
2025-07-01 17:49:08.886 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.886 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.886 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.886 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.886 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.886
2025-07-01 17:49:08.886 # search for the pair that matches best without being identical
2025-07-01 17:49:08.886 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.886 # on junk -- unless we have to)
2025-07-01 17:49:08.886 for j in range(blo, bhi):
2025-07-01 17:49:08.892 bj = b[j]
2025-07-01 17:49:08.892 cruncher.set_seq2(bj)
2025-07-01 17:49:08.892 for i in range(alo, ahi):
2025-07-01 17:49:08.892 ai = a[i]
2025-07-01 17:49:08.892 if ai == bj:
2025-07-01 17:49:08.892 if eqi is None:
2025-07-01 17:49:08.892 eqi, eqj = i, j
2025-07-01 17:49:08.892 continue
2025-07-01 17:49:08.892 cruncher.set_seq1(ai)
2025-07-01 17:49:08.892 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.892 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.892 # compares by a factor of 3.
2025-07-01 17:49:08.892 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.892 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.892 # of the computation is cached by cruncher
2025-07-01 17:49:08.892 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.892 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.892 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.892 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.892 if best_ratio < cutoff:
2025-07-01 17:49:08.893 # no non-identical "pretty close" pair
2025-07-01 17:49:08.893 if eqi is None:
2025-07-01 17:49:08.893 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.893 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.893 return
2025-07-01 17:49:08.893 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.893 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.893 else:
2025-07-01 17:49:08.893 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.893 eqi = None
2025-07-01 17:49:08.893
2025-07-01 17:49:08.893 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.893 # identical
2025-07-01 17:49:08.893
2025-07-01 17:49:08.893 # pump out diffs from before the synch point
2025-07-01 17:49:08.893 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.893
2025-07-01 17:49:08.893 # do intraline marking on the synch pair
2025-07-01 17:49:08.893 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.893 if eqi is None:
2025-07-01 17:49:08.893 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.894 atags = btags = ""
2025-07-01 17:49:08.894 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.894 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.894 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.894 if tag == 'replace':
2025-07-01 17:49:08.894 atags += '^' * la
2025-07-01 17:49:08.894 btags += '^' * lb
2025-07-01 17:49:08.894 elif tag == 'delete':
2025-07-01 17:49:08.894 atags += '-' * la
2025-07-01 17:49:08.894 elif tag == 'insert':
2025-07-01 17:49:08.894 btags += '+' * lb
2025-07-01 17:49:08.894 elif tag == 'equal':
2025-07-01 17:49:08.894 atags += ' ' * la
2025-07-01 17:49:08.894 btags += ' ' * lb
2025-07-01 17:49:08.894 else:
2025-07-01 17:49:08.894 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.894 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.894 else:
2025-07-01 17:49:08.894 # the synch pair is identical
2025-07-01 17:49:08.895 yield ' ' + aelt
2025-07-01 17:49:08.895
2025-07-01 17:49:08.895 # pump out diffs from after the synch point
2025-07-01 17:49:08.895 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.895
2025-07-01 17:49:08.895 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.895 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.895
2025-07-01 17:49:08.895 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.895 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.895 alo = 454, ahi = 1101
2025-07-01 17:49:08.895 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.895 blo = 454, bhi = 1101
2025-07-01 17:49:08.895
2025-07-01 17:49:08.895 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.895 g = []
2025-07-01 17:49:08.895 if alo < ahi:
2025-07-01 17:49:08.895 if blo < bhi:
2025-07-01 17:49:08.895 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.896 else:
2025-07-01 17:49:08.896 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.896 elif blo < bhi:
2025-07-01 17:49:08.896 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.896
2025-07-01 17:49:08.896 > yield from g
2025-07-01 17:49:08.896
2025-07-01 17:49:08.896 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.896 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.896
2025-07-01 17:49:08.896 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.896 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.896 alo = 454, ahi = 1101
2025-07-01 17:49:08.896 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.896 blo = 454, bhi = 1101
2025-07-01 17:49:08.896
2025-07-01 17:49:08.896 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.896 r"""
2025-07-01 17:49:08.896 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.896 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.896 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.897 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.897
2025-07-01 17:49:08.897 Example:
2025-07-01 17:49:08.897
2025-07-01 17:49:08.897 >>> d = Differ()
2025-07-01 17:49:08.897 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.897 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.897 >>> print(''.join(results), end="")
2025-07-01 17:49:08.897 - abcDefghiJkl
2025-07-01 17:49:08.897 + abcdefGhijkl
2025-07-01 17:49:08.897 """
2025-07-01 17:49:08.897
2025-07-01 17:49:08.897 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.897 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.897 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.897 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.897 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.897
2025-07-01 17:49:08.898 # search for the pair that matches best without being identical
2025-07-01 17:49:08.898 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.898 # on junk -- unless we have to)
2025-07-01 17:49:08.898 for j in range(blo, bhi):
2025-07-01 17:49:08.898 bj = b[j]
2025-07-01 17:49:08.898 cruncher.set_seq2(bj)
2025-07-01 17:49:08.898 for i in range(alo, ahi):
2025-07-01 17:49:08.898 ai = a[i]
2025-07-01 17:49:08.898 if ai == bj:
2025-07-01 17:49:08.898 if eqi is None:
2025-07-01 17:49:08.898 eqi, eqj = i, j
2025-07-01 17:49:08.898 continue
2025-07-01 17:49:08.898 cruncher.set_seq1(ai)
2025-07-01 17:49:08.898 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.898 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.898 # compares by a factor of 3.
2025-07-01 17:49:08.898 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.898 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.898 # of the computation is cached by cruncher
2025-07-01 17:49:08.898 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.899 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.899 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.899 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.899 if best_ratio < cutoff:
2025-07-01 17:49:08.899 # no non-identical "pretty close" pair
2025-07-01 17:49:08.899 if eqi is None:
2025-07-01 17:49:08.899 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.899 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.899 return
2025-07-01 17:49:08.899 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.899 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.899 else:
2025-07-01 17:49:08.899 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.899 eqi = None
2025-07-01 17:49:08.899
2025-07-01 17:49:08.899 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.899 # identical
2025-07-01 17:49:08.899
2025-07-01 17:49:08.899 # pump out diffs from before the synch point
2025-07-01 17:49:08.899 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.899
2025-07-01 17:49:08.900 # do intraline marking on the synch pair
2025-07-01 17:49:08.900 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.900 if eqi is None:
2025-07-01 17:49:08.900 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.900 atags = btags = ""
2025-07-01 17:49:08.900 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.900 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.900 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.900 if tag == 'replace':
2025-07-01 17:49:08.900 atags += '^' * la
2025-07-01 17:49:08.900 btags += '^' * lb
2025-07-01 17:49:08.900 elif tag == 'delete':
2025-07-01 17:49:08.900 atags += '-' * la
2025-07-01 17:49:08.900 elif tag == 'insert':
2025-07-01 17:49:08.900 btags += '+' * lb
2025-07-01 17:49:08.900 elif tag == 'equal':
2025-07-01 17:49:08.900 atags += ' ' * la
2025-07-01 17:49:08.900 btags += ' ' * lb
2025-07-01 17:49:08.900 else:
2025-07-01 17:49:08.900 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.901 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.901 else:
2025-07-01 17:49:08.901 # the synch pair is identical
2025-07-01 17:49:08.901 yield ' ' + aelt
2025-07-01 17:49:08.901
2025-07-01 17:49:08.901 # pump out diffs from after the synch point
2025-07-01 17:49:08.901 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.901
2025-07-01 17:49:08.901 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.901 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.901
2025-07-01 17:49:08.901 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.901 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.901 alo = 455, ahi = 1101
2025-07-01 17:49:08.901 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.901 blo = 455, bhi = 1101
2025-07-01 17:49:08.901
2025-07-01 17:49:08.901 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.901 g = []
2025-07-01 17:49:08.901 if alo < ahi:
2025-07-01 17:49:08.901 if blo < bhi:
2025-07-01 17:49:08.904 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.905 else:
2025-07-01 17:49:08.905 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.905 elif blo < bhi:
2025-07-01 17:49:08.905 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.905
2025-07-01 17:49:08.905 > yield from g
2025-07-01 17:49:08.905
2025-07-01 17:49:08.905 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.905 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.905
2025-07-01 17:49:08.905 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.905 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.905 alo = 455, ahi = 1101
2025-07-01 17:49:08.905 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.905 blo = 455, bhi = 1101
2025-07-01 17:49:08.905
2025-07-01 17:49:08.905 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.905 r"""
2025-07-01 17:49:08.905 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.906 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.906 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.906 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.906
2025-07-01 17:49:08.906 Example:
2025-07-01 17:49:08.906
2025-07-01 17:49:08.906 >>> d = Differ()
2025-07-01 17:49:08.906 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.906 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.906 >>> print(''.join(results), end="")
2025-07-01 17:49:08.906 - abcDefghiJkl
2025-07-01 17:49:08.906 + abcdefGhijkl
2025-07-01 17:49:08.906 """
2025-07-01 17:49:08.906
2025-07-01 17:49:08.906 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.906 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.906 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.906 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.907 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.907
2025-07-01 17:49:08.907 # search for the pair that matches best without being identical
2025-07-01 17:49:08.907 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.907 # on junk -- unless we have to)
2025-07-01 17:49:08.907 for j in range(blo, bhi):
2025-07-01 17:49:08.907 bj = b[j]
2025-07-01 17:49:08.907 cruncher.set_seq2(bj)
2025-07-01 17:49:08.907 for i in range(alo, ahi):
2025-07-01 17:49:08.907 ai = a[i]
2025-07-01 17:49:08.907 if ai == bj:
2025-07-01 17:49:08.907 if eqi is None:
2025-07-01 17:49:08.907 eqi, eqj = i, j
2025-07-01 17:49:08.907 continue
2025-07-01 17:49:08.907 cruncher.set_seq1(ai)
2025-07-01 17:49:08.907 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.907 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.907 # compares by a factor of 3.
2025-07-01 17:49:08.907 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.907 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.907 # of the computation is cached by cruncher
2025-07-01 17:49:08.908 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.908 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.908 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.908 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.908 if best_ratio < cutoff:
2025-07-01 17:49:08.908 # no non-identical "pretty close" pair
2025-07-01 17:49:08.908 if eqi is None:
2025-07-01 17:49:08.908 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.908 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.908 return
2025-07-01 17:49:08.908 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.908 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.908 else:
2025-07-01 17:49:08.908 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.908 eqi = None
2025-07-01 17:49:08.908
2025-07-01 17:49:08.908 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.908 # identical
2025-07-01 17:49:08.908
2025-07-01 17:49:08.908 # pump out diffs from before the synch point
2025-07-01 17:49:08.908 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.908
2025-07-01 17:49:08.908 # do intraline marking on the synch pair
2025-07-01 17:49:08.908 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.908 if eqi is None:
2025-07-01 17:49:08.908 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.908 atags = btags = ""
2025-07-01 17:49:08.909 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.909 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.909 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.909 if tag == 'replace':
2025-07-01 17:49:08.909 atags += '^' * la
2025-07-01 17:49:08.909 btags += '^' * lb
2025-07-01 17:49:08.909 elif tag == 'delete':
2025-07-01 17:49:08.909 atags += '-' * la
2025-07-01 17:49:08.909 elif tag == 'insert':
2025-07-01 17:49:08.909 btags += '+' * lb
2025-07-01 17:49:08.909 elif tag == 'equal':
2025-07-01 17:49:08.909 atags += ' ' * la
2025-07-01 17:49:08.909 btags += ' ' * lb
2025-07-01 17:49:08.909 else:
2025-07-01 17:49:08.909 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.909 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.909 else:
2025-07-01 17:49:08.909 # the synch pair is identical
2025-07-01 17:49:08.909 yield ' ' + aelt
2025-07-01 17:49:08.909
2025-07-01 17:49:08.909 # pump out diffs from after the synch point
2025-07-01 17:49:08.910 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.910
2025-07-01 17:49:08.910 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.910 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.910
2025-07-01 17:49:08.910 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.910 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.910 alo = 456, ahi = 1101
2025-07-01 17:49:08.910 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.910 blo = 456, bhi = 1101
2025-07-01 17:49:08.910
2025-07-01 17:49:08.910 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.910 g = []
2025-07-01 17:49:08.910 if alo < ahi:
2025-07-01 17:49:08.910 if blo < bhi:
2025-07-01 17:49:08.910 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.910 else:
2025-07-01 17:49:08.910 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.910 elif blo < bhi:
2025-07-01 17:49:08.910 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.910
2025-07-01 17:49:08.911 > yield from g
2025-07-01 17:49:08.911
2025-07-01 17:49:08.911 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.911 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.911
2025-07-01 17:49:08.911 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.911 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.911 alo = 456, ahi = 1101
2025-07-01 17:49:08.911 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.911 blo = 456, bhi = 1101
2025-07-01 17:49:08.911
2025-07-01 17:49:08.911 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.911 r"""
2025-07-01 17:49:08.911 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.911 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.911 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.911 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.911
2025-07-01 17:49:08.911 Example:
2025-07-01 17:49:08.911
2025-07-01 17:49:08.911 >>> d = Differ()
2025-07-01 17:49:08.912 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.912 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.912 >>> print(''.join(results), end="")
2025-07-01 17:49:08.912 - abcDefghiJkl
2025-07-01 17:49:08.912 + abcdefGhijkl
2025-07-01 17:49:08.912 """
2025-07-01 17:49:08.912
2025-07-01 17:49:08.912 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.912 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.912 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.912 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.912 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.912
2025-07-01 17:49:08.912 # search for the pair that matches best without being identical
2025-07-01 17:49:08.912 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.912 # on junk -- unless we have to)
2025-07-01 17:49:08.912 for j in range(blo, bhi):
2025-07-01 17:49:08.912 bj = b[j]
2025-07-01 17:49:08.912 cruncher.set_seq2(bj)
2025-07-01 17:49:08.912 for i in range(alo, ahi):
2025-07-01 17:49:08.913 ai = a[i]
2025-07-01 17:49:08.913 if ai == bj:
2025-07-01 17:49:08.913 if eqi is None:
2025-07-01 17:49:08.913 eqi, eqj = i, j
2025-07-01 17:49:08.913 continue
2025-07-01 17:49:08.913 cruncher.set_seq1(ai)
2025-07-01 17:49:08.913 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.913 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.913 # compares by a factor of 3.
2025-07-01 17:49:08.913 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.913 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.913 # of the computation is cached by cruncher
2025-07-01 17:49:08.913 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.913 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.913 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.913 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.913 if best_ratio < cutoff:
2025-07-01 17:49:08.913 # no non-identical "pretty close" pair
2025-07-01 17:49:08.913 if eqi is None:
2025-07-01 17:49:08.913 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.914 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.914 return
2025-07-01 17:49:08.914 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.914 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.914 else:
2025-07-01 17:49:08.914 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.914 eqi = None
2025-07-01 17:49:08.914
2025-07-01 17:49:08.914 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.914 # identical
2025-07-01 17:49:08.914
2025-07-01 17:49:08.914 # pump out diffs from before the synch point
2025-07-01 17:49:08.914 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.914
2025-07-01 17:49:08.914 # do intraline marking on the synch pair
2025-07-01 17:49:08.914 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.914 if eqi is None:
2025-07-01 17:49:08.914 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.914 atags = btags = ""
2025-07-01 17:49:08.915 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.915 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.915 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.915 if tag == 'replace':
2025-07-01 17:49:08.915 atags += '^' * la
2025-07-01 17:49:08.915 btags += '^' * lb
2025-07-01 17:49:08.915 elif tag == 'delete':
2025-07-01 17:49:08.915 atags += '-' * la
2025-07-01 17:49:08.915 elif tag == 'insert':
2025-07-01 17:49:08.915 btags += '+' * lb
2025-07-01 17:49:08.915 elif tag == 'equal':
2025-07-01 17:49:08.915 atags += ' ' * la
2025-07-01 17:49:08.915 btags += ' ' * lb
2025-07-01 17:49:08.915 else:
2025-07-01 17:49:08.915 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.915 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.915 else:
2025-07-01 17:49:08.915 # the synch pair is identical
2025-07-01 17:49:08.915 yield ' ' + aelt
2025-07-01 17:49:08.915
2025-07-01 17:49:08.915 # pump out diffs from after the synch point
2025-07-01 17:49:08.916 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.916
2025-07-01 17:49:08.916 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.916 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.916
2025-07-01 17:49:08.916 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.916 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.916 alo = 457, ahi = 1101
2025-07-01 17:49:08.916 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.916 blo = 457, bhi = 1101
2025-07-01 17:49:08.916
2025-07-01 17:49:08.916 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.916 g = []
2025-07-01 17:49:08.916 if alo < ahi:
2025-07-01 17:49:08.916 if blo < bhi:
2025-07-01 17:49:08.916 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.916 else:
2025-07-01 17:49:08.916 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.916 elif blo < bhi:
2025-07-01 17:49:08.916 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.916
2025-07-01 17:49:08.917 > yield from g
2025-07-01 17:49:08.923
2025-07-01 17:49:08.923 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.923 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.923
2025-07-01 17:49:08.923 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.923 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.923 alo = 457, ahi = 1101
2025-07-01 17:49:08.923 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.923 blo = 457, bhi = 1101
2025-07-01 17:49:08.923
2025-07-01 17:49:08.923 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.923 r"""
2025-07-01 17:49:08.923 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.923 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.923 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.923 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.924
2025-07-01 17:49:08.924 Example:
2025-07-01 17:49:08.924
2025-07-01 17:49:08.924 >>> d = Differ()
2025-07-01 17:49:08.924 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.924 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.924 >>> print(''.join(results), end="")
2025-07-01 17:49:08.924 - abcDefghiJkl
2025-07-01 17:49:08.924 + abcdefGhijkl
2025-07-01 17:49:08.924 """
2025-07-01 17:49:08.924
2025-07-01 17:49:08.924 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.924 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.924 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.924 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.924 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.924
2025-07-01 17:49:08.924 # search for the pair that matches best without being identical
2025-07-01 17:49:08.924 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.925 # on junk -- unless we have to)
2025-07-01 17:49:08.925 for j in range(blo, bhi):
2025-07-01 17:49:08.925 bj = b[j]
2025-07-01 17:49:08.925 cruncher.set_seq2(bj)
2025-07-01 17:49:08.925 for i in range(alo, ahi):
2025-07-01 17:49:08.925 ai = a[i]
2025-07-01 17:49:08.925 if ai == bj:
2025-07-01 17:49:08.925 if eqi is None:
2025-07-01 17:49:08.925 eqi, eqj = i, j
2025-07-01 17:49:08.925 continue
2025-07-01 17:49:08.925 cruncher.set_seq1(ai)
2025-07-01 17:49:08.925 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.925 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.925 # compares by a factor of 3.
2025-07-01 17:49:08.925 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.925 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.925 # of the computation is cached by cruncher
2025-07-01 17:49:08.925 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.925 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.925 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.925 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.925 if best_ratio < cutoff:
2025-07-01 17:49:08.926 # no non-identical "pretty close" pair
2025-07-01 17:49:08.926 if eqi is None:
2025-07-01 17:49:08.926 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.926 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.926 return
2025-07-01 17:49:08.926 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.926 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.926 else:
2025-07-01 17:49:08.926 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.926 eqi = None
2025-07-01 17:49:08.926
2025-07-01 17:49:08.926 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.926 # identical
2025-07-01 17:49:08.926
2025-07-01 17:49:08.926 # pump out diffs from before the synch point
2025-07-01 17:49:08.926 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.926
2025-07-01 17:49:08.926 # do intraline marking on the synch pair
2025-07-01 17:49:08.926 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.926 if eqi is None:
2025-07-01 17:49:08.926 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.927 atags = btags = ""
2025-07-01 17:49:08.927 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.927 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.927 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.927 if tag == 'replace':
2025-07-01 17:49:08.927 atags += '^' * la
2025-07-01 17:49:08.927 btags += '^' * lb
2025-07-01 17:49:08.927 elif tag == 'delete':
2025-07-01 17:49:08.927 atags += '-' * la
2025-07-01 17:49:08.927 elif tag == 'insert':
2025-07-01 17:49:08.927 btags += '+' * lb
2025-07-01 17:49:08.927 elif tag == 'equal':
2025-07-01 17:49:08.927 atags += ' ' * la
2025-07-01 17:49:08.927 btags += ' ' * lb
2025-07-01 17:49:08.927 else:
2025-07-01 17:49:08.927 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.927 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.927 else:
2025-07-01 17:49:08.927 # the synch pair is identical
2025-07-01 17:49:08.927 yield ' ' + aelt
2025-07-01 17:49:08.927
2025-07-01 17:49:08.927 # pump out diffs from after the synch point
2025-07-01 17:49:08.928 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.928
2025-07-01 17:49:08.928 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.928 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.928
2025-07-01 17:49:08.928 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.928 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.928 alo = 458, ahi = 1101
2025-07-01 17:49:08.928 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.928 blo = 458, bhi = 1101
2025-07-01 17:49:08.928
2025-07-01 17:49:08.928 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.928 g = []
2025-07-01 17:49:08.928 if alo < ahi:
2025-07-01 17:49:08.928 if blo < bhi:
2025-07-01 17:49:08.928 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.928 else:
2025-07-01 17:49:08.928 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.929 elif blo < bhi:
2025-07-01 17:49:08.929 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.929
2025-07-01 17:49:08.929 > yield from g
2025-07-01 17:49:08.929
2025-07-01 17:49:08.929 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.929 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.929
2025-07-01 17:49:08.929 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.929 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.929 alo = 458, ahi = 1101
2025-07-01 17:49:08.929 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.929 blo = 458, bhi = 1101
2025-07-01 17:49:08.929
2025-07-01 17:49:08.929 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.929 r"""
2025-07-01 17:49:08.929 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.929 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.929 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.929 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.930
2025-07-01 17:49:08.930 Example:
2025-07-01 17:49:08.930
2025-07-01 17:49:08.930 >>> d = Differ()
2025-07-01 17:49:08.930 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.930 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.930 >>> print(''.join(results), end="")
2025-07-01 17:49:08.930 - abcDefghiJkl
2025-07-01 17:49:08.930 + abcdefGhijkl
2025-07-01 17:49:08.930 """
2025-07-01 17:49:08.930
2025-07-01 17:49:08.930 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.930 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.930 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.930 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.930 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.930
2025-07-01 17:49:08.930 # search for the pair that matches best without being identical
2025-07-01 17:49:08.930 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.931 # on junk -- unless we have to)
2025-07-01 17:49:08.931 for j in range(blo, bhi):
2025-07-01 17:49:08.931 bj = b[j]
2025-07-01 17:49:08.931 cruncher.set_seq2(bj)
2025-07-01 17:49:08.931 for i in range(alo, ahi):
2025-07-01 17:49:08.931 ai = a[i]
2025-07-01 17:49:08.931 if ai == bj:
2025-07-01 17:49:08.931 if eqi is None:
2025-07-01 17:49:08.931 eqi, eqj = i, j
2025-07-01 17:49:08.931 continue
2025-07-01 17:49:08.931 cruncher.set_seq1(ai)
2025-07-01 17:49:08.931 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.931 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.931 # compares by a factor of 3.
2025-07-01 17:49:08.931 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.931 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.931 # of the computation is cached by cruncher
2025-07-01 17:49:08.931 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.931 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.931 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.931 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.931 if best_ratio < cutoff:
2025-07-01 17:49:08.932 # no non-identical "pretty close" pair
2025-07-01 17:49:08.932 if eqi is None:
2025-07-01 17:49:08.932 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.932 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.932 return
2025-07-01 17:49:08.932 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.932 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.932 else:
2025-07-01 17:49:08.932 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.932 eqi = None
2025-07-01 17:49:08.932
2025-07-01 17:49:08.932 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.932 # identical
2025-07-01 17:49:08.932
2025-07-01 17:49:08.932 # pump out diffs from before the synch point
2025-07-01 17:49:08.932 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.932
2025-07-01 17:49:08.932 # do intraline marking on the synch pair
2025-07-01 17:49:08.932 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.932 if eqi is None:
2025-07-01 17:49:08.932 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.935 atags = btags = ""
2025-07-01 17:49:08.935 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.935 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.935 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.935 if tag == 'replace':
2025-07-01 17:49:08.935 atags += '^' * la
2025-07-01 17:49:08.935 btags += '^' * lb
2025-07-01 17:49:08.935 elif tag == 'delete':
2025-07-01 17:49:08.935 atags += '-' * la
2025-07-01 17:49:08.935 elif tag == 'insert':
2025-07-01 17:49:08.935 btags += '+' * lb
2025-07-01 17:49:08.935 elif tag == 'equal':
2025-07-01 17:49:08.935 atags += ' ' * la
2025-07-01 17:49:08.935 btags += ' ' * lb
2025-07-01 17:49:08.935 else:
2025-07-01 17:49:08.935 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.935 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.935 else:
2025-07-01 17:49:08.935 # the synch pair is identical
2025-07-01 17:49:08.935 yield ' ' + aelt
2025-07-01 17:49:08.936
2025-07-01 17:49:08.936 # pump out diffs from after the synch point
2025-07-01 17:49:08.936 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.936
2025-07-01 17:49:08.936 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.936 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.936
2025-07-01 17:49:08.936 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.936 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.936 alo = 459, ahi = 1101
2025-07-01 17:49:08.936 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.936 blo = 459, bhi = 1101
2025-07-01 17:49:08.936
2025-07-01 17:49:08.936 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.936 g = []
2025-07-01 17:49:08.936 if alo < ahi:
2025-07-01 17:49:08.936 if blo < bhi:
2025-07-01 17:49:08.936 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.936 else:
2025-07-01 17:49:08.936 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.937 elif blo < bhi:
2025-07-01 17:49:08.937 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.937
2025-07-01 17:49:08.937 > yield from g
2025-07-01 17:49:08.937
2025-07-01 17:49:08.937 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.937 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.937
2025-07-01 17:49:08.937 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.937 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.937 alo = 459, ahi = 1101
2025-07-01 17:49:08.937 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.937 blo = 459, bhi = 1101
2025-07-01 17:49:08.937
2025-07-01 17:49:08.937 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.937 r"""
2025-07-01 17:49:08.937 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.937 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.937 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.937 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.937
2025-07-01 17:49:08.937 Example:
2025-07-01 17:49:08.938
2025-07-01 17:49:08.938 >>> d = Differ()
2025-07-01 17:49:08.938 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.938 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.938 >>> print(''.join(results), end="")
2025-07-01 17:49:08.938 - abcDefghiJkl
2025-07-01 17:49:08.938 + abcdefGhijkl
2025-07-01 17:49:08.938 """
2025-07-01 17:49:08.938
2025-07-01 17:49:08.938 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.938 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.938 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.938 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.938 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.938
2025-07-01 17:49:08.938 # search for the pair that matches best without being identical
2025-07-01 17:49:08.939 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.939 # on junk -- unless we have to)
2025-07-01 17:49:08.939 for j in range(blo, bhi):
2025-07-01 17:49:08.939 bj = b[j]
2025-07-01 17:49:08.939 cruncher.set_seq2(bj)
2025-07-01 17:49:08.939 for i in range(alo, ahi):
2025-07-01 17:49:08.939 ai = a[i]
2025-07-01 17:49:08.939 if ai == bj:
2025-07-01 17:49:08.939 if eqi is None:
2025-07-01 17:49:08.939 eqi, eqj = i, j
2025-07-01 17:49:08.939 continue
2025-07-01 17:49:08.939 cruncher.set_seq1(ai)
2025-07-01 17:49:08.939 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.939 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.939 # compares by a factor of 3.
2025-07-01 17:49:08.939 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.939 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.939 # of the computation is cached by cruncher
2025-07-01 17:49:08.939 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.939 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.940 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.940 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.940 if best_ratio < cutoff:
2025-07-01 17:49:08.940 # no non-identical "pretty close" pair
2025-07-01 17:49:08.940 if eqi is None:
2025-07-01 17:49:08.940 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.940 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.940 return
2025-07-01 17:49:08.940 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.940 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.940 else:
2025-07-01 17:49:08.940 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.940 eqi = None
2025-07-01 17:49:08.940
2025-07-01 17:49:08.940 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.940 # identical
2025-07-01 17:49:08.940
2025-07-01 17:49:08.940 # pump out diffs from before the synch point
2025-07-01 17:49:08.940 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.940
2025-07-01 17:49:08.940 # do intraline marking on the synch pair
2025-07-01 17:49:08.941 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.941 if eqi is None:
2025-07-01 17:49:08.941 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.941 atags = btags = ""
2025-07-01 17:49:08.941 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.941 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.941 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.941 if tag == 'replace':
2025-07-01 17:49:08.941 atags += '^' * la
2025-07-01 17:49:08.941 btags += '^' * lb
2025-07-01 17:49:08.941 elif tag == 'delete':
2025-07-01 17:49:08.941 atags += '-' * la
2025-07-01 17:49:08.941 elif tag == 'insert':
2025-07-01 17:49:08.941 btags += '+' * lb
2025-07-01 17:49:08.941 elif tag == 'equal':
2025-07-01 17:49:08.941 atags += ' ' * la
2025-07-01 17:49:08.941 btags += ' ' * lb
2025-07-01 17:49:08.941 else:
2025-07-01 17:49:08.941 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.941 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.942 else:
2025-07-01 17:49:08.942 # the synch pair is identical
2025-07-01 17:49:08.942 yield ' ' + aelt
2025-07-01 17:49:08.942
2025-07-01 17:49:08.942 # pump out diffs from after the synch point
2025-07-01 17:49:08.942 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.942
2025-07-01 17:49:08.942 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.942 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.942
2025-07-01 17:49:08.942 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.942 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.942 alo = 460, ahi = 1101
2025-07-01 17:49:08.942 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.942 blo = 460, bhi = 1101
2025-07-01 17:49:08.942
2025-07-01 17:49:08.942 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.942 g = []
2025-07-01 17:49:08.942 if alo < ahi:
2025-07-01 17:49:08.942 if blo < bhi:
2025-07-01 17:49:08.942 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.943 else:
2025-07-01 17:49:08.943 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.943 elif blo < bhi:
2025-07-01 17:49:08.943 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.943
2025-07-01 17:49:08.943 > yield from g
2025-07-01 17:49:08.943
2025-07-01 17:49:08.943 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.943 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.943
2025-07-01 17:49:08.943 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.943 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.943 alo = 460, ahi = 1101
2025-07-01 17:49:08.943 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.943 blo = 460, bhi = 1101
2025-07-01 17:49:08.943
2025-07-01 17:49:08.943 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.943 r"""
2025-07-01 17:49:08.943 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.943 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.944 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.944 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.944
2025-07-01 17:49:08.944 Example:
2025-07-01 17:49:08.944
2025-07-01 17:49:08.944 >>> d = Differ()
2025-07-01 17:49:08.944 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.944 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.944 >>> print(''.join(results), end="")
2025-07-01 17:49:08.944 - abcDefghiJkl
2025-07-01 17:49:08.944 + abcdefGhijkl
2025-07-01 17:49:08.944 """
2025-07-01 17:49:08.944
2025-07-01 17:49:08.944 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.944 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.944 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.944 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.944 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.944
2025-07-01 17:49:08.945 # search for the pair that matches best without being identical
2025-07-01 17:49:08.945 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.945 # on junk -- unless we have to)
2025-07-01 17:49:08.945 for j in range(blo, bhi):
2025-07-01 17:49:08.945 bj = b[j]
2025-07-01 17:49:08.945 cruncher.set_seq2(bj)
2025-07-01 17:49:08.945 for i in range(alo, ahi):
2025-07-01 17:49:08.945 ai = a[i]
2025-07-01 17:49:08.945 if ai == bj:
2025-07-01 17:49:08.945 if eqi is None:
2025-07-01 17:49:08.945 eqi, eqj = i, j
2025-07-01 17:49:08.945 continue
2025-07-01 17:49:08.945 cruncher.set_seq1(ai)
2025-07-01 17:49:08.945 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.945 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.945 # compares by a factor of 3.
2025-07-01 17:49:08.945 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.945 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.945 # of the computation is cached by cruncher
2025-07-01 17:49:08.945 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.945 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.946 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.946 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.946 if best_ratio < cutoff:
2025-07-01 17:49:08.946 # no non-identical "pretty close" pair
2025-07-01 17:49:08.946 if eqi is None:
2025-07-01 17:49:08.946 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.946 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.946 return
2025-07-01 17:49:08.946 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.946 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.946 else:
2025-07-01 17:49:08.946 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.946 eqi = None
2025-07-01 17:49:08.946
2025-07-01 17:49:08.946 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.946 # identical
2025-07-01 17:49:08.946
2025-07-01 17:49:08.946 # pump out diffs from before the synch point
2025-07-01 17:49:08.946 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.946
2025-07-01 17:49:08.946 # do intraline marking on the synch pair
2025-07-01 17:49:08.947 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.947 if eqi is None:
2025-07-01 17:49:08.947 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.947 atags = btags = ""
2025-07-01 17:49:08.947 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.947 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.947 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.947 if tag == 'replace':
2025-07-01 17:49:08.947 atags += '^' * la
2025-07-01 17:49:08.947 btags += '^' * lb
2025-07-01 17:49:08.947 elif tag == 'delete':
2025-07-01 17:49:08.947 atags += '-' * la
2025-07-01 17:49:08.947 elif tag == 'insert':
2025-07-01 17:49:08.947 btags += '+' * lb
2025-07-01 17:49:08.947 elif tag == 'equal':
2025-07-01 17:49:08.947 atags += ' ' * la
2025-07-01 17:49:08.947 btags += ' ' * lb
2025-07-01 17:49:08.953 else:
2025-07-01 17:49:08.953 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.953 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.953 else:
2025-07-01 17:49:08.953 # the synch pair is identical
2025-07-01 17:49:08.953 yield ' ' + aelt
2025-07-01 17:49:08.953
2025-07-01 17:49:08.953 # pump out diffs from after the synch point
2025-07-01 17:49:08.953 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.953
2025-07-01 17:49:08.953 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.953 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.953
2025-07-01 17:49:08.953 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.953 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.953 alo = 461, ahi = 1101
2025-07-01 17:49:08.953 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.953 blo = 461, bhi = 1101
2025-07-01 17:49:08.953
2025-07-01 17:49:08.954 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.954 g = []
2025-07-01 17:49:08.954 if alo < ahi:
2025-07-01 17:49:08.954 if blo < bhi:
2025-07-01 17:49:08.954 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.954 else:
2025-07-01 17:49:08.954 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.954 elif blo < bhi:
2025-07-01 17:49:08.954 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.954
2025-07-01 17:49:08.954 > yield from g
2025-07-01 17:49:08.954
2025-07-01 17:49:08.954 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.954 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.954
2025-07-01 17:49:08.954 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.954 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.954 alo = 461, ahi = 1101
2025-07-01 17:49:08.954 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.954 blo = 461, bhi = 1101
2025-07-01 17:49:08.954
2025-07-01 17:49:08.955 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.955 r"""
2025-07-01 17:49:08.955 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.955 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.955 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.955 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.955
2025-07-01 17:49:08.955 Example:
2025-07-01 17:49:08.955
2025-07-01 17:49:08.955 >>> d = Differ()
2025-07-01 17:49:08.955 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.955 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.955 >>> print(''.join(results), end="")
2025-07-01 17:49:08.955 - abcDefghiJkl
2025-07-01 17:49:08.955 + abcdefGhijkl
2025-07-01 17:49:08.955 """
2025-07-01 17:49:08.955
2025-07-01 17:49:08.955 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.955 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.955 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.956 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.956 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.956
2025-07-01 17:49:08.956 # search for the pair that matches best without being identical
2025-07-01 17:49:08.956 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.956 # on junk -- unless we have to)
2025-07-01 17:49:08.956 for j in range(blo, bhi):
2025-07-01 17:49:08.956 bj = b[j]
2025-07-01 17:49:08.956 cruncher.set_seq2(bj)
2025-07-01 17:49:08.956 for i in range(alo, ahi):
2025-07-01 17:49:08.956 ai = a[i]
2025-07-01 17:49:08.956 if ai == bj:
2025-07-01 17:49:08.956 if eqi is None:
2025-07-01 17:49:08.956 eqi, eqj = i, j
2025-07-01 17:49:08.956 continue
2025-07-01 17:49:08.956 cruncher.set_seq1(ai)
2025-07-01 17:49:08.956 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.956 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.956 # compares by a factor of 3.
2025-07-01 17:49:08.956 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.956 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.957 # of the computation is cached by cruncher
2025-07-01 17:49:08.957 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.957 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.957 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.957 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.957 if best_ratio < cutoff:
2025-07-01 17:49:08.957 # no non-identical "pretty close" pair
2025-07-01 17:49:08.957 if eqi is None:
2025-07-01 17:49:08.957 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.957 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.957 return
2025-07-01 17:49:08.957 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.957 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.957 else:
2025-07-01 17:49:08.957 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.957 eqi = None
2025-07-01 17:49:08.957
2025-07-01 17:49:08.957 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.957 # identical
2025-07-01 17:49:08.957
2025-07-01 17:49:08.957 # pump out diffs from before the synch point
2025-07-01 17:49:08.958 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.958
2025-07-01 17:49:08.958 # do intraline marking on the synch pair
2025-07-01 17:49:08.958 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.958 if eqi is None:
2025-07-01 17:49:08.958 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.958 atags = btags = ""
2025-07-01 17:49:08.958 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.958 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.958 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.958 if tag == 'replace':
2025-07-01 17:49:08.958 atags += '^' * la
2025-07-01 17:49:08.958 btags += '^' * lb
2025-07-01 17:49:08.958 elif tag == 'delete':
2025-07-01 17:49:08.958 atags += '-' * la
2025-07-01 17:49:08.958 elif tag == 'insert':
2025-07-01 17:49:08.958 btags += '+' * lb
2025-07-01 17:49:08.958 elif tag == 'equal':
2025-07-01 17:49:08.958 atags += ' ' * la
2025-07-01 17:49:08.958 btags += ' ' * lb
2025-07-01 17:49:08.958 else:
2025-07-01 17:49:08.958 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.959 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.959 else:
2025-07-01 17:49:08.959 # the synch pair is identical
2025-07-01 17:49:08.959 yield ' ' + aelt
2025-07-01 17:49:08.959
2025-07-01 17:49:08.959 # pump out diffs from after the synch point
2025-07-01 17:49:08.959 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.959
2025-07-01 17:49:08.959 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.959 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.959
2025-07-01 17:49:08.959 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.959 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.959 alo = 462, ahi = 1101
2025-07-01 17:49:08.959 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.959 blo = 462, bhi = 1101
2025-07-01 17:49:08.959
2025-07-01 17:49:08.959 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.959 g = []
2025-07-01 17:49:08.959 if alo < ahi:
2025-07-01 17:49:08.959 if blo < bhi:
2025-07-01 17:49:08.960 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.960 else:
2025-07-01 17:49:08.960 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.960 elif blo < bhi:
2025-07-01 17:49:08.960 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.960
2025-07-01 17:49:08.960 > yield from g
2025-07-01 17:49:08.960
2025-07-01 17:49:08.960 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.960 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.960
2025-07-01 17:49:08.960 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.960 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.960 alo = 462, ahi = 1101
2025-07-01 17:49:08.960 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.960 blo = 462, bhi = 1101
2025-07-01 17:49:08.960
2025-07-01 17:49:08.960 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.960 r"""
2025-07-01 17:49:08.961 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.961 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.961 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.961 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.961
2025-07-01 17:49:08.961 Example:
2025-07-01 17:49:08.961
2025-07-01 17:49:08.961 >>> d = Differ()
2025-07-01 17:49:08.961 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.961 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.961 >>> print(''.join(results), end="")
2025-07-01 17:49:08.961 - abcDefghiJkl
2025-07-01 17:49:08.961 + abcdefGhijkl
2025-07-01 17:49:08.961 """
2025-07-01 17:49:08.961
2025-07-01 17:49:08.961 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.961 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.961 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.962 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.962 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.962
2025-07-01 17:49:08.962 # search for the pair that matches best without being identical
2025-07-01 17:49:08.962 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.962 # on junk -- unless we have to)
2025-07-01 17:49:08.962 for j in range(blo, bhi):
2025-07-01 17:49:08.962 bj = b[j]
2025-07-01 17:49:08.962 cruncher.set_seq2(bj)
2025-07-01 17:49:08.962 for i in range(alo, ahi):
2025-07-01 17:49:08.962 ai = a[i]
2025-07-01 17:49:08.962 if ai == bj:
2025-07-01 17:49:08.962 if eqi is None:
2025-07-01 17:49:08.962 eqi, eqj = i, j
2025-07-01 17:49:08.962 continue
2025-07-01 17:49:08.962 cruncher.set_seq1(ai)
2025-07-01 17:49:08.962 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.962 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.962 # compares by a factor of 3.
2025-07-01 17:49:08.962 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.962 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.965 # of the computation is cached by cruncher
2025-07-01 17:49:08.965 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.966 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.966 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.966 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.966 if best_ratio < cutoff:
2025-07-01 17:49:08.966 # no non-identical "pretty close" pair
2025-07-01 17:49:08.966 if eqi is None:
2025-07-01 17:49:08.966 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.966 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.966 return
2025-07-01 17:49:08.966 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.966 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.966 else:
2025-07-01 17:49:08.966 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.966 eqi = None
2025-07-01 17:49:08.966
2025-07-01 17:49:08.966 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.966 # identical
2025-07-01 17:49:08.966
2025-07-01 17:49:08.966 # pump out diffs from before the synch point
2025-07-01 17:49:08.966 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.966
2025-07-01 17:49:08.967 # do intraline marking on the synch pair
2025-07-01 17:49:08.967 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.967 if eqi is None:
2025-07-01 17:49:08.967 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.967 atags = btags = ""
2025-07-01 17:49:08.967 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.967 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.967 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.967 if tag == 'replace':
2025-07-01 17:49:08.967 atags += '^' * la
2025-07-01 17:49:08.967 btags += '^' * lb
2025-07-01 17:49:08.967 elif tag == 'delete':
2025-07-01 17:49:08.967 atags += '-' * la
2025-07-01 17:49:08.967 elif tag == 'insert':
2025-07-01 17:49:08.967 btags += '+' * lb
2025-07-01 17:49:08.967 elif tag == 'equal':
2025-07-01 17:49:08.967 atags += ' ' * la
2025-07-01 17:49:08.967 btags += ' ' * lb
2025-07-01 17:49:08.967 else:
2025-07-01 17:49:08.967 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.967 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.967 else:
2025-07-01 17:49:08.968 # the synch pair is identical
2025-07-01 17:49:08.968 yield ' ' + aelt
2025-07-01 17:49:08.968
2025-07-01 17:49:08.968 # pump out diffs from after the synch point
2025-07-01 17:49:08.968 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.968
2025-07-01 17:49:08.968 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.968 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.968
2025-07-01 17:49:08.968 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.968 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.968 alo = 463, ahi = 1101
2025-07-01 17:49:08.968 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.968 blo = 463, bhi = 1101
2025-07-01 17:49:08.968
2025-07-01 17:49:08.968 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.968 g = []
2025-07-01 17:49:08.968 if alo < ahi:
2025-07-01 17:49:08.968 if blo < bhi:
2025-07-01 17:49:08.968 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.969 else:
2025-07-01 17:49:08.969 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.969 elif blo < bhi:
2025-07-01 17:49:08.969 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.969
2025-07-01 17:49:08.969 > yield from g
2025-07-01 17:49:08.969
2025-07-01 17:49:08.969 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.969 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.969
2025-07-01 17:49:08.969 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.969 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.969 alo = 463, ahi = 1101
2025-07-01 17:49:08.969 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.969 blo = 463, bhi = 1101
2025-07-01 17:49:08.969
2025-07-01 17:49:08.969 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.969 r"""
2025-07-01 17:49:08.969 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.970 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.970 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.970 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.970
2025-07-01 17:49:08.970 Example:
2025-07-01 17:49:08.970
2025-07-01 17:49:08.970 >>> d = Differ()
2025-07-01 17:49:08.970 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.970 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.970 >>> print(''.join(results), end="")
2025-07-01 17:49:08.970 - abcDefghiJkl
2025-07-01 17:49:08.970 + abcdefGhijkl
2025-07-01 17:49:08.970 """
2025-07-01 17:49:08.970
2025-07-01 17:49:08.970 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.970 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.970 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.970 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.970 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.971
2025-07-01 17:49:08.971 # search for the pair that matches best without being identical
2025-07-01 17:49:08.971 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.971 # on junk -- unless we have to)
2025-07-01 17:49:08.971 for j in range(blo, bhi):
2025-07-01 17:49:08.971 bj = b[j]
2025-07-01 17:49:08.971 cruncher.set_seq2(bj)
2025-07-01 17:49:08.971 for i in range(alo, ahi):
2025-07-01 17:49:08.971 ai = a[i]
2025-07-01 17:49:08.971 if ai == bj:
2025-07-01 17:49:08.971 if eqi is None:
2025-07-01 17:49:08.971 eqi, eqj = i, j
2025-07-01 17:49:08.971 continue
2025-07-01 17:49:08.971 cruncher.set_seq1(ai)
2025-07-01 17:49:08.971 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.971 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.971 # compares by a factor of 3.
2025-07-01 17:49:08.971 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.971 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.972 # of the computation is cached by cruncher
2025-07-01 17:49:08.972 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.972 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.972 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.972 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.972 if best_ratio < cutoff:
2025-07-01 17:49:08.972 # no non-identical "pretty close" pair
2025-07-01 17:49:08.972 if eqi is None:
2025-07-01 17:49:08.972 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.972 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.972 return
2025-07-01 17:49:08.972 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.972 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.972 else:
2025-07-01 17:49:08.972 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.972 eqi = None
2025-07-01 17:49:08.972
2025-07-01 17:49:08.972 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.972 # identical
2025-07-01 17:49:08.972
2025-07-01 17:49:08.972 # pump out diffs from before the synch point
2025-07-01 17:49:08.973 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.973
2025-07-01 17:49:08.973 # do intraline marking on the synch pair
2025-07-01 17:49:08.973 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.973 if eqi is None:
2025-07-01 17:49:08.973 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.973 atags = btags = ""
2025-07-01 17:49:08.973 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.973 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.973 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.973 if tag == 'replace':
2025-07-01 17:49:08.973 atags += '^' * la
2025-07-01 17:49:08.973 btags += '^' * lb
2025-07-01 17:49:08.973 elif tag == 'delete':
2025-07-01 17:49:08.973 atags += '-' * la
2025-07-01 17:49:08.973 elif tag == 'insert':
2025-07-01 17:49:08.973 btags += '+' * lb
2025-07-01 17:49:08.973 elif tag == 'equal':
2025-07-01 17:49:08.973 atags += ' ' * la
2025-07-01 17:49:08.973 btags += ' ' * lb
2025-07-01 17:49:08.974 else:
2025-07-01 17:49:08.974 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.974 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.974 else:
2025-07-01 17:49:08.974 # the synch pair is identical
2025-07-01 17:49:08.974 yield ' ' + aelt
2025-07-01 17:49:08.974
2025-07-01 17:49:08.974 # pump out diffs from after the synch point
2025-07-01 17:49:08.974 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.974
2025-07-01 17:49:08.974 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.974 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.974
2025-07-01 17:49:08.974 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.974 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.974 alo = 466, ahi = 1101
2025-07-01 17:49:08.974 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.974 blo = 466, bhi = 1101
2025-07-01 17:49:08.974
2025-07-01 17:49:08.974 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.974 g = []
2025-07-01 17:49:08.975 if alo < ahi:
2025-07-01 17:49:08.975 if blo < bhi:
2025-07-01 17:49:08.975 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.975 else:
2025-07-01 17:49:08.975 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.975 elif blo < bhi:
2025-07-01 17:49:08.975 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.975
2025-07-01 17:49:08.975 > yield from g
2025-07-01 17:49:08.975
2025-07-01 17:49:08.975 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.975 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.975
2025-07-01 17:49:08.975 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.975 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.975 alo = 466, ahi = 1101
2025-07-01 17:49:08.975 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.975 blo = 466, bhi = 1101
2025-07-01 17:49:08.975
2025-07-01 17:49:08.975 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.975 r"""
2025-07-01 17:49:08.975 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.976 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.976 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.976 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.976
2025-07-01 17:49:08.976 Example:
2025-07-01 17:49:08.976
2025-07-01 17:49:08.976 >>> d = Differ()
2025-07-01 17:49:08.976 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.976 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.976 >>> print(''.join(results), end="")
2025-07-01 17:49:08.976 - abcDefghiJkl
2025-07-01 17:49:08.976 + abcdefGhijkl
2025-07-01 17:49:08.976 """
2025-07-01 17:49:08.976
2025-07-01 17:49:08.976 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.976 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.976 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.976 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.976 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.977
2025-07-01 17:49:08.977 # search for the pair that matches best without being identical
2025-07-01 17:49:08.977 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.977 # on junk -- unless we have to)
2025-07-01 17:49:08.977 for j in range(blo, bhi):
2025-07-01 17:49:08.977 bj = b[j]
2025-07-01 17:49:08.977 cruncher.set_seq2(bj)
2025-07-01 17:49:08.977 for i in range(alo, ahi):
2025-07-01 17:49:08.977 ai = a[i]
2025-07-01 17:49:08.977 if ai == bj:
2025-07-01 17:49:08.977 if eqi is None:
2025-07-01 17:49:08.977 eqi, eqj = i, j
2025-07-01 17:49:08.977 continue
2025-07-01 17:49:08.977 cruncher.set_seq1(ai)
2025-07-01 17:49:08.977 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.977 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.977 # compares by a factor of 3.
2025-07-01 17:49:08.977 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.977 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.977 # of the computation is cached by cruncher
2025-07-01 17:49:08.977 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.977 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.978 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.978 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.978 if best_ratio < cutoff:
2025-07-01 17:49:08.978 # no non-identical "pretty close" pair
2025-07-01 17:49:08.978 if eqi is None:
2025-07-01 17:49:08.978 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.978 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.978 return
2025-07-01 17:49:08.978 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.978 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.978 else:
2025-07-01 17:49:08.978 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.978 eqi = None
2025-07-01 17:49:08.978
2025-07-01 17:49:08.978 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.978 # identical
2025-07-01 17:49:08.978
2025-07-01 17:49:08.978 # pump out diffs from before the synch point
2025-07-01 17:49:08.978 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.978
2025-07-01 17:49:08.978 # do intraline marking on the synch pair
2025-07-01 17:49:08.979 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.985 if eqi is None:
2025-07-01 17:49:08.985 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.985 atags = btags = ""
2025-07-01 17:49:08.985 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.985 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.985 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.985 if tag == 'replace':
2025-07-01 17:49:08.985 atags += '^' * la
2025-07-01 17:49:08.985 btags += '^' * lb
2025-07-01 17:49:08.985 elif tag == 'delete':
2025-07-01 17:49:08.985 atags += '-' * la
2025-07-01 17:49:08.985 elif tag == 'insert':
2025-07-01 17:49:08.985 btags += '+' * lb
2025-07-01 17:49:08.985 elif tag == 'equal':
2025-07-01 17:49:08.985 atags += ' ' * la
2025-07-01 17:49:08.985 btags += ' ' * lb
2025-07-01 17:49:08.985 else:
2025-07-01 17:49:08.985 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.985 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.986 else:
2025-07-01 17:49:08.986 # the synch pair is identical
2025-07-01 17:49:08.986 yield ' ' + aelt
2025-07-01 17:49:08.986
2025-07-01 17:49:08.986 # pump out diffs from after the synch point
2025-07-01 17:49:08.986 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.986
2025-07-01 17:49:08.986 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.986 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.986
2025-07-01 17:49:08.986 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.986 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.986 alo = 467, ahi = 1101
2025-07-01 17:49:08.986 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.986 blo = 467, bhi = 1101
2025-07-01 17:49:08.986
2025-07-01 17:49:08.986 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.986 g = []
2025-07-01 17:49:08.987 if alo < ahi:
2025-07-01 17:49:08.987 if blo < bhi:
2025-07-01 17:49:08.987 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.987 else:
2025-07-01 17:49:08.987 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.987 elif blo < bhi:
2025-07-01 17:49:08.987 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.987
2025-07-01 17:49:08.987 > yield from g
2025-07-01 17:49:08.987
2025-07-01 17:49:08.987 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.987 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.987
2025-07-01 17:49:08.987 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.987 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.987 alo = 467, ahi = 1101
2025-07-01 17:49:08.987 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.987 blo = 467, bhi = 1101
2025-07-01 17:49:08.987
2025-07-01 17:49:08.987 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.988 r"""
2025-07-01 17:49:08.988 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.988 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.988 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.988 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.988
2025-07-01 17:49:08.988 Example:
2025-07-01 17:49:08.988
2025-07-01 17:49:08.988 >>> d = Differ()
2025-07-01 17:49:08.988 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.988 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.988 >>> print(''.join(results), end="")
2025-07-01 17:49:08.988 - abcDefghiJkl
2025-07-01 17:49:08.988 + abcdefGhijkl
2025-07-01 17:49:08.988 """
2025-07-01 17:49:08.988
2025-07-01 17:49:08.988 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.988 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.988 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.989 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.989 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.989
2025-07-01 17:49:08.989 # search for the pair that matches best without being identical
2025-07-01 17:49:08.989 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.989 # on junk -- unless we have to)
2025-07-01 17:49:08.989 for j in range(blo, bhi):
2025-07-01 17:49:08.989 bj = b[j]
2025-07-01 17:49:08.989 cruncher.set_seq2(bj)
2025-07-01 17:49:08.989 for i in range(alo, ahi):
2025-07-01 17:49:08.989 ai = a[i]
2025-07-01 17:49:08.989 if ai == bj:
2025-07-01 17:49:08.989 if eqi is None:
2025-07-01 17:49:08.989 eqi, eqj = i, j
2025-07-01 17:49:08.989 continue
2025-07-01 17:49:08.989 cruncher.set_seq1(ai)
2025-07-01 17:49:08.989 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.989 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.989 # compares by a factor of 3.
2025-07-01 17:49:08.989 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.990 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.990 # of the computation is cached by cruncher
2025-07-01 17:49:08.990 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.990 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.990 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.990 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.990 if best_ratio < cutoff:
2025-07-01 17:49:08.990 # no non-identical "pretty close" pair
2025-07-01 17:49:08.990 if eqi is None:
2025-07-01 17:49:08.990 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.990 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.990 return
2025-07-01 17:49:08.990 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.990 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.990 else:
2025-07-01 17:49:08.990 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.990 eqi = None
2025-07-01 17:49:08.990
2025-07-01 17:49:08.990 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.990 # identical
2025-07-01 17:49:08.990
2025-07-01 17:49:08.991 # pump out diffs from before the synch point
2025-07-01 17:49:08.991 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:08.991
2025-07-01 17:49:08.991 # do intraline marking on the synch pair
2025-07-01 17:49:08.991 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:08.991 if eqi is None:
2025-07-01 17:49:08.991 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:08.991 atags = btags = ""
2025-07-01 17:49:08.991 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:08.991 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:08.991 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:08.991 if tag == 'replace':
2025-07-01 17:49:08.991 atags += '^' * la
2025-07-01 17:49:08.991 btags += '^' * lb
2025-07-01 17:49:08.991 elif tag == 'delete':
2025-07-01 17:49:08.991 atags += '-' * la
2025-07-01 17:49:08.991 elif tag == 'insert':
2025-07-01 17:49:08.991 btags += '+' * lb
2025-07-01 17:49:08.991 elif tag == 'equal':
2025-07-01 17:49:08.991 atags += ' ' * la
2025-07-01 17:49:08.992 btags += ' ' * lb
2025-07-01 17:49:08.992 else:
2025-07-01 17:49:08.992 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:08.992 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:08.992 else:
2025-07-01 17:49:08.992 # the synch pair is identical
2025-07-01 17:49:08.992 yield ' ' + aelt
2025-07-01 17:49:08.992
2025-07-01 17:49:08.992 # pump out diffs from after the synch point
2025-07-01 17:49:08.992 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:08.992
2025-07-01 17:49:08.992 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:08.992 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.992
2025-07-01 17:49:08.992 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.992 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.992 alo = 468, ahi = 1101
2025-07-01 17:49:08.992 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.992 blo = 468, bhi = 1101
2025-07-01 17:49:08.992
2025-07-01 17:49:08.992 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.993 g = []
2025-07-01 17:49:08.993 if alo < ahi:
2025-07-01 17:49:08.993 if blo < bhi:
2025-07-01 17:49:08.993 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.993 else:
2025-07-01 17:49:08.993 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:08.993 elif blo < bhi:
2025-07-01 17:49:08.993 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:08.993
2025-07-01 17:49:08.993 > yield from g
2025-07-01 17:49:08.993
2025-07-01 17:49:08.993 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:08.993 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:08.993
2025-07-01 17:49:08.993 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:08.993 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:08.993 alo = 468, ahi = 1101
2025-07-01 17:49:08.993 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:08.993 blo = 468, bhi = 1101
2025-07-01 17:49:08.993
2025-07-01 17:49:08.994 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:08.994 r"""
2025-07-01 17:49:08.994 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:08.994 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:08.994 synch point, and intraline difference marking is done on the
2025-07-01 17:49:08.994 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:08.994
2025-07-01 17:49:08.994 Example:
2025-07-01 17:49:08.994
2025-07-01 17:49:08.994 >>> d = Differ()
2025-07-01 17:49:08.994 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:08.994 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:08.994 >>> print(''.join(results), end="")
2025-07-01 17:49:08.994 - abcDefghiJkl
2025-07-01 17:49:08.994 + abcdefGhijkl
2025-07-01 17:49:08.994 """
2025-07-01 17:49:08.994
2025-07-01 17:49:08.994 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:08.995 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:08.997 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:08.998 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:08.998 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:08.998
2025-07-01 17:49:08.998 # search for the pair that matches best without being identical
2025-07-01 17:49:08.998 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:08.998 # on junk -- unless we have to)
2025-07-01 17:49:08.998 for j in range(blo, bhi):
2025-07-01 17:49:08.998 bj = b[j]
2025-07-01 17:49:08.998 cruncher.set_seq2(bj)
2025-07-01 17:49:08.998 for i in range(alo, ahi):
2025-07-01 17:49:08.998 ai = a[i]
2025-07-01 17:49:08.998 if ai == bj:
2025-07-01 17:49:08.998 if eqi is None:
2025-07-01 17:49:08.998 eqi, eqj = i, j
2025-07-01 17:49:08.998 continue
2025-07-01 17:49:08.998 cruncher.set_seq1(ai)
2025-07-01 17:49:08.998 # computing similarity is expensive, so use the quick
2025-07-01 17:49:08.998 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:08.998 # compares by a factor of 3.
2025-07-01 17:49:08.998 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:08.998 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:08.999 # of the computation is cached by cruncher
2025-07-01 17:49:08.999 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:08.999 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:08.999 cruncher.ratio() > best_ratio:
2025-07-01 17:49:08.999 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:08.999 if best_ratio < cutoff:
2025-07-01 17:49:08.999 # no non-identical "pretty close" pair
2025-07-01 17:49:08.999 if eqi is None:
2025-07-01 17:49:08.999 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:08.999 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:08.999 return
2025-07-01 17:49:08.999 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:08.999 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:08.999 else:
2025-07-01 17:49:08.999 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:08.999 eqi = None
2025-07-01 17:49:08.999
2025-07-01 17:49:08.999 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:08.999 # identical
2025-07-01 17:49:08.999
2025-07-01 17:49:09.000 # pump out diffs from before the synch point
2025-07-01 17:49:09.000 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.000
2025-07-01 17:49:09.000 # do intraline marking on the synch pair
2025-07-01 17:49:09.000 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.000 if eqi is None:
2025-07-01 17:49:09.000 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.000 atags = btags = ""
2025-07-01 17:49:09.000 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.000 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.000 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.000 if tag == 'replace':
2025-07-01 17:49:09.000 atags += '^' * la
2025-07-01 17:49:09.000 btags += '^' * lb
2025-07-01 17:49:09.000 elif tag == 'delete':
2025-07-01 17:49:09.000 atags += '-' * la
2025-07-01 17:49:09.000 elif tag == 'insert':
2025-07-01 17:49:09.000 btags += '+' * lb
2025-07-01 17:49:09.000 elif tag == 'equal':
2025-07-01 17:49:09.000 atags += ' ' * la
2025-07-01 17:49:09.000 btags += ' ' * lb
2025-07-01 17:49:09.000 else:
2025-07-01 17:49:09.001 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.001 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.001 else:
2025-07-01 17:49:09.001 # the synch pair is identical
2025-07-01 17:49:09.001 yield ' ' + aelt
2025-07-01 17:49:09.001
2025-07-01 17:49:09.001 # pump out diffs from after the synch point
2025-07-01 17:49:09.001 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.001
2025-07-01 17:49:09.001 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.001 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.001
2025-07-01 17:49:09.001 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.001 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.001 alo = 469, ahi = 1101
2025-07-01 17:49:09.001 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.001 blo = 469, bhi = 1101
2025-07-01 17:49:09.001
2025-07-01 17:49:09.001 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.001 g = []
2025-07-01 17:49:09.002 if alo < ahi:
2025-07-01 17:49:09.002 if blo < bhi:
2025-07-01 17:49:09.002 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.002 else:
2025-07-01 17:49:09.002 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.002 elif blo < bhi:
2025-07-01 17:49:09.002 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.002
2025-07-01 17:49:09.002 > yield from g
2025-07-01 17:49:09.002
2025-07-01 17:49:09.002 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.002 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.002
2025-07-01 17:49:09.002 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.002 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.002 alo = 469, ahi = 1101
2025-07-01 17:49:09.002 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.002 blo = 469, bhi = 1101
2025-07-01 17:49:09.002
2025-07-01 17:49:09.002 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.003 r"""
2025-07-01 17:49:09.003 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.003 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.003 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.003 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.003
2025-07-01 17:49:09.003 Example:
2025-07-01 17:49:09.003
2025-07-01 17:49:09.003 >>> d = Differ()
2025-07-01 17:49:09.003 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.003 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.003 >>> print(''.join(results), end="")
2025-07-01 17:49:09.003 - abcDefghiJkl
2025-07-01 17:49:09.003 + abcdefGhijkl
2025-07-01 17:49:09.003 """
2025-07-01 17:49:09.003
2025-07-01 17:49:09.003 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.003 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.003 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.003 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.004 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.004
2025-07-01 17:49:09.004 # search for the pair that matches best without being identical
2025-07-01 17:49:09.004 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.004 # on junk -- unless we have to)
2025-07-01 17:49:09.004 for j in range(blo, bhi):
2025-07-01 17:49:09.004 bj = b[j]
2025-07-01 17:49:09.004 cruncher.set_seq2(bj)
2025-07-01 17:49:09.004 for i in range(alo, ahi):
2025-07-01 17:49:09.004 ai = a[i]
2025-07-01 17:49:09.004 if ai == bj:
2025-07-01 17:49:09.004 if eqi is None:
2025-07-01 17:49:09.004 eqi, eqj = i, j
2025-07-01 17:49:09.004 continue
2025-07-01 17:49:09.004 cruncher.set_seq1(ai)
2025-07-01 17:49:09.004 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.004 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.004 # compares by a factor of 3.
2025-07-01 17:49:09.004 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.004 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.004 # of the computation is cached by cruncher
2025-07-01 17:49:09.005 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.005 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.005 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.005 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.005 if best_ratio < cutoff:
2025-07-01 17:49:09.005 # no non-identical "pretty close" pair
2025-07-01 17:49:09.005 if eqi is None:
2025-07-01 17:49:09.005 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.005 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.005 return
2025-07-01 17:49:09.005 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.005 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.005 else:
2025-07-01 17:49:09.005 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.005 eqi = None
2025-07-01 17:49:09.005
2025-07-01 17:49:09.005 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.005 # identical
2025-07-01 17:49:09.005
2025-07-01 17:49:09.005 # pump out diffs from before the synch point
2025-07-01 17:49:09.006 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.006
2025-07-01 17:49:09.006 # do intraline marking on the synch pair
2025-07-01 17:49:09.006 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.006 if eqi is None:
2025-07-01 17:49:09.006 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.006 atags = btags = ""
2025-07-01 17:49:09.006 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.006 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.006 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.006 if tag == 'replace':
2025-07-01 17:49:09.006 atags += '^' * la
2025-07-01 17:49:09.006 btags += '^' * lb
2025-07-01 17:49:09.006 elif tag == 'delete':
2025-07-01 17:49:09.006 atags += '-' * la
2025-07-01 17:49:09.006 elif tag == 'insert':
2025-07-01 17:49:09.006 btags += '+' * lb
2025-07-01 17:49:09.006 elif tag == 'equal':
2025-07-01 17:49:09.006 atags += ' ' * la
2025-07-01 17:49:09.006 btags += ' ' * lb
2025-07-01 17:49:09.006 else:
2025-07-01 17:49:09.006 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.006 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.006 else:
2025-07-01 17:49:09.006 # the synch pair is identical
2025-07-01 17:49:09.006 yield ' ' + aelt
2025-07-01 17:49:09.006
2025-07-01 17:49:09.006 # pump out diffs from after the synch point
2025-07-01 17:49:09.007 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.007
2025-07-01 17:49:09.007 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.007 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.007
2025-07-01 17:49:09.007 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.007 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.007 alo = 470, ahi = 1101
2025-07-01 17:49:09.007 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.007 blo = 470, bhi = 1101
2025-07-01 17:49:09.007
2025-07-01 17:49:09.007 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.007 g = []
2025-07-01 17:49:09.007 if alo < ahi:
2025-07-01 17:49:09.007 if blo < bhi:
2025-07-01 17:49:09.007 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.007 else:
2025-07-01 17:49:09.007 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.007 elif blo < bhi:
2025-07-01 17:49:09.007 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.007
2025-07-01 17:49:09.008 > yield from g
2025-07-01 17:49:09.008
2025-07-01 17:49:09.008 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.008 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.008
2025-07-01 17:49:09.008 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.008 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.008 alo = 470, ahi = 1101
2025-07-01 17:49:09.008 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.008 blo = 470, bhi = 1101
2025-07-01 17:49:09.008
2025-07-01 17:49:09.008 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.008 r"""
2025-07-01 17:49:09.008 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.008 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.008 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.008 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.008
2025-07-01 17:49:09.008 Example:
2025-07-01 17:49:09.009
2025-07-01 17:49:09.009 >>> d = Differ()
2025-07-01 17:49:09.009 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.009 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.009 >>> print(''.join(results), end="")
2025-07-01 17:49:09.009 - abcDefghiJkl
2025-07-01 17:49:09.009 + abcdefGhijkl
2025-07-01 17:49:09.009 """
2025-07-01 17:49:09.009
2025-07-01 17:49:09.009 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.009 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.009 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.009 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.009 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.009
2025-07-01 17:49:09.009 # search for the pair that matches best without being identical
2025-07-01 17:49:09.009 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.009 # on junk -- unless we have to)
2025-07-01 17:49:09.009 for j in range(blo, bhi):
2025-07-01 17:49:09.016 bj = b[j]
2025-07-01 17:49:09.016 cruncher.set_seq2(bj)
2025-07-01 17:49:09.016 for i in range(alo, ahi):
2025-07-01 17:49:09.016 ai = a[i]
2025-07-01 17:49:09.016 if ai == bj:
2025-07-01 17:49:09.016 if eqi is None:
2025-07-01 17:49:09.016 eqi, eqj = i, j
2025-07-01 17:49:09.016 continue
2025-07-01 17:49:09.016 cruncher.set_seq1(ai)
2025-07-01 17:49:09.016 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.016 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.016 # compares by a factor of 3.
2025-07-01 17:49:09.016 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.016 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.016 # of the computation is cached by cruncher
2025-07-01 17:49:09.016 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.016 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.016 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.016 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.016 if best_ratio < cutoff:
2025-07-01 17:49:09.016 # no non-identical "pretty close" pair
2025-07-01 17:49:09.016 if eqi is None:
2025-07-01 17:49:09.016 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.016 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.016 return
2025-07-01 17:49:09.016 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.016 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.016 else:
2025-07-01 17:49:09.016 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.016 eqi = None
2025-07-01 17:49:09.016
2025-07-01 17:49:09.016 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.016 # identical
2025-07-01 17:49:09.016
2025-07-01 17:49:09.016 # pump out diffs from before the synch point
2025-07-01 17:49:09.016 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.016
2025-07-01 17:49:09.016 # do intraline marking on the synch pair
2025-07-01 17:49:09.017 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.017 if eqi is None:
2025-07-01 17:49:09.017 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.017 atags = btags = ""
2025-07-01 17:49:09.017 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.017 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.017 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.017 if tag == 'replace':
2025-07-01 17:49:09.017 atags += '^' * la
2025-07-01 17:49:09.017 btags += '^' * lb
2025-07-01 17:49:09.017 elif tag == 'delete':
2025-07-01 17:49:09.017 atags += '-' * la
2025-07-01 17:49:09.017 elif tag == 'insert':
2025-07-01 17:49:09.017 btags += '+' * lb
2025-07-01 17:49:09.017 elif tag == 'equal':
2025-07-01 17:49:09.017 atags += ' ' * la
2025-07-01 17:49:09.017 btags += ' ' * lb
2025-07-01 17:49:09.017 else:
2025-07-01 17:49:09.017 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.017 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.018 else:
2025-07-01 17:49:09.018 # the synch pair is identical
2025-07-01 17:49:09.018 yield ' ' + aelt
2025-07-01 17:49:09.018
2025-07-01 17:49:09.018 # pump out diffs from after the synch point
2025-07-01 17:49:09.018 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.018
2025-07-01 17:49:09.018 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.018 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.018
2025-07-01 17:49:09.018 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.018 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.018 alo = 471, ahi = 1101
2025-07-01 17:49:09.018 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.018 blo = 471, bhi = 1101
2025-07-01 17:49:09.018
2025-07-01 17:49:09.018 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.018 g = []
2025-07-01 17:49:09.018 if alo < ahi:
2025-07-01 17:49:09.018 if blo < bhi:
2025-07-01 17:49:09.019 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.019 else:
2025-07-01 17:49:09.019 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.019 elif blo < bhi:
2025-07-01 17:49:09.019 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.019
2025-07-01 17:49:09.019 > yield from g
2025-07-01 17:49:09.019
2025-07-01 17:49:09.019 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.019 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.019
2025-07-01 17:49:09.019 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.019 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.019 alo = 471, ahi = 1101
2025-07-01 17:49:09.019 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.019 blo = 471, bhi = 1101
2025-07-01 17:49:09.019
2025-07-01 17:49:09.019 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.019 r"""
2025-07-01 17:49:09.019 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.020 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.020 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.020 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.020
2025-07-01 17:49:09.020 Example:
2025-07-01 17:49:09.020
2025-07-01 17:49:09.020 >>> d = Differ()
2025-07-01 17:49:09.020 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.020 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.020 >>> print(''.join(results), end="")
2025-07-01 17:49:09.020 - abcDefghiJkl
2025-07-01 17:49:09.020 + abcdefGhijkl
2025-07-01 17:49:09.020 """
2025-07-01 17:49:09.020
2025-07-01 17:49:09.020 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.020 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.020 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.020 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.021 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.021
2025-07-01 17:49:09.021 # search for the pair that matches best without being identical
2025-07-01 17:49:09.021 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.021 # on junk -- unless we have to)
2025-07-01 17:49:09.021 for j in range(blo, bhi):
2025-07-01 17:49:09.021 bj = b[j]
2025-07-01 17:49:09.021 cruncher.set_seq2(bj)
2025-07-01 17:49:09.021 for i in range(alo, ahi):
2025-07-01 17:49:09.021 ai = a[i]
2025-07-01 17:49:09.021 if ai == bj:
2025-07-01 17:49:09.021 if eqi is None:
2025-07-01 17:49:09.021 eqi, eqj = i, j
2025-07-01 17:49:09.021 continue
2025-07-01 17:49:09.021 cruncher.set_seq1(ai)
2025-07-01 17:49:09.021 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.021 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.021 # compares by a factor of 3.
2025-07-01 17:49:09.021 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.021 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.021 # of the computation is cached by cruncher
2025-07-01 17:49:09.021 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.022 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.022 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.022 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.022 if best_ratio < cutoff:
2025-07-01 17:49:09.022 # no non-identical "pretty close" pair
2025-07-01 17:49:09.022 if eqi is None:
2025-07-01 17:49:09.022 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.022 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.022 return
2025-07-01 17:49:09.022 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.022 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.022 else:
2025-07-01 17:49:09.022 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.022 eqi = None
2025-07-01 17:49:09.022
2025-07-01 17:49:09.022 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.022 # identical
2025-07-01 17:49:09.022
2025-07-01 17:49:09.022 # pump out diffs from before the synch point
2025-07-01 17:49:09.022 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.022
2025-07-01 17:49:09.023 # do intraline marking on the synch pair
2025-07-01 17:49:09.023 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.023 if eqi is None:
2025-07-01 17:49:09.023 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.023 atags = btags = ""
2025-07-01 17:49:09.023 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.023 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.023 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.023 if tag == 'replace':
2025-07-01 17:49:09.023 atags += '^' * la
2025-07-01 17:49:09.023 btags += '^' * lb
2025-07-01 17:49:09.023 elif tag == 'delete':
2025-07-01 17:49:09.023 atags += '-' * la
2025-07-01 17:49:09.023 elif tag == 'insert':
2025-07-01 17:49:09.023 btags += '+' * lb
2025-07-01 17:49:09.023 elif tag == 'equal':
2025-07-01 17:49:09.023 atags += ' ' * la
2025-07-01 17:49:09.023 btags += ' ' * lb
2025-07-01 17:49:09.023 else:
2025-07-01 17:49:09.023 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.023 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.023 else:
2025-07-01 17:49:09.024 # the synch pair is identical
2025-07-01 17:49:09.024 yield ' ' + aelt
2025-07-01 17:49:09.024
2025-07-01 17:49:09.024 # pump out diffs from after the synch point
2025-07-01 17:49:09.024 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.024
2025-07-01 17:49:09.024 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.024 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.024
2025-07-01 17:49:09.024 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.024 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.024 alo = 472, ahi = 1101
2025-07-01 17:49:09.024 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.024 blo = 472, bhi = 1101
2025-07-01 17:49:09.024
2025-07-01 17:49:09.024 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.024 g = []
2025-07-01 17:49:09.024 if alo < ahi:
2025-07-01 17:49:09.024 if blo < bhi:
2025-07-01 17:49:09.024 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.024 else:
2025-07-01 17:49:09.027 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.028 elif blo < bhi:
2025-07-01 17:49:09.028 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.028
2025-07-01 17:49:09.028 > yield from g
2025-07-01 17:49:09.028
2025-07-01 17:49:09.028 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.028 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.028
2025-07-01 17:49:09.028 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.028 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.028 alo = 472, ahi = 1101
2025-07-01 17:49:09.028 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.028 blo = 472, bhi = 1101
2025-07-01 17:49:09.028
2025-07-01 17:49:09.028 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.028 r"""
2025-07-01 17:49:09.028 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.028 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.028 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.028 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.029
2025-07-01 17:49:09.029 Example:
2025-07-01 17:49:09.029
2025-07-01 17:49:09.029 >>> d = Differ()
2025-07-01 17:49:09.029 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.029 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.029 >>> print(''.join(results), end="")
2025-07-01 17:49:09.029 - abcDefghiJkl
2025-07-01 17:49:09.029 + abcdefGhijkl
2025-07-01 17:49:09.029 """
2025-07-01 17:49:09.029
2025-07-01 17:49:09.029 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.029 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.029 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.029 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.029 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.029
2025-07-01 17:49:09.029 # search for the pair that matches best without being identical
2025-07-01 17:49:09.029 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.029 # on junk -- unless we have to)
2025-07-01 17:49:09.030 for j in range(blo, bhi):
2025-07-01 17:49:09.030 bj = b[j]
2025-07-01 17:49:09.030 cruncher.set_seq2(bj)
2025-07-01 17:49:09.030 for i in range(alo, ahi):
2025-07-01 17:49:09.030 ai = a[i]
2025-07-01 17:49:09.030 if ai == bj:
2025-07-01 17:49:09.030 if eqi is None:
2025-07-01 17:49:09.030 eqi, eqj = i, j
2025-07-01 17:49:09.030 continue
2025-07-01 17:49:09.030 cruncher.set_seq1(ai)
2025-07-01 17:49:09.030 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.030 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.030 # compares by a factor of 3.
2025-07-01 17:49:09.030 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.030 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.030 # of the computation is cached by cruncher
2025-07-01 17:49:09.030 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.030 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.030 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.031 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.031 if best_ratio < cutoff:
2025-07-01 17:49:09.031 # no non-identical "pretty close" pair
2025-07-01 17:49:09.031 if eqi is None:
2025-07-01 17:49:09.031 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.031 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.031 return
2025-07-01 17:49:09.031 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.031 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.031 else:
2025-07-01 17:49:09.031 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.031 eqi = None
2025-07-01 17:49:09.031
2025-07-01 17:49:09.031 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.031 # identical
2025-07-01 17:49:09.031
2025-07-01 17:49:09.031 # pump out diffs from before the synch point
2025-07-01 17:49:09.031 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.031
2025-07-01 17:49:09.031 # do intraline marking on the synch pair
2025-07-01 17:49:09.032 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.032 if eqi is None:
2025-07-01 17:49:09.032 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.032 atags = btags = ""
2025-07-01 17:49:09.032 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.032 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.032 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.032 if tag == 'replace':
2025-07-01 17:49:09.032 atags += '^' * la
2025-07-01 17:49:09.032 btags += '^' * lb
2025-07-01 17:49:09.032 elif tag == 'delete':
2025-07-01 17:49:09.032 atags += '-' * la
2025-07-01 17:49:09.032 elif tag == 'insert':
2025-07-01 17:49:09.032 btags += '+' * lb
2025-07-01 17:49:09.032 elif tag == 'equal':
2025-07-01 17:49:09.032 atags += ' ' * la
2025-07-01 17:49:09.032 btags += ' ' * lb
2025-07-01 17:49:09.032 else:
2025-07-01 17:49:09.032 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.032 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.032 else:
2025-07-01 17:49:09.033 # the synch pair is identical
2025-07-01 17:49:09.033 yield ' ' + aelt
2025-07-01 17:49:09.033
2025-07-01 17:49:09.033 # pump out diffs from after the synch point
2025-07-01 17:49:09.033 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.033
2025-07-01 17:49:09.033 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.033 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.033
2025-07-01 17:49:09.033 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.033 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.033 alo = 473, ahi = 1101
2025-07-01 17:49:09.033 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.033 blo = 473, bhi = 1101
2025-07-01 17:49:09.033
2025-07-01 17:49:09.033 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.033 g = []
2025-07-01 17:49:09.033 if alo < ahi:
2025-07-01 17:49:09.033 if blo < bhi:
2025-07-01 17:49:09.033 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.034 else:
2025-07-01 17:49:09.034 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.034 elif blo < bhi:
2025-07-01 17:49:09.034 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.034
2025-07-01 17:49:09.034 > yield from g
2025-07-01 17:49:09.034
2025-07-01 17:49:09.034 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.034 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.034
2025-07-01 17:49:09.034 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.034 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.034 alo = 473, ahi = 1101
2025-07-01 17:49:09.034 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.034 blo = 473, bhi = 1101
2025-07-01 17:49:09.034
2025-07-01 17:49:09.034 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.034 r"""
2025-07-01 17:49:09.034 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.034 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.034 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.035 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.035
2025-07-01 17:49:09.035 Example:
2025-07-01 17:49:09.035
2025-07-01 17:49:09.035 >>> d = Differ()
2025-07-01 17:49:09.035 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.035 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.035 >>> print(''.join(results), end="")
2025-07-01 17:49:09.035 - abcDefghiJkl
2025-07-01 17:49:09.035 + abcdefGhijkl
2025-07-01 17:49:09.035 """
2025-07-01 17:49:09.035
2025-07-01 17:49:09.035 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.035 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.035 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.035 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.035 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.035
2025-07-01 17:49:09.035 # search for the pair that matches best without being identical
2025-07-01 17:49:09.036 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.036 # on junk -- unless we have to)
2025-07-01 17:49:09.036 for j in range(blo, bhi):
2025-07-01 17:49:09.036 bj = b[j]
2025-07-01 17:49:09.036 cruncher.set_seq2(bj)
2025-07-01 17:49:09.036 for i in range(alo, ahi):
2025-07-01 17:49:09.036 ai = a[i]
2025-07-01 17:49:09.036 if ai == bj:
2025-07-01 17:49:09.036 if eqi is None:
2025-07-01 17:49:09.036 eqi, eqj = i, j
2025-07-01 17:49:09.036 continue
2025-07-01 17:49:09.036 cruncher.set_seq1(ai)
2025-07-01 17:49:09.036 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.036 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.036 # compares by a factor of 3.
2025-07-01 17:49:09.036 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.036 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.036 # of the computation is cached by cruncher
2025-07-01 17:49:09.036 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.036 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.037 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.037 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.037 if best_ratio < cutoff:
2025-07-01 17:49:09.037 # no non-identical "pretty close" pair
2025-07-01 17:49:09.037 if eqi is None:
2025-07-01 17:49:09.037 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.037 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.037 return
2025-07-01 17:49:09.037 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.037 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.037 else:
2025-07-01 17:49:09.037 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.037 eqi = None
2025-07-01 17:49:09.037
2025-07-01 17:49:09.037 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.037 # identical
2025-07-01 17:49:09.037
2025-07-01 17:49:09.037 # pump out diffs from before the synch point
2025-07-01 17:49:09.037 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.037
2025-07-01 17:49:09.037 # do intraline marking on the synch pair
2025-07-01 17:49:09.038 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.038 if eqi is None:
2025-07-01 17:49:09.038 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.038 atags = btags = ""
2025-07-01 17:49:09.038 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.038 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.038 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.038 if tag == 'replace':
2025-07-01 17:49:09.038 atags += '^' * la
2025-07-01 17:49:09.038 btags += '^' * lb
2025-07-01 17:49:09.038 elif tag == 'delete':
2025-07-01 17:49:09.038 atags += '-' * la
2025-07-01 17:49:09.038 elif tag == 'insert':
2025-07-01 17:49:09.038 btags += '+' * lb
2025-07-01 17:49:09.038 elif tag == 'equal':
2025-07-01 17:49:09.038 atags += ' ' * la
2025-07-01 17:49:09.038 btags += ' ' * lb
2025-07-01 17:49:09.038 else:
2025-07-01 17:49:09.038 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.038 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.039 else:
2025-07-01 17:49:09.039 # the synch pair is identical
2025-07-01 17:49:09.039 yield ' ' + aelt
2025-07-01 17:49:09.039
2025-07-01 17:49:09.039 # pump out diffs from after the synch point
2025-07-01 17:49:09.039 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.039
2025-07-01 17:49:09.039 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.039 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.039
2025-07-01 17:49:09.039 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.039 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.039 alo = 474, ahi = 1101
2025-07-01 17:49:09.039 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.039 blo = 474, bhi = 1101
2025-07-01 17:49:09.039
2025-07-01 17:49:09.039 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.039 g = []
2025-07-01 17:49:09.039 if alo < ahi:
2025-07-01 17:49:09.039 if blo < bhi:
2025-07-01 17:49:09.039 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.046 else:
2025-07-01 17:49:09.046 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.046 elif blo < bhi:
2025-07-01 17:49:09.046 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.046
2025-07-01 17:49:09.046 > yield from g
2025-07-01 17:49:09.046
2025-07-01 17:49:09.046 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.046 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.046
2025-07-01 17:49:09.046 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.046 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.046 alo = 474, ahi = 1101
2025-07-01 17:49:09.046 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.046 blo = 474, bhi = 1101
2025-07-01 17:49:09.046
2025-07-01 17:49:09.046 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.046 r"""
2025-07-01 17:49:09.046 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.047 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.047 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.047 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.047
2025-07-01 17:49:09.047 Example:
2025-07-01 17:49:09.047
2025-07-01 17:49:09.047 >>> d = Differ()
2025-07-01 17:49:09.047 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.047 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.047 >>> print(''.join(results), end="")
2025-07-01 17:49:09.047 - abcDefghiJkl
2025-07-01 17:49:09.047 + abcdefGhijkl
2025-07-01 17:49:09.047 """
2025-07-01 17:49:09.047
2025-07-01 17:49:09.047 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.047 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.047 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.047 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.047 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.047
2025-07-01 17:49:09.048 # search for the pair that matches best without being identical
2025-07-01 17:49:09.048 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.048 # on junk -- unless we have to)
2025-07-01 17:49:09.048 for j in range(blo, bhi):
2025-07-01 17:49:09.048 bj = b[j]
2025-07-01 17:49:09.048 cruncher.set_seq2(bj)
2025-07-01 17:49:09.048 for i in range(alo, ahi):
2025-07-01 17:49:09.048 ai = a[i]
2025-07-01 17:49:09.048 if ai == bj:
2025-07-01 17:49:09.048 if eqi is None:
2025-07-01 17:49:09.048 eqi, eqj = i, j
2025-07-01 17:49:09.048 continue
2025-07-01 17:49:09.048 cruncher.set_seq1(ai)
2025-07-01 17:49:09.048 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.048 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.048 # compares by a factor of 3.
2025-07-01 17:49:09.048 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.048 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.048 # of the computation is cached by cruncher
2025-07-01 17:49:09.048 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.048 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.049 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.049 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.049 if best_ratio < cutoff:
2025-07-01 17:49:09.049 # no non-identical "pretty close" pair
2025-07-01 17:49:09.049 if eqi is None:
2025-07-01 17:49:09.049 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.049 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.049 return
2025-07-01 17:49:09.049 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.049 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.049 else:
2025-07-01 17:49:09.049 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.049 eqi = None
2025-07-01 17:49:09.049
2025-07-01 17:49:09.049 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.049 # identical
2025-07-01 17:49:09.049
2025-07-01 17:49:09.049 # pump out diffs from before the synch point
2025-07-01 17:49:09.049 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.049
2025-07-01 17:49:09.049 # do intraline marking on the synch pair
2025-07-01 17:49:09.050 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.050 if eqi is None:
2025-07-01 17:49:09.050 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.050 atags = btags = ""
2025-07-01 17:49:09.050 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.050 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.050 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.050 if tag == 'replace':
2025-07-01 17:49:09.050 atags += '^' * la
2025-07-01 17:49:09.050 btags += '^' * lb
2025-07-01 17:49:09.050 elif tag == 'delete':
2025-07-01 17:49:09.050 atags += '-' * la
2025-07-01 17:49:09.050 elif tag == 'insert':
2025-07-01 17:49:09.050 btags += '+' * lb
2025-07-01 17:49:09.050 elif tag == 'equal':
2025-07-01 17:49:09.050 atags += ' ' * la
2025-07-01 17:49:09.050 btags += ' ' * lb
2025-07-01 17:49:09.050 else:
2025-07-01 17:49:09.050 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.050 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.050 else:
2025-07-01 17:49:09.050 # the synch pair is identical
2025-07-01 17:49:09.051 yield ' ' + aelt
2025-07-01 17:49:09.051
2025-07-01 17:49:09.051 # pump out diffs from after the synch point
2025-07-01 17:49:09.051 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.051
2025-07-01 17:49:09.051 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.051 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.051
2025-07-01 17:49:09.051 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.051 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.051 alo = 475, ahi = 1101
2025-07-01 17:49:09.051 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.051 blo = 475, bhi = 1101
2025-07-01 17:49:09.051
2025-07-01 17:49:09.051 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.051 g = []
2025-07-01 17:49:09.051 if alo < ahi:
2025-07-01 17:49:09.051 if blo < bhi:
2025-07-01 17:49:09.051 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.051 else:
2025-07-01 17:49:09.051 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.052 elif blo < bhi:
2025-07-01 17:49:09.052 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.052
2025-07-01 17:49:09.052 > yield from g
2025-07-01 17:49:09.052
2025-07-01 17:49:09.052 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.052 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.052
2025-07-01 17:49:09.052 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.052 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.052 alo = 475, ahi = 1101
2025-07-01 17:49:09.052 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.052 blo = 475, bhi = 1101
2025-07-01 17:49:09.052
2025-07-01 17:49:09.052 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.052 r"""
2025-07-01 17:49:09.052 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.052 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.052 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.053 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.053
2025-07-01 17:49:09.053 Example:
2025-07-01 17:49:09.053
2025-07-01 17:49:09.053 >>> d = Differ()
2025-07-01 17:49:09.053 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.053 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.053 >>> print(''.join(results), end="")
2025-07-01 17:49:09.053 - abcDefghiJkl
2025-07-01 17:49:09.053 + abcdefGhijkl
2025-07-01 17:49:09.053 """
2025-07-01 17:49:09.053
2025-07-01 17:49:09.053 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.053 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.053 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.053 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.053 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.053
2025-07-01 17:49:09.054 # search for the pair that matches best without being identical
2025-07-01 17:49:09.054 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.054 # on junk -- unless we have to)
2025-07-01 17:49:09.054 for j in range(blo, bhi):
2025-07-01 17:49:09.054 bj = b[j]
2025-07-01 17:49:09.054 cruncher.set_seq2(bj)
2025-07-01 17:49:09.054 for i in range(alo, ahi):
2025-07-01 17:49:09.054 ai = a[i]
2025-07-01 17:49:09.054 if ai == bj:
2025-07-01 17:49:09.054 if eqi is None:
2025-07-01 17:49:09.054 eqi, eqj = i, j
2025-07-01 17:49:09.054 continue
2025-07-01 17:49:09.054 cruncher.set_seq1(ai)
2025-07-01 17:49:09.054 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.054 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.054 # compares by a factor of 3.
2025-07-01 17:49:09.054 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.054 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.054 # of the computation is cached by cruncher
2025-07-01 17:49:09.054 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.054 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.055 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.055 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.055 if best_ratio < cutoff:
2025-07-01 17:49:09.055 # no non-identical "pretty close" pair
2025-07-01 17:49:09.055 if eqi is None:
2025-07-01 17:49:09.055 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.055 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.055 return
2025-07-01 17:49:09.055 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.055 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.055 else:
2025-07-01 17:49:09.055 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.055 eqi = None
2025-07-01 17:49:09.055
2025-07-01 17:49:09.055 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.055 # identical
2025-07-01 17:49:09.055
2025-07-01 17:49:09.055 # pump out diffs from before the synch point
2025-07-01 17:49:09.055 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.055
2025-07-01 17:49:09.056 # do intraline marking on the synch pair
2025-07-01 17:49:09.059 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.059 if eqi is None:
2025-07-01 17:49:09.059 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.059 atags = btags = ""
2025-07-01 17:49:09.059 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.059 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.059 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.059 if tag == 'replace':
2025-07-01 17:49:09.059 atags += '^' * la
2025-07-01 17:49:09.059 btags += '^' * lb
2025-07-01 17:49:09.059 elif tag == 'delete':
2025-07-01 17:49:09.059 atags += '-' * la
2025-07-01 17:49:09.059 elif tag == 'insert':
2025-07-01 17:49:09.059 btags += '+' * lb
2025-07-01 17:49:09.059 elif tag == 'equal':
2025-07-01 17:49:09.059 atags += ' ' * la
2025-07-01 17:49:09.059 btags += ' ' * lb
2025-07-01 17:49:09.059 else:
2025-07-01 17:49:09.059 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.060 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.060 else:
2025-07-01 17:49:09.060 # the synch pair is identical
2025-07-01 17:49:09.060 yield ' ' + aelt
2025-07-01 17:49:09.060
2025-07-01 17:49:09.060 # pump out diffs from after the synch point
2025-07-01 17:49:09.060 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.060
2025-07-01 17:49:09.060 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.060 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.060
2025-07-01 17:49:09.060 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.060 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.060 alo = 476, ahi = 1101
2025-07-01 17:49:09.060 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.060 blo = 476, bhi = 1101
2025-07-01 17:49:09.060
2025-07-01 17:49:09.060 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.060 g = []
2025-07-01 17:49:09.060 if alo < ahi:
2025-07-01 17:49:09.060 if blo < bhi:
2025-07-01 17:49:09.061 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.061 else:
2025-07-01 17:49:09.061 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.061 elif blo < bhi:
2025-07-01 17:49:09.061 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.061
2025-07-01 17:49:09.061 > yield from g
2025-07-01 17:49:09.061
2025-07-01 17:49:09.061 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.061 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.061
2025-07-01 17:49:09.061 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.061 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.061 alo = 476, ahi = 1101
2025-07-01 17:49:09.061 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.061 blo = 476, bhi = 1101
2025-07-01 17:49:09.061
2025-07-01 17:49:09.061 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.061 r"""
2025-07-01 17:49:09.061 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.061 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.062 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.062 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.062
2025-07-01 17:49:09.062 Example:
2025-07-01 17:49:09.062
2025-07-01 17:49:09.062 >>> d = Differ()
2025-07-01 17:49:09.062 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.062 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.062 >>> print(''.join(results), end="")
2025-07-01 17:49:09.062 - abcDefghiJkl
2025-07-01 17:49:09.062 + abcdefGhijkl
2025-07-01 17:49:09.062 """
2025-07-01 17:49:09.062
2025-07-01 17:49:09.062 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.062 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.062 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.062 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.062 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.062
2025-07-01 17:49:09.063 # search for the pair that matches best without being identical
2025-07-01 17:49:09.063 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.063 # on junk -- unless we have to)
2025-07-01 17:49:09.063 for j in range(blo, bhi):
2025-07-01 17:49:09.063 bj = b[j]
2025-07-01 17:49:09.063 cruncher.set_seq2(bj)
2025-07-01 17:49:09.063 for i in range(alo, ahi):
2025-07-01 17:49:09.063 ai = a[i]
2025-07-01 17:49:09.063 if ai == bj:
2025-07-01 17:49:09.063 if eqi is None:
2025-07-01 17:49:09.063 eqi, eqj = i, j
2025-07-01 17:49:09.063 continue
2025-07-01 17:49:09.063 cruncher.set_seq1(ai)
2025-07-01 17:49:09.063 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.063 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.063 # compares by a factor of 3.
2025-07-01 17:49:09.063 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.063 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.063 # of the computation is cached by cruncher
2025-07-01 17:49:09.063 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.064 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.064 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.064 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.064 if best_ratio < cutoff:
2025-07-01 17:49:09.064 # no non-identical "pretty close" pair
2025-07-01 17:49:09.064 if eqi is None:
2025-07-01 17:49:09.064 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.064 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.064 return
2025-07-01 17:49:09.064 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.064 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.064 else:
2025-07-01 17:49:09.064 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.064 eqi = None
2025-07-01 17:49:09.064
2025-07-01 17:49:09.064 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.064 # identical
2025-07-01 17:49:09.064
2025-07-01 17:49:09.064 # pump out diffs from before the synch point
2025-07-01 17:49:09.065 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.065
2025-07-01 17:49:09.065 # do intraline marking on the synch pair
2025-07-01 17:49:09.065 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.065 if eqi is None:
2025-07-01 17:49:09.065 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.065 atags = btags = ""
2025-07-01 17:49:09.065 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.065 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.065 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.065 if tag == 'replace':
2025-07-01 17:49:09.065 atags += '^' * la
2025-07-01 17:49:09.065 btags += '^' * lb
2025-07-01 17:49:09.065 elif tag == 'delete':
2025-07-01 17:49:09.065 atags += '-' * la
2025-07-01 17:49:09.065 elif tag == 'insert':
2025-07-01 17:49:09.065 btags += '+' * lb
2025-07-01 17:49:09.065 elif tag == 'equal':
2025-07-01 17:49:09.065 atags += ' ' * la
2025-07-01 17:49:09.065 btags += ' ' * lb
2025-07-01 17:49:09.066 else:
2025-07-01 17:49:09.066 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.066 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.066 else:
2025-07-01 17:49:09.066 # the synch pair is identical
2025-07-01 17:49:09.066 yield ' ' + aelt
2025-07-01 17:49:09.066
2025-07-01 17:49:09.066 # pump out diffs from after the synch point
2025-07-01 17:49:09.066 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.066
2025-07-01 17:49:09.066 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.066 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.066
2025-07-01 17:49:09.066 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.066 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.066 alo = 477, ahi = 1101
2025-07-01 17:49:09.066 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.066 blo = 477, bhi = 1101
2025-07-01 17:49:09.066
2025-07-01 17:49:09.066 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.066 g = []
2025-07-01 17:49:09.067 if alo < ahi:
2025-07-01 17:49:09.067 if blo < bhi:
2025-07-01 17:49:09.067 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.067 else:
2025-07-01 17:49:09.067 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.067 elif blo < bhi:
2025-07-01 17:49:09.067 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.067
2025-07-01 17:49:09.067 > yield from g
2025-07-01 17:49:09.067
2025-07-01 17:49:09.067 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.067 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.067
2025-07-01 17:49:09.067 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.067 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.067 alo = 477, ahi = 1101
2025-07-01 17:49:09.067 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.067 blo = 477, bhi = 1101
2025-07-01 17:49:09.067
2025-07-01 17:49:09.067 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.067 r"""
2025-07-01 17:49:09.068 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.068 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.068 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.068 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.068
2025-07-01 17:49:09.068 Example:
2025-07-01 17:49:09.068
2025-07-01 17:49:09.068 >>> d = Differ()
2025-07-01 17:49:09.068 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.068 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.068 >>> print(''.join(results), end="")
2025-07-01 17:49:09.068 - abcDefghiJkl
2025-07-01 17:49:09.068 + abcdefGhijkl
2025-07-01 17:49:09.068 """
2025-07-01 17:49:09.068
2025-07-01 17:49:09.068 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.068 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.068 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.068 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.068 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.069
2025-07-01 17:49:09.069 # search for the pair that matches best without being identical
2025-07-01 17:49:09.069 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.069 # on junk -- unless we have to)
2025-07-01 17:49:09.069 for j in range(blo, bhi):
2025-07-01 17:49:09.069 bj = b[j]
2025-07-01 17:49:09.069 cruncher.set_seq2(bj)
2025-07-01 17:49:09.069 for i in range(alo, ahi):
2025-07-01 17:49:09.069 ai = a[i]
2025-07-01 17:49:09.069 if ai == bj:
2025-07-01 17:49:09.069 if eqi is None:
2025-07-01 17:49:09.069 eqi, eqj = i, j
2025-07-01 17:49:09.069 continue
2025-07-01 17:49:09.069 cruncher.set_seq1(ai)
2025-07-01 17:49:09.069 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.069 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.069 # compares by a factor of 3.
2025-07-01 17:49:09.069 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.069 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.069 # of the computation is cached by cruncher
2025-07-01 17:49:09.069 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.070 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.070 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.070 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.070 if best_ratio < cutoff:
2025-07-01 17:49:09.070 # no non-identical "pretty close" pair
2025-07-01 17:49:09.070 if eqi is None:
2025-07-01 17:49:09.070 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.070 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.070 return
2025-07-01 17:49:09.070 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.070 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.070 else:
2025-07-01 17:49:09.070 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.070 eqi = None
2025-07-01 17:49:09.070
2025-07-01 17:49:09.070 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.070 # identical
2025-07-01 17:49:09.070
2025-07-01 17:49:09.070 # pump out diffs from before the synch point
2025-07-01 17:49:09.070 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.070
2025-07-01 17:49:09.071 # do intraline marking on the synch pair
2025-07-01 17:49:09.071 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.071 if eqi is None:
2025-07-01 17:49:09.071 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.071 atags = btags = ""
2025-07-01 17:49:09.071 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.071 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.071 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.071 if tag == 'replace':
2025-07-01 17:49:09.071 atags += '^' * la
2025-07-01 17:49:09.071 btags += '^' * lb
2025-07-01 17:49:09.071 elif tag == 'delete':
2025-07-01 17:49:09.071 atags += '-' * la
2025-07-01 17:49:09.071 elif tag == 'insert':
2025-07-01 17:49:09.071 btags += '+' * lb
2025-07-01 17:49:09.071 elif tag == 'equal':
2025-07-01 17:49:09.071 atags += ' ' * la
2025-07-01 17:49:09.071 btags += ' ' * lb
2025-07-01 17:49:09.071 else:
2025-07-01 17:49:09.071 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.071 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.072 else:
2025-07-01 17:49:09.076 # the synch pair is identical
2025-07-01 17:49:09.076 yield ' ' + aelt
2025-07-01 17:49:09.076
2025-07-01 17:49:09.077 # pump out diffs from after the synch point
2025-07-01 17:49:09.077 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.077
2025-07-01 17:49:09.077 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.077 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.077
2025-07-01 17:49:09.077 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.077 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.077 alo = 478, ahi = 1101
2025-07-01 17:49:09.077 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.077 blo = 478, bhi = 1101
2025-07-01 17:49:09.077
2025-07-01 17:49:09.077 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.077 g = []
2025-07-01 17:49:09.077 if alo < ahi:
2025-07-01 17:49:09.077 if blo < bhi:
2025-07-01 17:49:09.077 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.077 else:
2025-07-01 17:49:09.078 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.078 elif blo < bhi:
2025-07-01 17:49:09.078 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.078
2025-07-01 17:49:09.078 > yield from g
2025-07-01 17:49:09.078
2025-07-01 17:49:09.078 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.078 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.078
2025-07-01 17:49:09.078 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.078 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.078 alo = 478, ahi = 1101
2025-07-01 17:49:09.078 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.078 blo = 478, bhi = 1101
2025-07-01 17:49:09.078
2025-07-01 17:49:09.078 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.078 r"""
2025-07-01 17:49:09.078 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.078 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.079 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.079 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.079
2025-07-01 17:49:09.079 Example:
2025-07-01 17:49:09.079
2025-07-01 17:49:09.079 >>> d = Differ()
2025-07-01 17:49:09.079 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.079 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.079 >>> print(''.join(results), end="")
2025-07-01 17:49:09.079 - abcDefghiJkl
2025-07-01 17:49:09.079 + abcdefGhijkl
2025-07-01 17:49:09.079 """
2025-07-01 17:49:09.079
2025-07-01 17:49:09.079 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.079 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.079 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.079 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.079 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.079
2025-07-01 17:49:09.080 # search for the pair that matches best without being identical
2025-07-01 17:49:09.080 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.080 # on junk -- unless we have to)
2025-07-01 17:49:09.080 for j in range(blo, bhi):
2025-07-01 17:49:09.080 bj = b[j]
2025-07-01 17:49:09.080 cruncher.set_seq2(bj)
2025-07-01 17:49:09.080 for i in range(alo, ahi):
2025-07-01 17:49:09.080 ai = a[i]
2025-07-01 17:49:09.080 if ai == bj:
2025-07-01 17:49:09.080 if eqi is None:
2025-07-01 17:49:09.080 eqi, eqj = i, j
2025-07-01 17:49:09.080 continue
2025-07-01 17:49:09.080 cruncher.set_seq1(ai)
2025-07-01 17:49:09.080 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.080 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.080 # compares by a factor of 3.
2025-07-01 17:49:09.080 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.080 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.080 # of the computation is cached by cruncher
2025-07-01 17:49:09.080 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.081 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.081 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.081 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.081 if best_ratio < cutoff:
2025-07-01 17:49:09.081 # no non-identical "pretty close" pair
2025-07-01 17:49:09.081 if eqi is None:
2025-07-01 17:49:09.081 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.081 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.081 return
2025-07-01 17:49:09.081 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.081 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.081 else:
2025-07-01 17:49:09.081 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.081 eqi = None
2025-07-01 17:49:09.081
2025-07-01 17:49:09.081 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.081 # identical
2025-07-01 17:49:09.081
2025-07-01 17:49:09.081 # pump out diffs from before the synch point
2025-07-01 17:49:09.081 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.082
2025-07-01 17:49:09.082 # do intraline marking on the synch pair
2025-07-01 17:49:09.082 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.082 if eqi is None:
2025-07-01 17:49:09.082 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.082 atags = btags = ""
2025-07-01 17:49:09.082 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.082 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.082 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.082 if tag == 'replace':
2025-07-01 17:49:09.082 atags += '^' * la
2025-07-01 17:49:09.082 btags += '^' * lb
2025-07-01 17:49:09.082 elif tag == 'delete':
2025-07-01 17:49:09.082 atags += '-' * la
2025-07-01 17:49:09.082 elif tag == 'insert':
2025-07-01 17:49:09.082 btags += '+' * lb
2025-07-01 17:49:09.082 elif tag == 'equal':
2025-07-01 17:49:09.082 atags += ' ' * la
2025-07-01 17:49:09.082 btags += ' ' * lb
2025-07-01 17:49:09.082 else:
2025-07-01 17:49:09.082 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.083 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.083 else:
2025-07-01 17:49:09.083 # the synch pair is identical
2025-07-01 17:49:09.083 yield ' ' + aelt
2025-07-01 17:49:09.083
2025-07-01 17:49:09.083 # pump out diffs from after the synch point
2025-07-01 17:49:09.083 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.083
2025-07-01 17:49:09.083 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.083 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.083
2025-07-01 17:49:09.083 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.083 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.083 alo = 479, ahi = 1101
2025-07-01 17:49:09.083 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.083 blo = 479, bhi = 1101
2025-07-01 17:49:09.083
2025-07-01 17:49:09.083 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.083 g = []
2025-07-01 17:49:09.083 if alo < ahi:
2025-07-01 17:49:09.084 if blo < bhi:
2025-07-01 17:49:09.084 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.084 else:
2025-07-01 17:49:09.084 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.084 elif blo < bhi:
2025-07-01 17:49:09.084 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.084
2025-07-01 17:49:09.084 > yield from g
2025-07-01 17:49:09.084
2025-07-01 17:49:09.084 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.084 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.084
2025-07-01 17:49:09.084 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.084 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.084 alo = 479, ahi = 1101
2025-07-01 17:49:09.084 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.084 blo = 479, bhi = 1101
2025-07-01 17:49:09.084
2025-07-01 17:49:09.084 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.084 r"""
2025-07-01 17:49:09.085 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.085 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.085 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.085 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.085
2025-07-01 17:49:09.085 Example:
2025-07-01 17:49:09.085
2025-07-01 17:49:09.085 >>> d = Differ()
2025-07-01 17:49:09.085 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.085 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.085 >>> print(''.join(results), end="")
2025-07-01 17:49:09.085 - abcDefghiJkl
2025-07-01 17:49:09.085 + abcdefGhijkl
2025-07-01 17:49:09.085 """
2025-07-01 17:49:09.085
2025-07-01 17:49:09.085 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.085 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.085 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.085 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.086 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.086
2025-07-01 17:49:09.086 # search for the pair that matches best without being identical
2025-07-01 17:49:09.086 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.086 # on junk -- unless we have to)
2025-07-01 17:49:09.086 for j in range(blo, bhi):
2025-07-01 17:49:09.086 bj = b[j]
2025-07-01 17:49:09.086 cruncher.set_seq2(bj)
2025-07-01 17:49:09.086 for i in range(alo, ahi):
2025-07-01 17:49:09.086 ai = a[i]
2025-07-01 17:49:09.086 if ai == bj:
2025-07-01 17:49:09.086 if eqi is None:
2025-07-01 17:49:09.086 eqi, eqj = i, j
2025-07-01 17:49:09.086 continue
2025-07-01 17:49:09.086 cruncher.set_seq1(ai)
2025-07-01 17:49:09.086 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.086 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.086 # compares by a factor of 3.
2025-07-01 17:49:09.089 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.090 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.090 # of the computation is cached by cruncher
2025-07-01 17:49:09.090 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.090 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.090 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.090 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.090 if best_ratio < cutoff:
2025-07-01 17:49:09.090 # no non-identical "pretty close" pair
2025-07-01 17:49:09.090 if eqi is None:
2025-07-01 17:49:09.090 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.090 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.090 return
2025-07-01 17:49:09.090 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.090 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.090 else:
2025-07-01 17:49:09.090 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.090 eqi = None
2025-07-01 17:49:09.090
2025-07-01 17:49:09.090 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.091 # identical
2025-07-01 17:49:09.091
2025-07-01 17:49:09.091 # pump out diffs from before the synch point
2025-07-01 17:49:09.091 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.091
2025-07-01 17:49:09.091 # do intraline marking on the synch pair
2025-07-01 17:49:09.091 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.091 if eqi is None:
2025-07-01 17:49:09.091 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.091 atags = btags = ""
2025-07-01 17:49:09.091 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.091 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.091 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.091 if tag == 'replace':
2025-07-01 17:49:09.091 atags += '^' * la
2025-07-01 17:49:09.091 btags += '^' * lb
2025-07-01 17:49:09.091 elif tag == 'delete':
2025-07-01 17:49:09.091 atags += '-' * la
2025-07-01 17:49:09.091 elif tag == 'insert':
2025-07-01 17:49:09.091 btags += '+' * lb
2025-07-01 17:49:09.091 elif tag == 'equal':
2025-07-01 17:49:09.092 atags += ' ' * la
2025-07-01 17:49:09.092 btags += ' ' * lb
2025-07-01 17:49:09.092 else:
2025-07-01 17:49:09.092 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.092 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.092 else:
2025-07-01 17:49:09.092 # the synch pair is identical
2025-07-01 17:49:09.092 yield ' ' + aelt
2025-07-01 17:49:09.092
2025-07-01 17:49:09.092 # pump out diffs from after the synch point
2025-07-01 17:49:09.092 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.092
2025-07-01 17:49:09.092 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.092 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.092
2025-07-01 17:49:09.092 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.092 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.092 alo = 480, ahi = 1101
2025-07-01 17:49:09.092 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.093 blo = 480, bhi = 1101
2025-07-01 17:49:09.093
2025-07-01 17:49:09.093 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.093 g = []
2025-07-01 17:49:09.093 if alo < ahi:
2025-07-01 17:49:09.093 if blo < bhi:
2025-07-01 17:49:09.093 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.093 else:
2025-07-01 17:49:09.093 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.093 elif blo < bhi:
2025-07-01 17:49:09.093 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.093
2025-07-01 17:49:09.093 > yield from g
2025-07-01 17:49:09.093
2025-07-01 17:49:09.093 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.093 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.093
2025-07-01 17:49:09.093 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.093 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.093 alo = 480, ahi = 1101
2025-07-01 17:49:09.094 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.094 blo = 480, bhi = 1101
2025-07-01 17:49:09.094
2025-07-01 17:49:09.094 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.094 r"""
2025-07-01 17:49:09.094 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.094 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.094 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.094 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.094
2025-07-01 17:49:09.094 Example:
2025-07-01 17:49:09.094
2025-07-01 17:49:09.094 >>> d = Differ()
2025-07-01 17:49:09.094 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.094 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.094 >>> print(''.join(results), end="")
2025-07-01 17:49:09.094 - abcDefghiJkl
2025-07-01 17:49:09.094 + abcdefGhijkl
2025-07-01 17:49:09.094 """
2025-07-01 17:49:09.095
2025-07-01 17:49:09.095 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.095 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.095 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.095 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.095 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.095
2025-07-01 17:49:09.095 # search for the pair that matches best without being identical
2025-07-01 17:49:09.095 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.095 # on junk -- unless we have to)
2025-07-01 17:49:09.095 for j in range(blo, bhi):
2025-07-01 17:49:09.095 bj = b[j]
2025-07-01 17:49:09.095 cruncher.set_seq2(bj)
2025-07-01 17:49:09.095 for i in range(alo, ahi):
2025-07-01 17:49:09.095 ai = a[i]
2025-07-01 17:49:09.095 if ai == bj:
2025-07-01 17:49:09.095 if eqi is None:
2025-07-01 17:49:09.095 eqi, eqj = i, j
2025-07-01 17:49:09.095 continue
2025-07-01 17:49:09.095 cruncher.set_seq1(ai)
2025-07-01 17:49:09.096 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.096 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.096 # compares by a factor of 3.
2025-07-01 17:49:09.096 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.096 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.096 # of the computation is cached by cruncher
2025-07-01 17:49:09.096 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.096 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.096 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.096 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.096 if best_ratio < cutoff:
2025-07-01 17:49:09.096 # no non-identical "pretty close" pair
2025-07-01 17:49:09.096 if eqi is None:
2025-07-01 17:49:09.096 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.096 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.096 return
2025-07-01 17:49:09.096 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.096 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.096 else:
2025-07-01 17:49:09.096 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.096 eqi = None
2025-07-01 17:49:09.097
2025-07-01 17:49:09.097 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.097 # identical
2025-07-01 17:49:09.097
2025-07-01 17:49:09.097 # pump out diffs from before the synch point
2025-07-01 17:49:09.097 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.097
2025-07-01 17:49:09.097 # do intraline marking on the synch pair
2025-07-01 17:49:09.097 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.097 if eqi is None:
2025-07-01 17:49:09.097 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.097 atags = btags = ""
2025-07-01 17:49:09.097 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.097 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.097 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.097 if tag == 'replace':
2025-07-01 17:49:09.097 atags += '^' * la
2025-07-01 17:49:09.097 btags += '^' * lb
2025-07-01 17:49:09.098 elif tag == 'delete':
2025-07-01 17:49:09.098 atags += '-' * la
2025-07-01 17:49:09.098 elif tag == 'insert':
2025-07-01 17:49:09.098 btags += '+' * lb
2025-07-01 17:49:09.098 elif tag == 'equal':
2025-07-01 17:49:09.098 atags += ' ' * la
2025-07-01 17:49:09.098 btags += ' ' * lb
2025-07-01 17:49:09.098 else:
2025-07-01 17:49:09.098 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.098 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.098 else:
2025-07-01 17:49:09.098 # the synch pair is identical
2025-07-01 17:49:09.098 yield ' ' + aelt
2025-07-01 17:49:09.098
2025-07-01 17:49:09.098 # pump out diffs from after the synch point
2025-07-01 17:49:09.098 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.098
2025-07-01 17:49:09.098 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.098 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.098
2025-07-01 17:49:09.099 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.099 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.099 alo = 481, ahi = 1101
2025-07-01 17:49:09.099 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.099 blo = 481, bhi = 1101
2025-07-01 17:49:09.099
2025-07-01 17:49:09.099 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.099 g = []
2025-07-01 17:49:09.099 if alo < ahi:
2025-07-01 17:49:09.099 if blo < bhi:
2025-07-01 17:49:09.099 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.099 else:
2025-07-01 17:49:09.099 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.099 elif blo < bhi:
2025-07-01 17:49:09.099 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.099
2025-07-01 17:49:09.099 > yield from g
2025-07-01 17:49:09.099
2025-07-01 17:49:09.099 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.099 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.099
2025-07-01 17:49:09.100 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.100 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.100 alo = 481, ahi = 1101
2025-07-01 17:49:09.100 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.100 blo = 481, bhi = 1101
2025-07-01 17:49:09.100
2025-07-01 17:49:09.100 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.100 r"""
2025-07-01 17:49:09.100 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.100 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.100 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.100 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.100
2025-07-01 17:49:09.100 Example:
2025-07-01 17:49:09.100
2025-07-01 17:49:09.100 >>> d = Differ()
2025-07-01 17:49:09.100 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.100 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.100 >>> print(''.join(results), end="")
2025-07-01 17:49:09.101 - abcDefghiJkl
2025-07-01 17:49:09.101 + abcdefGhijkl
2025-07-01 17:49:09.101 """
2025-07-01 17:49:09.101
2025-07-01 17:49:09.101 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.101 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.101 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.101 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.101 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.101
2025-07-01 17:49:09.101 # search for the pair that matches best without being identical
2025-07-01 17:49:09.101 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.101 # on junk -- unless we have to)
2025-07-01 17:49:09.101 for j in range(blo, bhi):
2025-07-01 17:49:09.101 bj = b[j]
2025-07-01 17:49:09.101 cruncher.set_seq2(bj)
2025-07-01 17:49:09.101 for i in range(alo, ahi):
2025-07-01 17:49:09.101 ai = a[i]
2025-07-01 17:49:09.101 if ai == bj:
2025-07-01 17:49:09.102 if eqi is None:
2025-07-01 17:49:09.102 eqi, eqj = i, j
2025-07-01 17:49:09.102 continue
2025-07-01 17:49:09.102 cruncher.set_seq1(ai)
2025-07-01 17:49:09.102 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.102 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.102 # compares by a factor of 3.
2025-07-01 17:49:09.102 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.102 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.102 # of the computation is cached by cruncher
2025-07-01 17:49:09.102 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.102 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.102 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.102 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.102 if best_ratio < cutoff:
2025-07-01 17:49:09.102 # no non-identical "pretty close" pair
2025-07-01 17:49:09.102 if eqi is None:
2025-07-01 17:49:09.102 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.102 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.102 return
2025-07-01 17:49:09.102 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.108 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.108 else:
2025-07-01 17:49:09.108 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.108 eqi = None
2025-07-01 17:49:09.108
2025-07-01 17:49:09.108 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.108 # identical
2025-07-01 17:49:09.108
2025-07-01 17:49:09.108 # pump out diffs from before the synch point
2025-07-01 17:49:09.108 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.108
2025-07-01 17:49:09.108 # do intraline marking on the synch pair
2025-07-01 17:49:09.108 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.108 if eqi is None:
2025-07-01 17:49:09.108 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.108 atags = btags = ""
2025-07-01 17:49:09.108 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.108 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.108 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.109 if tag == 'replace':
2025-07-01 17:49:09.109 atags += '^' * la
2025-07-01 17:49:09.109 btags += '^' * lb
2025-07-01 17:49:09.109 elif tag == 'delete':
2025-07-01 17:49:09.109 atags += '-' * la
2025-07-01 17:49:09.109 elif tag == 'insert':
2025-07-01 17:49:09.109 btags += '+' * lb
2025-07-01 17:49:09.109 elif tag == 'equal':
2025-07-01 17:49:09.109 atags += ' ' * la
2025-07-01 17:49:09.109 btags += ' ' * lb
2025-07-01 17:49:09.109 else:
2025-07-01 17:49:09.109 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.109 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.109 else:
2025-07-01 17:49:09.109 # the synch pair is identical
2025-07-01 17:49:09.109 yield ' ' + aelt
2025-07-01 17:49:09.109
2025-07-01 17:49:09.109 # pump out diffs from after the synch point
2025-07-01 17:49:09.109 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.109
2025-07-01 17:49:09.110 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.110 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.110
2025-07-01 17:49:09.110 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.110 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.110 alo = 482, ahi = 1101
2025-07-01 17:49:09.110 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.110 blo = 482, bhi = 1101
2025-07-01 17:49:09.110
2025-07-01 17:49:09.110 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.110 g = []
2025-07-01 17:49:09.110 if alo < ahi:
2025-07-01 17:49:09.110 if blo < bhi:
2025-07-01 17:49:09.110 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.110 else:
2025-07-01 17:49:09.110 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.110 elif blo < bhi:
2025-07-01 17:49:09.110 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.110
2025-07-01 17:49:09.110 > yield from g
2025-07-01 17:49:09.110
2025-07-01 17:49:09.111 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.111 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.111
2025-07-01 17:49:09.111 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.111 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.111 alo = 482, ahi = 1101
2025-07-01 17:49:09.111 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.111 blo = 482, bhi = 1101
2025-07-01 17:49:09.111
2025-07-01 17:49:09.111 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.111 r"""
2025-07-01 17:49:09.111 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.111 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.111 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.111 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.111
2025-07-01 17:49:09.111 Example:
2025-07-01 17:49:09.111
2025-07-01 17:49:09.112 >>> d = Differ()
2025-07-01 17:49:09.112 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.112 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.112 >>> print(''.join(results), end="")
2025-07-01 17:49:09.112 - abcDefghiJkl
2025-07-01 17:49:09.112 + abcdefGhijkl
2025-07-01 17:49:09.112 """
2025-07-01 17:49:09.112
2025-07-01 17:49:09.112 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.112 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.112 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.112 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.112 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.112
2025-07-01 17:49:09.112 # search for the pair that matches best without being identical
2025-07-01 17:49:09.112 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.112 # on junk -- unless we have to)
2025-07-01 17:49:09.112 for j in range(blo, bhi):
2025-07-01 17:49:09.112 bj = b[j]
2025-07-01 17:49:09.112 cruncher.set_seq2(bj)
2025-07-01 17:49:09.112 for i in range(alo, ahi):
2025-07-01 17:49:09.112 ai = a[i]
2025-07-01 17:49:09.113 if ai == bj:
2025-07-01 17:49:09.113 if eqi is None:
2025-07-01 17:49:09.113 eqi, eqj = i, j
2025-07-01 17:49:09.113 continue
2025-07-01 17:49:09.113 cruncher.set_seq1(ai)
2025-07-01 17:49:09.113 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.113 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.113 # compares by a factor of 3.
2025-07-01 17:49:09.113 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.113 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.113 # of the computation is cached by cruncher
2025-07-01 17:49:09.113 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.113 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.113 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.113 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.113 if best_ratio < cutoff:
2025-07-01 17:49:09.113 # no non-identical "pretty close" pair
2025-07-01 17:49:09.113 if eqi is None:
2025-07-01 17:49:09.113 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.113 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.113 return
2025-07-01 17:49:09.114 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.114 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.114 else:
2025-07-01 17:49:09.114 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.114 eqi = None
2025-07-01 17:49:09.114
2025-07-01 17:49:09.114 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.114 # identical
2025-07-01 17:49:09.114
2025-07-01 17:49:09.114 # pump out diffs from before the synch point
2025-07-01 17:49:09.114 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.114
2025-07-01 17:49:09.114 # do intraline marking on the synch pair
2025-07-01 17:49:09.114 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.114 if eqi is None:
2025-07-01 17:49:09.114 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.114 atags = btags = ""
2025-07-01 17:49:09.114 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.114 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.114 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.114 if tag == 'replace':
2025-07-01 17:49:09.115 atags += '^' * la
2025-07-01 17:49:09.115 btags += '^' * lb
2025-07-01 17:49:09.115 elif tag == 'delete':
2025-07-01 17:49:09.115 atags += '-' * la
2025-07-01 17:49:09.115 elif tag == 'insert':
2025-07-01 17:49:09.115 btags += '+' * lb
2025-07-01 17:49:09.115 elif tag == 'equal':
2025-07-01 17:49:09.115 atags += ' ' * la
2025-07-01 17:49:09.115 btags += ' ' * lb
2025-07-01 17:49:09.115 else:
2025-07-01 17:49:09.115 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.115 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.115 else:
2025-07-01 17:49:09.115 # the synch pair is identical
2025-07-01 17:49:09.115 yield ' ' + aelt
2025-07-01 17:49:09.115
2025-07-01 17:49:09.115 # pump out diffs from after the synch point
2025-07-01 17:49:09.115 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.115
2025-07-01 17:49:09.115 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.115 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.115
2025-07-01 17:49:09.116 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.116 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.116 alo = 483, ahi = 1101
2025-07-01 17:49:09.116 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.116 blo = 483, bhi = 1101
2025-07-01 17:49:09.116
2025-07-01 17:49:09.116 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.116 g = []
2025-07-01 17:49:09.116 if alo < ahi:
2025-07-01 17:49:09.116 if blo < bhi:
2025-07-01 17:49:09.116 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.116 else:
2025-07-01 17:49:09.116 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.116 elif blo < bhi:
2025-07-01 17:49:09.116 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.116
2025-07-01 17:49:09.116 > yield from g
2025-07-01 17:49:09.116
2025-07-01 17:49:09.116 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.116 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.117
2025-07-01 17:49:09.117 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.117 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.117 alo = 483, ahi = 1101
2025-07-01 17:49:09.117 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.117 blo = 483, bhi = 1101
2025-07-01 17:49:09.117
2025-07-01 17:49:09.117 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.117 r"""
2025-07-01 17:49:09.117 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.117 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.117 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.117 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.117
2025-07-01 17:49:09.117 Example:
2025-07-01 17:49:09.117
2025-07-01 17:49:09.117 >>> d = Differ()
2025-07-01 17:49:09.117 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.117 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.117 >>> print(''.join(results), end="")
2025-07-01 17:49:09.117 - abcDefghiJkl
2025-07-01 17:49:09.120 + abcdefGhijkl
2025-07-01 17:49:09.121 """
2025-07-01 17:49:09.121
2025-07-01 17:49:09.121 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.121 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.121 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.121 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.121 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.121
2025-07-01 17:49:09.121 # search for the pair that matches best without being identical
2025-07-01 17:49:09.121 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.121 # on junk -- unless we have to)
2025-07-01 17:49:09.121 for j in range(blo, bhi):
2025-07-01 17:49:09.121 bj = b[j]
2025-07-01 17:49:09.121 cruncher.set_seq2(bj)
2025-07-01 17:49:09.121 for i in range(alo, ahi):
2025-07-01 17:49:09.121 ai = a[i]
2025-07-01 17:49:09.121 if ai == bj:
2025-07-01 17:49:09.121 if eqi is None:
2025-07-01 17:49:09.121 eqi, eqj = i, j
2025-07-01 17:49:09.122 continue
2025-07-01 17:49:09.122 cruncher.set_seq1(ai)
2025-07-01 17:49:09.122 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.122 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.122 # compares by a factor of 3.
2025-07-01 17:49:09.122 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.122 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.122 # of the computation is cached by cruncher
2025-07-01 17:49:09.122 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.122 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.122 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.122 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.122 if best_ratio < cutoff:
2025-07-01 17:49:09.122 # no non-identical "pretty close" pair
2025-07-01 17:49:09.122 if eqi is None:
2025-07-01 17:49:09.122 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.122 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.122 return
2025-07-01 17:49:09.123 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.123 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.123 else:
2025-07-01 17:49:09.123 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.123 eqi = None
2025-07-01 17:49:09.123
2025-07-01 17:49:09.123 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.123 # identical
2025-07-01 17:49:09.123
2025-07-01 17:49:09.123 # pump out diffs from before the synch point
2025-07-01 17:49:09.123 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.123
2025-07-01 17:49:09.123 # do intraline marking on the synch pair
2025-07-01 17:49:09.123 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.123 if eqi is None:
2025-07-01 17:49:09.123 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.123 atags = btags = ""
2025-07-01 17:49:09.123 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.123 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.123 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.123 if tag == 'replace':
2025-07-01 17:49:09.124 atags += '^' * la
2025-07-01 17:49:09.124 btags += '^' * lb
2025-07-01 17:49:09.124 elif tag == 'delete':
2025-07-01 17:49:09.124 atags += '-' * la
2025-07-01 17:49:09.124 elif tag == 'insert':
2025-07-01 17:49:09.124 btags += '+' * lb
2025-07-01 17:49:09.125 elif tag == 'equal':
2025-07-01 17:49:09.125 atags += ' ' * la
2025-07-01 17:49:09.125 btags += ' ' * lb
2025-07-01 17:49:09.125 else:
2025-07-01 17:49:09.125 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.125 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.125 else:
2025-07-01 17:49:09.125 # the synch pair is identical
2025-07-01 17:49:09.125 yield ' ' + aelt
2025-07-01 17:49:09.125
2025-07-01 17:49:09.125 # pump out diffs from after the synch point
2025-07-01 17:49:09.125 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.125
2025-07-01 17:49:09.125 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.125 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.125
2025-07-01 17:49:09.125 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.125 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.125 alo = 484, ahi = 1101
2025-07-01 17:49:09.125 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.126 blo = 484, bhi = 1101
2025-07-01 17:49:09.126
2025-07-01 17:49:09.126 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.126 g = []
2025-07-01 17:49:09.126 if alo < ahi:
2025-07-01 17:49:09.126 if blo < bhi:
2025-07-01 17:49:09.126 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.126 else:
2025-07-01 17:49:09.126 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.126 elif blo < bhi:
2025-07-01 17:49:09.126 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.126
2025-07-01 17:49:09.126 > yield from g
2025-07-01 17:49:09.126
2025-07-01 17:49:09.126 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.126 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.126
2025-07-01 17:49:09.126 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.126 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.126 alo = 484, ahi = 1101
2025-07-01 17:49:09.126 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.127 blo = 484, bhi = 1101
2025-07-01 17:49:09.127
2025-07-01 17:49:09.127 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.127 r"""
2025-07-01 17:49:09.127 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.127 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.127 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.127 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.127
2025-07-01 17:49:09.127 Example:
2025-07-01 17:49:09.127
2025-07-01 17:49:09.127 >>> d = Differ()
2025-07-01 17:49:09.127 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.127 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.127 >>> print(''.join(results), end="")
2025-07-01 17:49:09.127 - abcDefghiJkl
2025-07-01 17:49:09.127 + abcdefGhijkl
2025-07-01 17:49:09.127 """
2025-07-01 17:49:09.127
2025-07-01 17:49:09.128 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.128 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.128 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.128 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.128 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.128
2025-07-01 17:49:09.128 # search for the pair that matches best without being identical
2025-07-01 17:49:09.128 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.128 # on junk -- unless we have to)
2025-07-01 17:49:09.128 for j in range(blo, bhi):
2025-07-01 17:49:09.128 bj = b[j]
2025-07-01 17:49:09.128 cruncher.set_seq2(bj)
2025-07-01 17:49:09.128 for i in range(alo, ahi):
2025-07-01 17:49:09.128 ai = a[i]
2025-07-01 17:49:09.128 if ai == bj:
2025-07-01 17:49:09.128 if eqi is None:
2025-07-01 17:49:09.128 eqi, eqj = i, j
2025-07-01 17:49:09.128 continue
2025-07-01 17:49:09.128 cruncher.set_seq1(ai)
2025-07-01 17:49:09.128 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.128 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.128 # compares by a factor of 3.
2025-07-01 17:49:09.129 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.129 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.129 # of the computation is cached by cruncher
2025-07-01 17:49:09.129 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.129 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.129 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.129 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.129 if best_ratio < cutoff:
2025-07-01 17:49:09.129 # no non-identical "pretty close" pair
2025-07-01 17:49:09.129 if eqi is None:
2025-07-01 17:49:09.129 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.129 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.129 return
2025-07-01 17:49:09.129 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.129 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.129 else:
2025-07-01 17:49:09.129 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.129 eqi = None
2025-07-01 17:49:09.129
2025-07-01 17:49:09.129 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.129 # identical
2025-07-01 17:49:09.130
2025-07-01 17:49:09.130 # pump out diffs from before the synch point
2025-07-01 17:49:09.130 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.130
2025-07-01 17:49:09.130 # do intraline marking on the synch pair
2025-07-01 17:49:09.130 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.130 if eqi is None:
2025-07-01 17:49:09.130 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.130 atags = btags = ""
2025-07-01 17:49:09.130 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.130 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.130 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.130 if tag == 'replace':
2025-07-01 17:49:09.130 atags += '^' * la
2025-07-01 17:49:09.130 btags += '^' * lb
2025-07-01 17:49:09.130 elif tag == 'delete':
2025-07-01 17:49:09.130 atags += '-' * la
2025-07-01 17:49:09.130 elif tag == 'insert':
2025-07-01 17:49:09.130 btags += '+' * lb
2025-07-01 17:49:09.130 elif tag == 'equal':
2025-07-01 17:49:09.130 atags += ' ' * la
2025-07-01 17:49:09.130 btags += ' ' * lb
2025-07-01 17:49:09.131 else:
2025-07-01 17:49:09.131 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.131 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.131 else:
2025-07-01 17:49:09.131 # the synch pair is identical
2025-07-01 17:49:09.131 yield ' ' + aelt
2025-07-01 17:49:09.131
2025-07-01 17:49:09.131 # pump out diffs from after the synch point
2025-07-01 17:49:09.131 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.131
2025-07-01 17:49:09.131 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.131 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.131
2025-07-01 17:49:09.131 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.131 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.131 alo = 485, ahi = 1101
2025-07-01 17:49:09.131 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.131 blo = 485, bhi = 1101
2025-07-01 17:49:09.131
2025-07-01 17:49:09.132 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.132 g = []
2025-07-01 17:49:09.132 if alo < ahi:
2025-07-01 17:49:09.132 if blo < bhi:
2025-07-01 17:49:09.132 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.132 else:
2025-07-01 17:49:09.132 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.132 elif blo < bhi:
2025-07-01 17:49:09.132 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.132
2025-07-01 17:49:09.132 > yield from g
2025-07-01 17:49:09.132
2025-07-01 17:49:09.132 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.132 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.132
2025-07-01 17:49:09.132 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.132 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.132 alo = 485, ahi = 1101
2025-07-01 17:49:09.132 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.132 blo = 485, bhi = 1101
2025-07-01 17:49:09.133
2025-07-01 17:49:09.133 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.133 r"""
2025-07-01 17:49:09.133 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.133 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.133 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.133 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.133
2025-07-01 17:49:09.133 Example:
2025-07-01 17:49:09.133
2025-07-01 17:49:09.133 >>> d = Differ()
2025-07-01 17:49:09.133 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.133 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.133 >>> print(''.join(results), end="")
2025-07-01 17:49:09.133 - abcDefghiJkl
2025-07-01 17:49:09.133 + abcdefGhijkl
2025-07-01 17:49:09.133 """
2025-07-01 17:49:09.133
2025-07-01 17:49:09.133 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.134 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.139 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.139 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.139 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.139
2025-07-01 17:49:09.139 # search for the pair that matches best without being identical
2025-07-01 17:49:09.139 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.139 # on junk -- unless we have to)
2025-07-01 17:49:09.139 for j in range(blo, bhi):
2025-07-01 17:49:09.139 bj = b[j]
2025-07-01 17:49:09.139 cruncher.set_seq2(bj)
2025-07-01 17:49:09.139 for i in range(alo, ahi):
2025-07-01 17:49:09.139 ai = a[i]
2025-07-01 17:49:09.139 if ai == bj:
2025-07-01 17:49:09.139 if eqi is None:
2025-07-01 17:49:09.139 eqi, eqj = i, j
2025-07-01 17:49:09.140 continue
2025-07-01 17:49:09.140 cruncher.set_seq1(ai)
2025-07-01 17:49:09.140 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.140 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.140 # compares by a factor of 3.
2025-07-01 17:49:09.140 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.140 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.140 # of the computation is cached by cruncher
2025-07-01 17:49:09.140 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.140 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.140 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.140 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.140 if best_ratio < cutoff:
2025-07-01 17:49:09.140 # no non-identical "pretty close" pair
2025-07-01 17:49:09.140 if eqi is None:
2025-07-01 17:49:09.140 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.140 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.140 return
2025-07-01 17:49:09.140 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.140 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.141 else:
2025-07-01 17:49:09.141 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.141 eqi = None
2025-07-01 17:49:09.141
2025-07-01 17:49:09.141 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.141 # identical
2025-07-01 17:49:09.141
2025-07-01 17:49:09.141 # pump out diffs from before the synch point
2025-07-01 17:49:09.141 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.141
2025-07-01 17:49:09.141 # do intraline marking on the synch pair
2025-07-01 17:49:09.141 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.141 if eqi is None:
2025-07-01 17:49:09.141 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.141 atags = btags = ""
2025-07-01 17:49:09.141 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.141 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.141 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.141 if tag == 'replace':
2025-07-01 17:49:09.141 atags += '^' * la
2025-07-01 17:49:09.141 btags += '^' * lb
2025-07-01 17:49:09.141 elif tag == 'delete':
2025-07-01 17:49:09.142 atags += '-' * la
2025-07-01 17:49:09.142 elif tag == 'insert':
2025-07-01 17:49:09.142 btags += '+' * lb
2025-07-01 17:49:09.142 elif tag == 'equal':
2025-07-01 17:49:09.142 atags += ' ' * la
2025-07-01 17:49:09.142 btags += ' ' * lb
2025-07-01 17:49:09.142 else:
2025-07-01 17:49:09.142 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.142 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.142 else:
2025-07-01 17:49:09.142 # the synch pair is identical
2025-07-01 17:49:09.142 yield ' ' + aelt
2025-07-01 17:49:09.142
2025-07-01 17:49:09.142 # pump out diffs from after the synch point
2025-07-01 17:49:09.142 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.142
2025-07-01 17:49:09.142 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.142 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.142
2025-07-01 17:49:09.142 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.142 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.143 alo = 488, ahi = 1101
2025-07-01 17:49:09.143 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.143 blo = 488, bhi = 1101
2025-07-01 17:49:09.143
2025-07-01 17:49:09.143 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.143 g = []
2025-07-01 17:49:09.143 if alo < ahi:
2025-07-01 17:49:09.143 if blo < bhi:
2025-07-01 17:49:09.143 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.143 else:
2025-07-01 17:49:09.143 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.143 elif blo < bhi:
2025-07-01 17:49:09.143 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.143
2025-07-01 17:49:09.143 > yield from g
2025-07-01 17:49:09.143
2025-07-01 17:49:09.143 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.143 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.143
2025-07-01 17:49:09.143 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.143 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.143 alo = 488, ahi = 1101
2025-07-01 17:49:09.144 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.144 blo = 488, bhi = 1101
2025-07-01 17:49:09.144
2025-07-01 17:49:09.144 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.144 r"""
2025-07-01 17:49:09.144 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.144 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.144 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.144 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.144
2025-07-01 17:49:09.144 Example:
2025-07-01 17:49:09.144
2025-07-01 17:49:09.144 >>> d = Differ()
2025-07-01 17:49:09.144 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.144 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.144 >>> print(''.join(results), end="")
2025-07-01 17:49:09.144 - abcDefghiJkl
2025-07-01 17:49:09.144 + abcdefGhijkl
2025-07-01 17:49:09.145 """
2025-07-01 17:49:09.145
2025-07-01 17:49:09.145 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.145 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.145 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.145 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.145 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.145
2025-07-01 17:49:09.145 # search for the pair that matches best without being identical
2025-07-01 17:49:09.145 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.145 # on junk -- unless we have to)
2025-07-01 17:49:09.145 for j in range(blo, bhi):
2025-07-01 17:49:09.145 bj = b[j]
2025-07-01 17:49:09.145 cruncher.set_seq2(bj)
2025-07-01 17:49:09.145 for i in range(alo, ahi):
2025-07-01 17:49:09.145 ai = a[i]
2025-07-01 17:49:09.145 if ai == bj:
2025-07-01 17:49:09.145 if eqi is None:
2025-07-01 17:49:09.145 eqi, eqj = i, j
2025-07-01 17:49:09.146 continue
2025-07-01 17:49:09.146 cruncher.set_seq1(ai)
2025-07-01 17:49:09.146 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.146 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.146 # compares by a factor of 3.
2025-07-01 17:49:09.146 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.146 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.146 # of the computation is cached by cruncher
2025-07-01 17:49:09.146 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.146 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.146 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.146 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.146 if best_ratio < cutoff:
2025-07-01 17:49:09.146 # no non-identical "pretty close" pair
2025-07-01 17:49:09.146 if eqi is None:
2025-07-01 17:49:09.146 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.146 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.146 return
2025-07-01 17:49:09.146 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.146 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.146 else:
2025-07-01 17:49:09.147 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.147 eqi = None
2025-07-01 17:49:09.147
2025-07-01 17:49:09.147 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.147 # identical
2025-07-01 17:49:09.147
2025-07-01 17:49:09.147 # pump out diffs from before the synch point
2025-07-01 17:49:09.147 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.147
2025-07-01 17:49:09.147 # do intraline marking on the synch pair
2025-07-01 17:49:09.147 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.147 if eqi is None:
2025-07-01 17:49:09.147 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.147 atags = btags = ""
2025-07-01 17:49:09.147 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.147 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.147 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.147 if tag == 'replace':
2025-07-01 17:49:09.147 atags += '^' * la
2025-07-01 17:49:09.147 btags += '^' * lb
2025-07-01 17:49:09.147 elif tag == 'delete':
2025-07-01 17:49:09.148 atags += '-' * la
2025-07-01 17:49:09.148 elif tag == 'insert':
2025-07-01 17:49:09.148 btags += '+' * lb
2025-07-01 17:49:09.148 elif tag == 'equal':
2025-07-01 17:49:09.148 atags += ' ' * la
2025-07-01 17:49:09.148 btags += ' ' * lb
2025-07-01 17:49:09.148 else:
2025-07-01 17:49:09.148 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.148 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.148 else:
2025-07-01 17:49:09.148 # the synch pair is identical
2025-07-01 17:49:09.148 yield ' ' + aelt
2025-07-01 17:49:09.148
2025-07-01 17:49:09.148 # pump out diffs from after the synch point
2025-07-01 17:49:09.148 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.148
2025-07-01 17:49:09.148 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.148 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.148
2025-07-01 17:49:09.148 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.148 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.149 alo = 489, ahi = 1101
2025-07-01 17:49:09.149 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.149 blo = 489, bhi = 1101
2025-07-01 17:49:09.149
2025-07-01 17:49:09.149 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.149 g = []
2025-07-01 17:49:09.149 if alo < ahi:
2025-07-01 17:49:09.149 if blo < bhi:
2025-07-01 17:49:09.149 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.149 else:
2025-07-01 17:49:09.149 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.149 elif blo < bhi:
2025-07-01 17:49:09.149 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.149
2025-07-01 17:49:09.149 > yield from g
2025-07-01 17:49:09.149
2025-07-01 17:49:09.149 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.149 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.149
2025-07-01 17:49:09.149 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.149 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.150 alo = 489, ahi = 1101
2025-07-01 17:49:09.152 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.153 blo = 489, bhi = 1101
2025-07-01 17:49:09.153
2025-07-01 17:49:09.153 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.153 r"""
2025-07-01 17:49:09.153 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.153 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.153 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.153 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.153
2025-07-01 17:49:09.153 Example:
2025-07-01 17:49:09.153
2025-07-01 17:49:09.153 >>> d = Differ()
2025-07-01 17:49:09.153 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.153 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.153 >>> print(''.join(results), end="")
2025-07-01 17:49:09.153 - abcDefghiJkl
2025-07-01 17:49:09.153 + abcdefGhijkl
2025-07-01 17:49:09.153 """
2025-07-01 17:49:09.154
2025-07-01 17:49:09.154 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.154 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.154 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.154 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.154 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.154
2025-07-01 17:49:09.154 # search for the pair that matches best without being identical
2025-07-01 17:49:09.154 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.154 # on junk -- unless we have to)
2025-07-01 17:49:09.154 for j in range(blo, bhi):
2025-07-01 17:49:09.154 bj = b[j]
2025-07-01 17:49:09.154 cruncher.set_seq2(bj)
2025-07-01 17:49:09.154 for i in range(alo, ahi):
2025-07-01 17:49:09.154 ai = a[i]
2025-07-01 17:49:09.154 if ai == bj:
2025-07-01 17:49:09.154 if eqi is None:
2025-07-01 17:49:09.154 eqi, eqj = i, j
2025-07-01 17:49:09.154 continue
2025-07-01 17:49:09.154 cruncher.set_seq1(ai)
2025-07-01 17:49:09.154 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.155 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.155 # compares by a factor of 3.
2025-07-01 17:49:09.155 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.155 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.155 # of the computation is cached by cruncher
2025-07-01 17:49:09.155 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.155 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.155 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.155 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.155 if best_ratio < cutoff:
2025-07-01 17:49:09.155 # no non-identical "pretty close" pair
2025-07-01 17:49:09.155 if eqi is None:
2025-07-01 17:49:09.155 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.155 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.155 return
2025-07-01 17:49:09.155 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.155 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.155 else:
2025-07-01 17:49:09.155 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.156 eqi = None
2025-07-01 17:49:09.156
2025-07-01 17:49:09.156 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.156 # identical
2025-07-01 17:49:09.156
2025-07-01 17:49:09.156 # pump out diffs from before the synch point
2025-07-01 17:49:09.156 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.156
2025-07-01 17:49:09.156 # do intraline marking on the synch pair
2025-07-01 17:49:09.156 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.156 if eqi is None:
2025-07-01 17:49:09.156 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.156 atags = btags = ""
2025-07-01 17:49:09.156 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.156 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.156 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.156 if tag == 'replace':
2025-07-01 17:49:09.156 atags += '^' * la
2025-07-01 17:49:09.156 btags += '^' * lb
2025-07-01 17:49:09.156 elif tag == 'delete':
2025-07-01 17:49:09.157 atags += '-' * la
2025-07-01 17:49:09.157 elif tag == 'insert':
2025-07-01 17:49:09.157 btags += '+' * lb
2025-07-01 17:49:09.157 elif tag == 'equal':
2025-07-01 17:49:09.157 atags += ' ' * la
2025-07-01 17:49:09.157 btags += ' ' * lb
2025-07-01 17:49:09.157 else:
2025-07-01 17:49:09.157 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.157 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.157 else:
2025-07-01 17:49:09.157 # the synch pair is identical
2025-07-01 17:49:09.157 yield ' ' + aelt
2025-07-01 17:49:09.157
2025-07-01 17:49:09.157 # pump out diffs from after the synch point
2025-07-01 17:49:09.157 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.157
2025-07-01 17:49:09.157 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.157 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.157
2025-07-01 17:49:09.157 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.157 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.158 alo = 490, ahi = 1101
2025-07-01 17:49:09.158 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.158 blo = 490, bhi = 1101
2025-07-01 17:49:09.158
2025-07-01 17:49:09.158 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.158 g = []
2025-07-01 17:49:09.158 if alo < ahi:
2025-07-01 17:49:09.158 if blo < bhi:
2025-07-01 17:49:09.158 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.158 else:
2025-07-01 17:49:09.158 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.158 elif blo < bhi:
2025-07-01 17:49:09.158 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.158
2025-07-01 17:49:09.158 > yield from g
2025-07-01 17:49:09.158
2025-07-01 17:49:09.158 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.158 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.158
2025-07-01 17:49:09.158 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.159 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.159 alo = 490, ahi = 1101
2025-07-01 17:49:09.159 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.159 blo = 490, bhi = 1101
2025-07-01 17:49:09.159
2025-07-01 17:49:09.159 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.159 r"""
2025-07-01 17:49:09.159 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.159 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.159 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.159 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.159
2025-07-01 17:49:09.159 Example:
2025-07-01 17:49:09.159
2025-07-01 17:49:09.159 >>> d = Differ()
2025-07-01 17:49:09.159 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.159 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.159 >>> print(''.join(results), end="")
2025-07-01 17:49:09.159 - abcDefghiJkl
2025-07-01 17:49:09.159 + abcdefGhijkl
2025-07-01 17:49:09.160 """
2025-07-01 17:49:09.160
2025-07-01 17:49:09.160 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.160 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.160 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.160 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.160 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.160
2025-07-01 17:49:09.160 # search for the pair that matches best without being identical
2025-07-01 17:49:09.160 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.160 # on junk -- unless we have to)
2025-07-01 17:49:09.160 for j in range(blo, bhi):
2025-07-01 17:49:09.160 bj = b[j]
2025-07-01 17:49:09.160 cruncher.set_seq2(bj)
2025-07-01 17:49:09.160 for i in range(alo, ahi):
2025-07-01 17:49:09.160 ai = a[i]
2025-07-01 17:49:09.160 if ai == bj:
2025-07-01 17:49:09.160 if eqi is None:
2025-07-01 17:49:09.160 eqi, eqj = i, j
2025-07-01 17:49:09.160 continue
2025-07-01 17:49:09.161 cruncher.set_seq1(ai)
2025-07-01 17:49:09.161 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.161 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.161 # compares by a factor of 3.
2025-07-01 17:49:09.161 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.161 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.161 # of the computation is cached by cruncher
2025-07-01 17:49:09.161 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.161 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.161 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.161 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.161 if best_ratio < cutoff:
2025-07-01 17:49:09.161 # no non-identical "pretty close" pair
2025-07-01 17:49:09.161 if eqi is None:
2025-07-01 17:49:09.161 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.161 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.161 return
2025-07-01 17:49:09.161 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.161 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.161 else:
2025-07-01 17:49:09.162 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.162 eqi = None
2025-07-01 17:49:09.162
2025-07-01 17:49:09.162 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.162 # identical
2025-07-01 17:49:09.162
2025-07-01 17:49:09.162 # pump out diffs from before the synch point
2025-07-01 17:49:09.162 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.162
2025-07-01 17:49:09.162 # do intraline marking on the synch pair
2025-07-01 17:49:09.162 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.162 if eqi is None:
2025-07-01 17:49:09.162 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.162 atags = btags = ""
2025-07-01 17:49:09.162 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.162 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.162 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.162 if tag == 'replace':
2025-07-01 17:49:09.162 atags += '^' * la
2025-07-01 17:49:09.162 btags += '^' * lb
2025-07-01 17:49:09.162 elif tag == 'delete':
2025-07-01 17:49:09.163 atags += '-' * la
2025-07-01 17:49:09.163 elif tag == 'insert':
2025-07-01 17:49:09.163 btags += '+' * lb
2025-07-01 17:49:09.163 elif tag == 'equal':
2025-07-01 17:49:09.163 atags += ' ' * la
2025-07-01 17:49:09.163 btags += ' ' * lb
2025-07-01 17:49:09.163 else:
2025-07-01 17:49:09.163 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.163 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.163 else:
2025-07-01 17:49:09.163 # the synch pair is identical
2025-07-01 17:49:09.163 yield ' ' + aelt
2025-07-01 17:49:09.163
2025-07-01 17:49:09.163 # pump out diffs from after the synch point
2025-07-01 17:49:09.163 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.163
2025-07-01 17:49:09.163 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.163 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.163
2025-07-01 17:49:09.163 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.163 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.164 alo = 491, ahi = 1101
2025-07-01 17:49:09.164 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.164 blo = 491, bhi = 1101
2025-07-01 17:49:09.164
2025-07-01 17:49:09.164 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.164 g = []
2025-07-01 17:49:09.164 if alo < ahi:
2025-07-01 17:49:09.164 if blo < bhi:
2025-07-01 17:49:09.164 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.164 else:
2025-07-01 17:49:09.164 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.164 elif blo < bhi:
2025-07-01 17:49:09.164 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.164
2025-07-01 17:49:09.164 > yield from g
2025-07-01 17:49:09.164
2025-07-01 17:49:09.164 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.164 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.164
2025-07-01 17:49:09.165 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.165 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.165 alo = 491, ahi = 1101
2025-07-01 17:49:09.165 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.165 blo = 491, bhi = 1101
2025-07-01 17:49:09.165
2025-07-01 17:49:09.165 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.165 r"""
2025-07-01 17:49:09.165 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.165 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.165 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.165 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.165
2025-07-01 17:49:09.165 Example:
2025-07-01 17:49:09.165
2025-07-01 17:49:09.165 >>> d = Differ()
2025-07-01 17:49:09.165 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.165 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.165 >>> print(''.join(results), end="")
2025-07-01 17:49:09.165 - abcDefghiJkl
2025-07-01 17:49:09.171 + abcdefGhijkl
2025-07-01 17:49:09.171 """
2025-07-01 17:49:09.171
2025-07-01 17:49:09.171 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.171 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.171 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.171 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.171 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.171
2025-07-01 17:49:09.171 # search for the pair that matches best without being identical
2025-07-01 17:49:09.171 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.171 # on junk -- unless we have to)
2025-07-01 17:49:09.171 for j in range(blo, bhi):
2025-07-01 17:49:09.171 bj = b[j]
2025-07-01 17:49:09.171 cruncher.set_seq2(bj)
2025-07-01 17:49:09.171 for i in range(alo, ahi):
2025-07-01 17:49:09.171 ai = a[i]
2025-07-01 17:49:09.171 if ai == bj:
2025-07-01 17:49:09.172 if eqi is None:
2025-07-01 17:49:09.172 eqi, eqj = i, j
2025-07-01 17:49:09.172 continue
2025-07-01 17:49:09.172 cruncher.set_seq1(ai)
2025-07-01 17:49:09.172 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.172 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.172 # compares by a factor of 3.
2025-07-01 17:49:09.172 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.172 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.172 # of the computation is cached by cruncher
2025-07-01 17:49:09.172 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.172 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.172 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.172 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.172 if best_ratio < cutoff:
2025-07-01 17:49:09.172 # no non-identical "pretty close" pair
2025-07-01 17:49:09.172 if eqi is None:
2025-07-01 17:49:09.172 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.172 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.172 return
2025-07-01 17:49:09.172 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.173 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.173 else:
2025-07-01 17:49:09.173 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.173 eqi = None
2025-07-01 17:49:09.173
2025-07-01 17:49:09.173 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.173 # identical
2025-07-01 17:49:09.173
2025-07-01 17:49:09.173 # pump out diffs from before the synch point
2025-07-01 17:49:09.173 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.173
2025-07-01 17:49:09.173 # do intraline marking on the synch pair
2025-07-01 17:49:09.173 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.173 if eqi is None:
2025-07-01 17:49:09.173 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.173 atags = btags = ""
2025-07-01 17:49:09.173 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.173 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.173 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.173 if tag == 'replace':
2025-07-01 17:49:09.173 atags += '^' * la
2025-07-01 17:49:09.174 btags += '^' * lb
2025-07-01 17:49:09.174 elif tag == 'delete':
2025-07-01 17:49:09.174 atags += '-' * la
2025-07-01 17:49:09.174 elif tag == 'insert':
2025-07-01 17:49:09.174 btags += '+' * lb
2025-07-01 17:49:09.174 elif tag == 'equal':
2025-07-01 17:49:09.174 atags += ' ' * la
2025-07-01 17:49:09.174 btags += ' ' * lb
2025-07-01 17:49:09.174 else:
2025-07-01 17:49:09.174 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.174 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.174 else:
2025-07-01 17:49:09.174 # the synch pair is identical
2025-07-01 17:49:09.174 yield ' ' + aelt
2025-07-01 17:49:09.174
2025-07-01 17:49:09.174 # pump out diffs from after the synch point
2025-07-01 17:49:09.174 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.174
2025-07-01 17:49:09.174 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.174 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.174
2025-07-01 17:49:09.174 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.175 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.175 alo = 492, ahi = 1101
2025-07-01 17:49:09.175 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.175 blo = 492, bhi = 1101
2025-07-01 17:49:09.175
2025-07-01 17:49:09.175 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.175 g = []
2025-07-01 17:49:09.175 if alo < ahi:
2025-07-01 17:49:09.175 if blo < bhi:
2025-07-01 17:49:09.175 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.175 else:
2025-07-01 17:49:09.175 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.175 elif blo < bhi:
2025-07-01 17:49:09.175 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.175
2025-07-01 17:49:09.175 > yield from g
2025-07-01 17:49:09.175
2025-07-01 17:49:09.175 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.175 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.175
2025-07-01 17:49:09.175 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.176 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.176 alo = 492, ahi = 1101
2025-07-01 17:49:09.176 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.176 blo = 492, bhi = 1101
2025-07-01 17:49:09.176
2025-07-01 17:49:09.176 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.176 r"""
2025-07-01 17:49:09.176 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.176 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.176 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.176 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.176
2025-07-01 17:49:09.176 Example:
2025-07-01 17:49:09.176
2025-07-01 17:49:09.176 >>> d = Differ()
2025-07-01 17:49:09.176 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.176 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.176 >>> print(''.join(results), end="")
2025-07-01 17:49:09.176 - abcDefghiJkl
2025-07-01 17:49:09.176 + abcdefGhijkl
2025-07-01 17:49:09.177 """
2025-07-01 17:49:09.177
2025-07-01 17:49:09.177 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.177 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.177 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.177 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.177 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.177
2025-07-01 17:49:09.177 # search for the pair that matches best without being identical
2025-07-01 17:49:09.177 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.177 # on junk -- unless we have to)
2025-07-01 17:49:09.177 for j in range(blo, bhi):
2025-07-01 17:49:09.177 bj = b[j]
2025-07-01 17:49:09.177 cruncher.set_seq2(bj)
2025-07-01 17:49:09.177 for i in range(alo, ahi):
2025-07-01 17:49:09.177 ai = a[i]
2025-07-01 17:49:09.177 if ai == bj:
2025-07-01 17:49:09.177 if eqi is None:
2025-07-01 17:49:09.178 eqi, eqj = i, j
2025-07-01 17:49:09.178 continue
2025-07-01 17:49:09.178 cruncher.set_seq1(ai)
2025-07-01 17:49:09.178 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.178 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.178 # compares by a factor of 3.
2025-07-01 17:49:09.178 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.178 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.178 # of the computation is cached by cruncher
2025-07-01 17:49:09.178 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.178 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.178 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.178 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.178 if best_ratio < cutoff:
2025-07-01 17:49:09.178 # no non-identical "pretty close" pair
2025-07-01 17:49:09.178 if eqi is None:
2025-07-01 17:49:09.178 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.178 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.178 return
2025-07-01 17:49:09.178 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.179 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.179 else:
2025-07-01 17:49:09.179 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.179 eqi = None
2025-07-01 17:49:09.179
2025-07-01 17:49:09.179 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.179 # identical
2025-07-01 17:49:09.179
2025-07-01 17:49:09.179 # pump out diffs from before the synch point
2025-07-01 17:49:09.179 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.179
2025-07-01 17:49:09.179 # do intraline marking on the synch pair
2025-07-01 17:49:09.179 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.179 if eqi is None:
2025-07-01 17:49:09.179 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.179 atags = btags = ""
2025-07-01 17:49:09.179 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.179 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.179 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.179 if tag == 'replace':
2025-07-01 17:49:09.179 atags += '^' * la
2025-07-01 17:49:09.180 btags += '^' * lb
2025-07-01 17:49:09.180 elif tag == 'delete':
2025-07-01 17:49:09.180 atags += '-' * la
2025-07-01 17:49:09.180 elif tag == 'insert':
2025-07-01 17:49:09.180 btags += '+' * lb
2025-07-01 17:49:09.180 elif tag == 'equal':
2025-07-01 17:49:09.180 atags += ' ' * la
2025-07-01 17:49:09.180 btags += ' ' * lb
2025-07-01 17:49:09.180 else:
2025-07-01 17:49:09.180 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.180 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.180 else:
2025-07-01 17:49:09.180 # the synch pair is identical
2025-07-01 17:49:09.180 yield ' ' + aelt
2025-07-01 17:49:09.180
2025-07-01 17:49:09.180 # pump out diffs from after the synch point
2025-07-01 17:49:09.180 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.180
2025-07-01 17:49:09.180 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.180 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.181
2025-07-01 17:49:09.184 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.184 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.184 alo = 493, ahi = 1101
2025-07-01 17:49:09.184 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.184 blo = 493, bhi = 1101
2025-07-01 17:49:09.184
2025-07-01 17:49:09.184 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.184 g = []
2025-07-01 17:49:09.184 if alo < ahi:
2025-07-01 17:49:09.184 if blo < bhi:
2025-07-01 17:49:09.184 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.184 else:
2025-07-01 17:49:09.184 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.184 elif blo < bhi:
2025-07-01 17:49:09.184 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.184
2025-07-01 17:49:09.184 > yield from g
2025-07-01 17:49:09.184
2025-07-01 17:49:09.184 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.185 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.185
2025-07-01 17:49:09.185 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.185 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.185 alo = 493, ahi = 1101
2025-07-01 17:49:09.185 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.185 blo = 493, bhi = 1101
2025-07-01 17:49:09.185
2025-07-01 17:49:09.185 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.185 r"""
2025-07-01 17:49:09.185 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.185 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.185 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.185 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.185
2025-07-01 17:49:09.185 Example:
2025-07-01 17:49:09.185
2025-07-01 17:49:09.185 >>> d = Differ()
2025-07-01 17:49:09.185 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.185 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.186 >>> print(''.join(results), end="")
2025-07-01 17:49:09.186 - abcDefghiJkl
2025-07-01 17:49:09.186 + abcdefGhijkl
2025-07-01 17:49:09.186 """
2025-07-01 17:49:09.186
2025-07-01 17:49:09.186 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.186 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.186 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.186 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.186 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.186
2025-07-01 17:49:09.186 # search for the pair that matches best without being identical
2025-07-01 17:49:09.186 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.186 # on junk -- unless we have to)
2025-07-01 17:49:09.186 for j in range(blo, bhi):
2025-07-01 17:49:09.186 bj = b[j]
2025-07-01 17:49:09.186 cruncher.set_seq2(bj)
2025-07-01 17:49:09.186 for i in range(alo, ahi):
2025-07-01 17:49:09.186 ai = a[i]
2025-07-01 17:49:09.187 if ai == bj:
2025-07-01 17:49:09.187 if eqi is None:
2025-07-01 17:49:09.187 eqi, eqj = i, j
2025-07-01 17:49:09.187 continue
2025-07-01 17:49:09.187 cruncher.set_seq1(ai)
2025-07-01 17:49:09.187 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.187 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.187 # compares by a factor of 3.
2025-07-01 17:49:09.187 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.187 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.187 # of the computation is cached by cruncher
2025-07-01 17:49:09.187 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.187 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.187 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.187 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.187 if best_ratio < cutoff:
2025-07-01 17:49:09.187 # no non-identical "pretty close" pair
2025-07-01 17:49:09.187 if eqi is None:
2025-07-01 17:49:09.187 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.187 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.188 return
2025-07-01 17:49:09.188 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.188 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.188 else:
2025-07-01 17:49:09.188 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.188 eqi = None
2025-07-01 17:49:09.188
2025-07-01 17:49:09.188 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.188 # identical
2025-07-01 17:49:09.188
2025-07-01 17:49:09.188 # pump out diffs from before the synch point
2025-07-01 17:49:09.188 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.188
2025-07-01 17:49:09.188 # do intraline marking on the synch pair
2025-07-01 17:49:09.188 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.188 if eqi is None:
2025-07-01 17:49:09.188 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.188 atags = btags = ""
2025-07-01 17:49:09.188 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.188 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.188 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.189 if tag == 'replace':
2025-07-01 17:49:09.189 atags += '^' * la
2025-07-01 17:49:09.189 btags += '^' * lb
2025-07-01 17:49:09.189 elif tag == 'delete':
2025-07-01 17:49:09.189 atags += '-' * la
2025-07-01 17:49:09.189 elif tag == 'insert':
2025-07-01 17:49:09.189 btags += '+' * lb
2025-07-01 17:49:09.189 elif tag == 'equal':
2025-07-01 17:49:09.189 atags += ' ' * la
2025-07-01 17:49:09.189 btags += ' ' * lb
2025-07-01 17:49:09.189 else:
2025-07-01 17:49:09.189 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.189 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.189 else:
2025-07-01 17:49:09.189 # the synch pair is identical
2025-07-01 17:49:09.189 yield ' ' + aelt
2025-07-01 17:49:09.189
2025-07-01 17:49:09.189 # pump out diffs from after the synch point
2025-07-01 17:49:09.189 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.189
2025-07-01 17:49:09.190 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.190 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.190
2025-07-01 17:49:09.190 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.190 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.190 alo = 494, ahi = 1101
2025-07-01 17:49:09.190 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.190 blo = 494, bhi = 1101
2025-07-01 17:49:09.190
2025-07-01 17:49:09.190 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.190 g = []
2025-07-01 17:49:09.190 if alo < ahi:
2025-07-01 17:49:09.190 if blo < bhi:
2025-07-01 17:49:09.190 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.190 else:
2025-07-01 17:49:09.190 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.190 elif blo < bhi:
2025-07-01 17:49:09.190 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.190
2025-07-01 17:49:09.190 > yield from g
2025-07-01 17:49:09.190
2025-07-01 17:49:09.191 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.191 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.191
2025-07-01 17:49:09.191 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.191 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.191 alo = 494, ahi = 1101
2025-07-01 17:49:09.191 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.191 blo = 494, bhi = 1101
2025-07-01 17:49:09.191
2025-07-01 17:49:09.191 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.191 r"""
2025-07-01 17:49:09.191 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.191 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.191 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.191 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.191
2025-07-01 17:49:09.191 Example:
2025-07-01 17:49:09.191
2025-07-01 17:49:09.191 >>> d = Differ()
2025-07-01 17:49:09.191 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.192 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.192 >>> print(''.join(results), end="")
2025-07-01 17:49:09.192 - abcDefghiJkl
2025-07-01 17:49:09.192 + abcdefGhijkl
2025-07-01 17:49:09.192 """
2025-07-01 17:49:09.192
2025-07-01 17:49:09.192 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.192 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.192 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.192 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.192 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.192
2025-07-01 17:49:09.192 # search for the pair that matches best without being identical
2025-07-01 17:49:09.192 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.192 # on junk -- unless we have to)
2025-07-01 17:49:09.192 for j in range(blo, bhi):
2025-07-01 17:49:09.192 bj = b[j]
2025-07-01 17:49:09.192 cruncher.set_seq2(bj)
2025-07-01 17:49:09.193 for i in range(alo, ahi):
2025-07-01 17:49:09.193 ai = a[i]
2025-07-01 17:49:09.193 if ai == bj:
2025-07-01 17:49:09.193 if eqi is None:
2025-07-01 17:49:09.193 eqi, eqj = i, j
2025-07-01 17:49:09.193 continue
2025-07-01 17:49:09.193 cruncher.set_seq1(ai)
2025-07-01 17:49:09.193 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.193 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.193 # compares by a factor of 3.
2025-07-01 17:49:09.193 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.193 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.193 # of the computation is cached by cruncher
2025-07-01 17:49:09.193 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.193 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.193 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.193 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.193 if best_ratio < cutoff:
2025-07-01 17:49:09.193 # no non-identical "pretty close" pair
2025-07-01 17:49:09.193 if eqi is None:
2025-07-01 17:49:09.193 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.194 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.194 return
2025-07-01 17:49:09.194 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.194 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.194 else:
2025-07-01 17:49:09.194 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.194 eqi = None
2025-07-01 17:49:09.194
2025-07-01 17:49:09.194 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.194 # identical
2025-07-01 17:49:09.194
2025-07-01 17:49:09.194 # pump out diffs from before the synch point
2025-07-01 17:49:09.194 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.194
2025-07-01 17:49:09.194 # do intraline marking on the synch pair
2025-07-01 17:49:09.194 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.194 if eqi is None:
2025-07-01 17:49:09.194 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.194 atags = btags = ""
2025-07-01 17:49:09.194 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.194 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.195 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.195 if tag == 'replace':
2025-07-01 17:49:09.195 atags += '^' * la
2025-07-01 17:49:09.195 btags += '^' * lb
2025-07-01 17:49:09.195 elif tag == 'delete':
2025-07-01 17:49:09.195 atags += '-' * la
2025-07-01 17:49:09.195 elif tag == 'insert':
2025-07-01 17:49:09.195 btags += '+' * lb
2025-07-01 17:49:09.195 elif tag == 'equal':
2025-07-01 17:49:09.195 atags += ' ' * la
2025-07-01 17:49:09.195 btags += ' ' * lb
2025-07-01 17:49:09.195 else:
2025-07-01 17:49:09.195 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.195 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.195 else:
2025-07-01 17:49:09.195 # the synch pair is identical
2025-07-01 17:49:09.195 yield ' ' + aelt
2025-07-01 17:49:09.195
2025-07-01 17:49:09.195 # pump out diffs from after the synch point
2025-07-01 17:49:09.195 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.195
2025-07-01 17:49:09.195 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.196 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.196
2025-07-01 17:49:09.196 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.196 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.196 alo = 495, ahi = 1101
2025-07-01 17:49:09.196 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.196 blo = 495, bhi = 1101
2025-07-01 17:49:09.196
2025-07-01 17:49:09.196 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.196 g = []
2025-07-01 17:49:09.196 if alo < ahi:
2025-07-01 17:49:09.196 if blo < bhi:
2025-07-01 17:49:09.196 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.196 else:
2025-07-01 17:49:09.196 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.196 elif blo < bhi:
2025-07-01 17:49:09.196 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.196
2025-07-01 17:49:09.196 > yield from g
2025-07-01 17:49:09.196
2025-07-01 17:49:09.196 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.197 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.201
2025-07-01 17:49:09.202 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.202 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.202 alo = 495, ahi = 1101
2025-07-01 17:49:09.202 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.202 blo = 495, bhi = 1101
2025-07-01 17:49:09.202
2025-07-01 17:49:09.202 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.202 r"""
2025-07-01 17:49:09.202 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.202 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.202 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.202 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.202
2025-07-01 17:49:09.202 Example:
2025-07-01 17:49:09.202
2025-07-01 17:49:09.202 >>> d = Differ()
2025-07-01 17:49:09.202 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.202 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.202 >>> print(''.join(results), end="")
2025-07-01 17:49:09.203 - abcDefghiJkl
2025-07-01 17:49:09.203 + abcdefGhijkl
2025-07-01 17:49:09.203 """
2025-07-01 17:49:09.203
2025-07-01 17:49:09.203 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.203 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.203 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.203 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.203 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.203
2025-07-01 17:49:09.203 # search for the pair that matches best without being identical
2025-07-01 17:49:09.203 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.203 # on junk -- unless we have to)
2025-07-01 17:49:09.203 for j in range(blo, bhi):
2025-07-01 17:49:09.203 bj = b[j]
2025-07-01 17:49:09.203 cruncher.set_seq2(bj)
2025-07-01 17:49:09.203 for i in range(alo, ahi):
2025-07-01 17:49:09.203 ai = a[i]
2025-07-01 17:49:09.204 if ai == bj:
2025-07-01 17:49:09.204 if eqi is None:
2025-07-01 17:49:09.204 eqi, eqj = i, j
2025-07-01 17:49:09.204 continue
2025-07-01 17:49:09.204 cruncher.set_seq1(ai)
2025-07-01 17:49:09.204 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.204 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.204 # compares by a factor of 3.
2025-07-01 17:49:09.204 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.204 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.204 # of the computation is cached by cruncher
2025-07-01 17:49:09.204 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.204 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.204 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.204 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.204 if best_ratio < cutoff:
2025-07-01 17:49:09.204 # no non-identical "pretty close" pair
2025-07-01 17:49:09.204 if eqi is None:
2025-07-01 17:49:09.204 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.204 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.204 return
2025-07-01 17:49:09.205 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.205 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.205 else:
2025-07-01 17:49:09.205 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.205 eqi = None
2025-07-01 17:49:09.205
2025-07-01 17:49:09.205 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.205 # identical
2025-07-01 17:49:09.205
2025-07-01 17:49:09.205 # pump out diffs from before the synch point
2025-07-01 17:49:09.205 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.205
2025-07-01 17:49:09.205 # do intraline marking on the synch pair
2025-07-01 17:49:09.205 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.205 if eqi is None:
2025-07-01 17:49:09.205 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.205 atags = btags = ""
2025-07-01 17:49:09.205 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.205 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.205 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.206 if tag == 'replace':
2025-07-01 17:49:09.206 atags += '^' * la
2025-07-01 17:49:09.206 btags += '^' * lb
2025-07-01 17:49:09.206 elif tag == 'delete':
2025-07-01 17:49:09.206 atags += '-' * la
2025-07-01 17:49:09.206 elif tag == 'insert':
2025-07-01 17:49:09.206 btags += '+' * lb
2025-07-01 17:49:09.206 elif tag == 'equal':
2025-07-01 17:49:09.206 atags += ' ' * la
2025-07-01 17:49:09.206 btags += ' ' * lb
2025-07-01 17:49:09.206 else:
2025-07-01 17:49:09.206 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.206 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.206 else:
2025-07-01 17:49:09.206 # the synch pair is identical
2025-07-01 17:49:09.206 yield ' ' + aelt
2025-07-01 17:49:09.206
2025-07-01 17:49:09.206 # pump out diffs from after the synch point
2025-07-01 17:49:09.206 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.206
2025-07-01 17:49:09.206 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.207 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.207
2025-07-01 17:49:09.207 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.207 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.207 alo = 496, ahi = 1101
2025-07-01 17:49:09.207 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.207 blo = 496, bhi = 1101
2025-07-01 17:49:09.207
2025-07-01 17:49:09.207 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.207 g = []
2025-07-01 17:49:09.207 if alo < ahi:
2025-07-01 17:49:09.207 if blo < bhi:
2025-07-01 17:49:09.207 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.207 else:
2025-07-01 17:49:09.207 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.207 elif blo < bhi:
2025-07-01 17:49:09.207 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.207
2025-07-01 17:49:09.207 > yield from g
2025-07-01 17:49:09.207
2025-07-01 17:49:09.208 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.208 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.208
2025-07-01 17:49:09.208 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.208 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.208 alo = 496, ahi = 1101
2025-07-01 17:49:09.208 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.208 blo = 496, bhi = 1101
2025-07-01 17:49:09.208
2025-07-01 17:49:09.208 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.208 r"""
2025-07-01 17:49:09.208 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.208 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.208 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.208 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.208
2025-07-01 17:49:09.208 Example:
2025-07-01 17:49:09.208
2025-07-01 17:49:09.208 >>> d = Differ()
2025-07-01 17:49:09.208 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.209 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.209 >>> print(''.join(results), end="")
2025-07-01 17:49:09.209 - abcDefghiJkl
2025-07-01 17:49:09.209 + abcdefGhijkl
2025-07-01 17:49:09.209 """
2025-07-01 17:49:09.209
2025-07-01 17:49:09.209 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.209 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.209 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.209 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.209 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.209
2025-07-01 17:49:09.209 # search for the pair that matches best without being identical
2025-07-01 17:49:09.209 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.209 # on junk -- unless we have to)
2025-07-01 17:49:09.209 for j in range(blo, bhi):
2025-07-01 17:49:09.209 bj = b[j]
2025-07-01 17:49:09.209 cruncher.set_seq2(bj)
2025-07-01 17:49:09.209 for i in range(alo, ahi):
2025-07-01 17:49:09.210 ai = a[i]
2025-07-01 17:49:09.210 if ai == bj:
2025-07-01 17:49:09.210 if eqi is None:
2025-07-01 17:49:09.210 eqi, eqj = i, j
2025-07-01 17:49:09.210 continue
2025-07-01 17:49:09.210 cruncher.set_seq1(ai)
2025-07-01 17:49:09.210 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.210 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.210 # compares by a factor of 3.
2025-07-01 17:49:09.210 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.210 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.210 # of the computation is cached by cruncher
2025-07-01 17:49:09.210 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.210 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.210 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.210 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.210 if best_ratio < cutoff:
2025-07-01 17:49:09.210 # no non-identical "pretty close" pair
2025-07-01 17:49:09.210 if eqi is None:
2025-07-01 17:49:09.210 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.211 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.211 return
2025-07-01 17:49:09.211 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.211 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.211 else:
2025-07-01 17:49:09.211 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.211 eqi = None
2025-07-01 17:49:09.211
2025-07-01 17:49:09.211 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.211 # identical
2025-07-01 17:49:09.211
2025-07-01 17:49:09.211 # pump out diffs from before the synch point
2025-07-01 17:49:09.211 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.211
2025-07-01 17:49:09.211 # do intraline marking on the synch pair
2025-07-01 17:49:09.211 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.211 if eqi is None:
2025-07-01 17:49:09.211 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.211 atags = btags = ""
2025-07-01 17:49:09.212 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.214 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.214 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.215 if tag == 'replace':
2025-07-01 17:49:09.215 atags += '^' * la
2025-07-01 17:49:09.215 btags += '^' * lb
2025-07-01 17:49:09.215 elif tag == 'delete':
2025-07-01 17:49:09.215 atags += '-' * la
2025-07-01 17:49:09.215 elif tag == 'insert':
2025-07-01 17:49:09.215 btags += '+' * lb
2025-07-01 17:49:09.215 elif tag == 'equal':
2025-07-01 17:49:09.215 atags += ' ' * la
2025-07-01 17:49:09.215 btags += ' ' * lb
2025-07-01 17:49:09.215 else:
2025-07-01 17:49:09.215 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.215 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.215 else:
2025-07-01 17:49:09.215 # the synch pair is identical
2025-07-01 17:49:09.215 yield ' ' + aelt
2025-07-01 17:49:09.215
2025-07-01 17:49:09.215 # pump out diffs from after the synch point
2025-07-01 17:49:09.215 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.215
2025-07-01 17:49:09.216 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.216 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.216
2025-07-01 17:49:09.216 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.216 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.216 alo = 497, ahi = 1101
2025-07-01 17:49:09.216 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.216 blo = 497, bhi = 1101
2025-07-01 17:49:09.216
2025-07-01 17:49:09.216 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.216 g = []
2025-07-01 17:49:09.216 if alo < ahi:
2025-07-01 17:49:09.216 if blo < bhi:
2025-07-01 17:49:09.216 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.216 else:
2025-07-01 17:49:09.216 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.216 elif blo < bhi:
2025-07-01 17:49:09.216 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.216
2025-07-01 17:49:09.216 > yield from g
2025-07-01 17:49:09.216
2025-07-01 17:49:09.216 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.217 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.217
2025-07-01 17:49:09.217 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.217 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.217 alo = 497, ahi = 1101
2025-07-01 17:49:09.217 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.217 blo = 497, bhi = 1101
2025-07-01 17:49:09.217
2025-07-01 17:49:09.217 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.217 r"""
2025-07-01 17:49:09.217 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.217 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.217 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.217 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.217
2025-07-01 17:49:09.217 Example:
2025-07-01 17:49:09.217
2025-07-01 17:49:09.217 >>> d = Differ()
2025-07-01 17:49:09.217 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.217 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.217 >>> print(''.join(results), end="")
2025-07-01 17:49:09.218 - abcDefghiJkl
2025-07-01 17:49:09.218 + abcdefGhijkl
2025-07-01 17:49:09.218 """
2025-07-01 17:49:09.218
2025-07-01 17:49:09.218 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.218 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.218 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.218 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.218 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.218
2025-07-01 17:49:09.218 # search for the pair that matches best without being identical
2025-07-01 17:49:09.218 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.218 # on junk -- unless we have to)
2025-07-01 17:49:09.218 for j in range(blo, bhi):
2025-07-01 17:49:09.218 bj = b[j]
2025-07-01 17:49:09.218 cruncher.set_seq2(bj)
2025-07-01 17:49:09.218 for i in range(alo, ahi):
2025-07-01 17:49:09.218 ai = a[i]
2025-07-01 17:49:09.219 if ai == bj:
2025-07-01 17:49:09.219 if eqi is None:
2025-07-01 17:49:09.219 eqi, eqj = i, j
2025-07-01 17:49:09.219 continue
2025-07-01 17:49:09.219 cruncher.set_seq1(ai)
2025-07-01 17:49:09.219 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.219 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.219 # compares by a factor of 3.
2025-07-01 17:49:09.219 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.219 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.219 # of the computation is cached by cruncher
2025-07-01 17:49:09.219 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.219 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.219 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.219 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.219 if best_ratio < cutoff:
2025-07-01 17:49:09.219 # no non-identical "pretty close" pair
2025-07-01 17:49:09.219 if eqi is None:
2025-07-01 17:49:09.219 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.219 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.219 return
2025-07-01 17:49:09.220 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.220 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.220 else:
2025-07-01 17:49:09.220 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.220 eqi = None
2025-07-01 17:49:09.220
2025-07-01 17:49:09.220 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.220 # identical
2025-07-01 17:49:09.220
2025-07-01 17:49:09.220 # pump out diffs from before the synch point
2025-07-01 17:49:09.220 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.220
2025-07-01 17:49:09.220 # do intraline marking on the synch pair
2025-07-01 17:49:09.220 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.220 if eqi is None:
2025-07-01 17:49:09.220 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.220 atags = btags = ""
2025-07-01 17:49:09.220 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.220 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.220 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.220 if tag == 'replace':
2025-07-01 17:49:09.221 atags += '^' * la
2025-07-01 17:49:09.221 btags += '^' * lb
2025-07-01 17:49:09.221 elif tag == 'delete':
2025-07-01 17:49:09.221 atags += '-' * la
2025-07-01 17:49:09.221 elif tag == 'insert':
2025-07-01 17:49:09.221 btags += '+' * lb
2025-07-01 17:49:09.221 elif tag == 'equal':
2025-07-01 17:49:09.221 atags += ' ' * la
2025-07-01 17:49:09.221 btags += ' ' * lb
2025-07-01 17:49:09.221 else:
2025-07-01 17:49:09.221 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.221 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.221 else:
2025-07-01 17:49:09.221 # the synch pair is identical
2025-07-01 17:49:09.221 yield ' ' + aelt
2025-07-01 17:49:09.221
2025-07-01 17:49:09.221 # pump out diffs from after the synch point
2025-07-01 17:49:09.221 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.221
2025-07-01 17:49:09.221 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.221 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.221
2025-07-01 17:49:09.222 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.222 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.222 alo = 498, ahi = 1101
2025-07-01 17:49:09.222 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.222 blo = 498, bhi = 1101
2025-07-01 17:49:09.222
2025-07-01 17:49:09.222 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.222 g = []
2025-07-01 17:49:09.222 if alo < ahi:
2025-07-01 17:49:09.222 if blo < bhi:
2025-07-01 17:49:09.222 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.222 else:
2025-07-01 17:49:09.222 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.222 elif blo < bhi:
2025-07-01 17:49:09.222 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.222
2025-07-01 17:49:09.222 > yield from g
2025-07-01 17:49:09.222
2025-07-01 17:49:09.223 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.223 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.223
2025-07-01 17:49:09.223 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.223 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.223 alo = 498, ahi = 1101
2025-07-01 17:49:09.223 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.223 blo = 498, bhi = 1101
2025-07-01 17:49:09.223
2025-07-01 17:49:09.223 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.223 r"""
2025-07-01 17:49:09.223 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.223 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.223 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.223 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.223
2025-07-01 17:49:09.223 Example:
2025-07-01 17:49:09.223
2025-07-01 17:49:09.223 >>> d = Differ()
2025-07-01 17:49:09.223 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.224 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.224 >>> print(''.join(results), end="")
2025-07-01 17:49:09.224 - abcDefghiJkl
2025-07-01 17:49:09.224 + abcdefGhijkl
2025-07-01 17:49:09.224 """
2025-07-01 17:49:09.224
2025-07-01 17:49:09.224 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.224 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.224 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.224 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.224 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.224
2025-07-01 17:49:09.224 # search for the pair that matches best without being identical
2025-07-01 17:49:09.224 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.224 # on junk -- unless we have to)
2025-07-01 17:49:09.224 for j in range(blo, bhi):
2025-07-01 17:49:09.224 bj = b[j]
2025-07-01 17:49:09.224 cruncher.set_seq2(bj)
2025-07-01 17:49:09.224 for i in range(alo, ahi):
2025-07-01 17:49:09.225 ai = a[i]
2025-07-01 17:49:09.225 if ai == bj:
2025-07-01 17:49:09.225 if eqi is None:
2025-07-01 17:49:09.225 eqi, eqj = i, j
2025-07-01 17:49:09.225 continue
2025-07-01 17:49:09.225 cruncher.set_seq1(ai)
2025-07-01 17:49:09.225 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.225 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.225 # compares by a factor of 3.
2025-07-01 17:49:09.225 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.225 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.225 # of the computation is cached by cruncher
2025-07-01 17:49:09.225 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.225 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.225 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.225 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.225 if best_ratio < cutoff:
2025-07-01 17:49:09.225 # no non-identical "pretty close" pair
2025-07-01 17:49:09.225 if eqi is None:
2025-07-01 17:49:09.225 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.226 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.226 return
2025-07-01 17:49:09.226 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.226 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.226 else:
2025-07-01 17:49:09.226 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.226 eqi = None
2025-07-01 17:49:09.226
2025-07-01 17:49:09.226 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.226 # identical
2025-07-01 17:49:09.226
2025-07-01 17:49:09.226 # pump out diffs from before the synch point
2025-07-01 17:49:09.226 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.226
2025-07-01 17:49:09.226 # do intraline marking on the synch pair
2025-07-01 17:49:09.226 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.226 if eqi is None:
2025-07-01 17:49:09.226 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.226 atags = btags = ""
2025-07-01 17:49:09.226 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.226 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.227 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.231 if tag == 'replace':
2025-07-01 17:49:09.232 atags += '^' * la
2025-07-01 17:49:09.232 btags += '^' * lb
2025-07-01 17:49:09.232 elif tag == 'delete':
2025-07-01 17:49:09.232 atags += '-' * la
2025-07-01 17:49:09.232 elif tag == 'insert':
2025-07-01 17:49:09.232 btags += '+' * lb
2025-07-01 17:49:09.232 elif tag == 'equal':
2025-07-01 17:49:09.232 atags += ' ' * la
2025-07-01 17:49:09.232 btags += ' ' * lb
2025-07-01 17:49:09.232 else:
2025-07-01 17:49:09.232 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.232 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.232 else:
2025-07-01 17:49:09.232 # the synch pair is identical
2025-07-01 17:49:09.232 yield ' ' + aelt
2025-07-01 17:49:09.232
2025-07-01 17:49:09.232 # pump out diffs from after the synch point
2025-07-01 17:49:09.232 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.232
2025-07-01 17:49:09.232 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.232 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.233
2025-07-01 17:49:09.233 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.233 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.233 alo = 499, ahi = 1101
2025-07-01 17:49:09.233 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.233 blo = 499, bhi = 1101
2025-07-01 17:49:09.233
2025-07-01 17:49:09.233 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.233 g = []
2025-07-01 17:49:09.233 if alo < ahi:
2025-07-01 17:49:09.233 if blo < bhi:
2025-07-01 17:49:09.233 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.233 else:
2025-07-01 17:49:09.233 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.233 elif blo < bhi:
2025-07-01 17:49:09.233 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.233
2025-07-01 17:49:09.233 > yield from g
2025-07-01 17:49:09.233
2025-07-01 17:49:09.233 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.234 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.234
2025-07-01 17:49:09.234 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.234 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.234 alo = 499, ahi = 1101
2025-07-01 17:49:09.234 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.234 blo = 499, bhi = 1101
2025-07-01 17:49:09.234
2025-07-01 17:49:09.234 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.234 r"""
2025-07-01 17:49:09.234 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.234 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.234 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.234 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.234
2025-07-01 17:49:09.234 Example:
2025-07-01 17:49:09.234
2025-07-01 17:49:09.234 >>> d = Differ()
2025-07-01 17:49:09.234 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.234 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.234 >>> print(''.join(results), end="")
2025-07-01 17:49:09.235 - abcDefghiJkl
2025-07-01 17:49:09.235 + abcdefGhijkl
2025-07-01 17:49:09.235 """
2025-07-01 17:49:09.235
2025-07-01 17:49:09.235 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.235 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.235 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.235 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.235 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.235
2025-07-01 17:49:09.235 # search for the pair that matches best without being identical
2025-07-01 17:49:09.235 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.235 # on junk -- unless we have to)
2025-07-01 17:49:09.235 for j in range(blo, bhi):
2025-07-01 17:49:09.235 bj = b[j]
2025-07-01 17:49:09.235 cruncher.set_seq2(bj)
2025-07-01 17:49:09.235 for i in range(alo, ahi):
2025-07-01 17:49:09.236 ai = a[i]
2025-07-01 17:49:09.236 if ai == bj:
2025-07-01 17:49:09.236 if eqi is None:
2025-07-01 17:49:09.236 eqi, eqj = i, j
2025-07-01 17:49:09.236 continue
2025-07-01 17:49:09.236 cruncher.set_seq1(ai)
2025-07-01 17:49:09.236 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.236 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.236 # compares by a factor of 3.
2025-07-01 17:49:09.236 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.236 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.236 # of the computation is cached by cruncher
2025-07-01 17:49:09.236 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.236 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.236 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.236 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.236 if best_ratio < cutoff:
2025-07-01 17:49:09.236 # no non-identical "pretty close" pair
2025-07-01 17:49:09.236 if eqi is None:
2025-07-01 17:49:09.236 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.237 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.237 return
2025-07-01 17:49:09.237 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.237 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.237 else:
2025-07-01 17:49:09.237 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.237 eqi = None
2025-07-01 17:49:09.237
2025-07-01 17:49:09.237 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.237 # identical
2025-07-01 17:49:09.237
2025-07-01 17:49:09.237 # pump out diffs from before the synch point
2025-07-01 17:49:09.237 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.237
2025-07-01 17:49:09.237 # do intraline marking on the synch pair
2025-07-01 17:49:09.237 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.237 if eqi is None:
2025-07-01 17:49:09.237 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.237 atags = btags = ""
2025-07-01 17:49:09.237 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.237 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.238 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.238 if tag == 'replace':
2025-07-01 17:49:09.238 atags += '^' * la
2025-07-01 17:49:09.238 btags += '^' * lb
2025-07-01 17:49:09.238 elif tag == 'delete':
2025-07-01 17:49:09.238 atags += '-' * la
2025-07-01 17:49:09.238 elif tag == 'insert':
2025-07-01 17:49:09.238 btags += '+' * lb
2025-07-01 17:49:09.238 elif tag == 'equal':
2025-07-01 17:49:09.238 atags += ' ' * la
2025-07-01 17:49:09.238 btags += ' ' * lb
2025-07-01 17:49:09.238 else:
2025-07-01 17:49:09.238 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.238 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.238 else:
2025-07-01 17:49:09.238 # the synch pair is identical
2025-07-01 17:49:09.238 yield ' ' + aelt
2025-07-01 17:49:09.238
2025-07-01 17:49:09.238 # pump out diffs from after the synch point
2025-07-01 17:49:09.238 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.238
2025-07-01 17:49:09.238 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.239 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.239
2025-07-01 17:49:09.239 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.239 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.239 alo = 500, ahi = 1101
2025-07-01 17:49:09.239 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.239 blo = 500, bhi = 1101
2025-07-01 17:49:09.239
2025-07-01 17:49:09.239 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.239 g = []
2025-07-01 17:49:09.239 if alo < ahi:
2025-07-01 17:49:09.239 if blo < bhi:
2025-07-01 17:49:09.239 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.239 else:
2025-07-01 17:49:09.239 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.239 elif blo < bhi:
2025-07-01 17:49:09.239 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.239
2025-07-01 17:49:09.239 > yield from g
2025-07-01 17:49:09.239
2025-07-01 17:49:09.239 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.240 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.240
2025-07-01 17:49:09.240 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.240 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.240 alo = 500, ahi = 1101
2025-07-01 17:49:09.240 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.240 blo = 500, bhi = 1101
2025-07-01 17:49:09.240
2025-07-01 17:49:09.240 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.240 r"""
2025-07-01 17:49:09.240 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.240 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.240 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.240 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.240
2025-07-01 17:49:09.240 Example:
2025-07-01 17:49:09.240
2025-07-01 17:49:09.240 >>> d = Differ()
2025-07-01 17:49:09.240 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.240 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.240 >>> print(''.join(results), end="")
2025-07-01 17:49:09.240 - abcDefghiJkl
2025-07-01 17:49:09.241 + abcdefGhijkl
2025-07-01 17:49:09.241 """
2025-07-01 17:49:09.241
2025-07-01 17:49:09.241 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.241 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.241 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.241 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.241 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.241
2025-07-01 17:49:09.241 # search for the pair that matches best without being identical
2025-07-01 17:49:09.241 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.241 # on junk -- unless we have to)
2025-07-01 17:49:09.241 for j in range(blo, bhi):
2025-07-01 17:49:09.241 bj = b[j]
2025-07-01 17:49:09.241 cruncher.set_seq2(bj)
2025-07-01 17:49:09.241 for i in range(alo, ahi):
2025-07-01 17:49:09.241 ai = a[i]
2025-07-01 17:49:09.241 if ai == bj:
2025-07-01 17:49:09.241 if eqi is None:
2025-07-01 17:49:09.242 eqi, eqj = i, j
2025-07-01 17:49:09.245 continue
2025-07-01 17:49:09.245 cruncher.set_seq1(ai)
2025-07-01 17:49:09.245 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.245 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.245 # compares by a factor of 3.
2025-07-01 17:49:09.245 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.245 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.245 # of the computation is cached by cruncher
2025-07-01 17:49:09.245 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.245 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.245 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.245 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.245 if best_ratio < cutoff:
2025-07-01 17:49:09.245 # no non-identical "pretty close" pair
2025-07-01 17:49:09.245 if eqi is None:
2025-07-01 17:49:09.245 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.245 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.245 return
2025-07-01 17:49:09.245 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.246 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.246 else:
2025-07-01 17:49:09.246 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.246 eqi = None
2025-07-01 17:49:09.246
2025-07-01 17:49:09.246 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.246 # identical
2025-07-01 17:49:09.246
2025-07-01 17:49:09.246 # pump out diffs from before the synch point
2025-07-01 17:49:09.246 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.246
2025-07-01 17:49:09.246 # do intraline marking on the synch pair
2025-07-01 17:49:09.246 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.246 if eqi is None:
2025-07-01 17:49:09.246 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.246 atags = btags = ""
2025-07-01 17:49:09.246 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.246 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.246 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.247 if tag == 'replace':
2025-07-01 17:49:09.247 atags += '^' * la
2025-07-01 17:49:09.247 btags += '^' * lb
2025-07-01 17:49:09.247 elif tag == 'delete':
2025-07-01 17:49:09.247 atags += '-' * la
2025-07-01 17:49:09.247 elif tag == 'insert':
2025-07-01 17:49:09.247 btags += '+' * lb
2025-07-01 17:49:09.247 elif tag == 'equal':
2025-07-01 17:49:09.247 atags += ' ' * la
2025-07-01 17:49:09.247 btags += ' ' * lb
2025-07-01 17:49:09.247 else:
2025-07-01 17:49:09.247 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.247 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.247 else:
2025-07-01 17:49:09.247 # the synch pair is identical
2025-07-01 17:49:09.247 yield ' ' + aelt
2025-07-01 17:49:09.247
2025-07-01 17:49:09.247 # pump out diffs from after the synch point
2025-07-01 17:49:09.247 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.247
2025-07-01 17:49:09.248 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.248 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.248
2025-07-01 17:49:09.248 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.248 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.248 alo = 501, ahi = 1101
2025-07-01 17:49:09.248 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.248 blo = 501, bhi = 1101
2025-07-01 17:49:09.248
2025-07-01 17:49:09.248 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.248 g = []
2025-07-01 17:49:09.248 if alo < ahi:
2025-07-01 17:49:09.248 if blo < bhi:
2025-07-01 17:49:09.248 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.248 else:
2025-07-01 17:49:09.248 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.248 elif blo < bhi:
2025-07-01 17:49:09.248 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.248
2025-07-01 17:49:09.248 > yield from g
2025-07-01 17:49:09.249
2025-07-01 17:49:09.249 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.249 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.249
2025-07-01 17:49:09.249 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.249 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.249 alo = 501, ahi = 1101
2025-07-01 17:49:09.249 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.249 blo = 501, bhi = 1101
2025-07-01 17:49:09.249
2025-07-01 17:49:09.249 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.249 r"""
2025-07-01 17:49:09.249 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.249 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.249 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.249 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.249
2025-07-01 17:49:09.249 Example:
2025-07-01 17:49:09.249
2025-07-01 17:49:09.249 >>> d = Differ()
2025-07-01 17:49:09.249 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.250 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.250 >>> print(''.join(results), end="")
2025-07-01 17:49:09.250 - abcDefghiJkl
2025-07-01 17:49:09.250 + abcdefGhijkl
2025-07-01 17:49:09.250 """
2025-07-01 17:49:09.250
2025-07-01 17:49:09.250 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.250 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.250 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.250 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.250 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.250
2025-07-01 17:49:09.250 # search for the pair that matches best without being identical
2025-07-01 17:49:09.250 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.250 # on junk -- unless we have to)
2025-07-01 17:49:09.250 for j in range(blo, bhi):
2025-07-01 17:49:09.250 bj = b[j]
2025-07-01 17:49:09.250 cruncher.set_seq2(bj)
2025-07-01 17:49:09.250 for i in range(alo, ahi):
2025-07-01 17:49:09.251 ai = a[i]
2025-07-01 17:49:09.251 if ai == bj:
2025-07-01 17:49:09.251 if eqi is None:
2025-07-01 17:49:09.251 eqi, eqj = i, j
2025-07-01 17:49:09.251 continue
2025-07-01 17:49:09.251 cruncher.set_seq1(ai)
2025-07-01 17:49:09.251 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.251 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.251 # compares by a factor of 3.
2025-07-01 17:49:09.251 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.251 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.251 # of the computation is cached by cruncher
2025-07-01 17:49:09.251 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.251 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.251 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.251 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.251 if best_ratio < cutoff:
2025-07-01 17:49:09.251 # no non-identical "pretty close" pair
2025-07-01 17:49:09.251 if eqi is None:
2025-07-01 17:49:09.251 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.252 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.252 return
2025-07-01 17:49:09.252 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.252 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.252 else:
2025-07-01 17:49:09.252 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.252 eqi = None
2025-07-01 17:49:09.252
2025-07-01 17:49:09.252 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.252 # identical
2025-07-01 17:49:09.252
2025-07-01 17:49:09.252 # pump out diffs from before the synch point
2025-07-01 17:49:09.252 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.252
2025-07-01 17:49:09.252 # do intraline marking on the synch pair
2025-07-01 17:49:09.252 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.252 if eqi is None:
2025-07-01 17:49:09.252 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.252 atags = btags = ""
2025-07-01 17:49:09.252 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.252 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.253 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.253 if tag == 'replace':
2025-07-01 17:49:09.253 atags += '^' * la
2025-07-01 17:49:09.253 btags += '^' * lb
2025-07-01 17:49:09.253 elif tag == 'delete':
2025-07-01 17:49:09.253 atags += '-' * la
2025-07-01 17:49:09.253 elif tag == 'insert':
2025-07-01 17:49:09.253 btags += '+' * lb
2025-07-01 17:49:09.253 elif tag == 'equal':
2025-07-01 17:49:09.253 atags += ' ' * la
2025-07-01 17:49:09.253 btags += ' ' * lb
2025-07-01 17:49:09.253 else:
2025-07-01 17:49:09.253 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.253 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.253 else:
2025-07-01 17:49:09.253 # the synch pair is identical
2025-07-01 17:49:09.253 yield ' ' + aelt
2025-07-01 17:49:09.253
2025-07-01 17:49:09.253 # pump out diffs from after the synch point
2025-07-01 17:49:09.253 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.254
2025-07-01 17:49:09.254 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.254 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.254
2025-07-01 17:49:09.254 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.254 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.254 alo = 502, ahi = 1101
2025-07-01 17:49:09.254 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.254 blo = 502, bhi = 1101
2025-07-01 17:49:09.254
2025-07-01 17:49:09.254 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.254 g = []
2025-07-01 17:49:09.254 if alo < ahi:
2025-07-01 17:49:09.254 if blo < bhi:
2025-07-01 17:49:09.254 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.254 else:
2025-07-01 17:49:09.254 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.254 elif blo < bhi:
2025-07-01 17:49:09.254 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.254
2025-07-01 17:49:09.255 > yield from g
2025-07-01 17:49:09.255
2025-07-01 17:49:09.255 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.255 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.255
2025-07-01 17:49:09.255 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.255 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.255 alo = 502, ahi = 1101
2025-07-01 17:49:09.255 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.255 blo = 502, bhi = 1101
2025-07-01 17:49:09.255
2025-07-01 17:49:09.255 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.255 r"""
2025-07-01 17:49:09.255 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.255 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.255 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.255 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.255
2025-07-01 17:49:09.255 Example:
2025-07-01 17:49:09.255
2025-07-01 17:49:09.256 >>> d = Differ()
2025-07-01 17:49:09.256 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.256 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.256 >>> print(''.join(results), end="")
2025-07-01 17:49:09.256 - abcDefghiJkl
2025-07-01 17:49:09.256 + abcdefGhijkl
2025-07-01 17:49:09.256 """
2025-07-01 17:49:09.256
2025-07-01 17:49:09.256 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.256 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.256 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.256 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.256 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.256
2025-07-01 17:49:09.256 # search for the pair that matches best without being identical
2025-07-01 17:49:09.256 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.256 # on junk -- unless we have to)
2025-07-01 17:49:09.256 for j in range(blo, bhi):
2025-07-01 17:49:09.256 bj = b[j]
2025-07-01 17:49:09.257 cruncher.set_seq2(bj)
2025-07-01 17:49:09.257 for i in range(alo, ahi):
2025-07-01 17:49:09.257 ai = a[i]
2025-07-01 17:49:09.257 if ai == bj:
2025-07-01 17:49:09.257 if eqi is None:
2025-07-01 17:49:09.257 eqi, eqj = i, j
2025-07-01 17:49:09.257 continue
2025-07-01 17:49:09.257 cruncher.set_seq1(ai)
2025-07-01 17:49:09.257 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.257 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.257 # compares by a factor of 3.
2025-07-01 17:49:09.257 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.257 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.257 # of the computation is cached by cruncher
2025-07-01 17:49:09.257 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.257 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.257 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.257 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.257 if best_ratio < cutoff:
2025-07-01 17:49:09.257 # no non-identical "pretty close" pair
2025-07-01 17:49:09.257 if eqi is None:
2025-07-01 17:49:09.263 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.263 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.263 return
2025-07-01 17:49:09.263 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.263 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.263 else:
2025-07-01 17:49:09.263 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.263 eqi = None
2025-07-01 17:49:09.263
2025-07-01 17:49:09.263 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.263 # identical
2025-07-01 17:49:09.263
2025-07-01 17:49:09.263 # pump out diffs from before the synch point
2025-07-01 17:49:09.263 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.263
2025-07-01 17:49:09.263 # do intraline marking on the synch pair
2025-07-01 17:49:09.263 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.263 if eqi is None:
2025-07-01 17:49:09.263 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.263 atags = btags = ""
2025-07-01 17:49:09.264 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.264 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.264 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.264 if tag == 'replace':
2025-07-01 17:49:09.264 atags += '^' * la
2025-07-01 17:49:09.264 btags += '^' * lb
2025-07-01 17:49:09.264 elif tag == 'delete':
2025-07-01 17:49:09.264 atags += '-' * la
2025-07-01 17:49:09.264 elif tag == 'insert':
2025-07-01 17:49:09.264 btags += '+' * lb
2025-07-01 17:49:09.264 elif tag == 'equal':
2025-07-01 17:49:09.264 atags += ' ' * la
2025-07-01 17:49:09.264 btags += ' ' * lb
2025-07-01 17:49:09.264 else:
2025-07-01 17:49:09.264 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.264 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.264 else:
2025-07-01 17:49:09.264 # the synch pair is identical
2025-07-01 17:49:09.264 yield ' ' + aelt
2025-07-01 17:49:09.264
2025-07-01 17:49:09.264 # pump out diffs from after the synch point
2025-07-01 17:49:09.265 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.265
2025-07-01 17:49:09.265 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.265 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.265
2025-07-01 17:49:09.265 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.265 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.265 alo = 503, ahi = 1101
2025-07-01 17:49:09.265 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.265 blo = 503, bhi = 1101
2025-07-01 17:49:09.265
2025-07-01 17:49:09.265 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.265 g = []
2025-07-01 17:49:09.265 if alo < ahi:
2025-07-01 17:49:09.265 if blo < bhi:
2025-07-01 17:49:09.265 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.265 else:
2025-07-01 17:49:09.265 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.265 elif blo < bhi:
2025-07-01 17:49:09.265 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.265
2025-07-01 17:49:09.266 > yield from g
2025-07-01 17:49:09.266
2025-07-01 17:49:09.266 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.266 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.266
2025-07-01 17:49:09.266 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.266 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.266 alo = 503, ahi = 1101
2025-07-01 17:49:09.266 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.266 blo = 503, bhi = 1101
2025-07-01 17:49:09.266
2025-07-01 17:49:09.266 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.266 r"""
2025-07-01 17:49:09.266 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.266 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.266 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.266 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.266
2025-07-01 17:49:09.266 Example:
2025-07-01 17:49:09.266
2025-07-01 17:49:09.266 >>> d = Differ()
2025-07-01 17:49:09.267 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.267 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.267 >>> print(''.join(results), end="")
2025-07-01 17:49:09.267 - abcDefghiJkl
2025-07-01 17:49:09.267 + abcdefGhijkl
2025-07-01 17:49:09.267 """
2025-07-01 17:49:09.267
2025-07-01 17:49:09.267 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.267 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.267 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.267 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.267 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.267
2025-07-01 17:49:09.267 # search for the pair that matches best without being identical
2025-07-01 17:49:09.267 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.267 # on junk -- unless we have to)
2025-07-01 17:49:09.267 for j in range(blo, bhi):
2025-07-01 17:49:09.267 bj = b[j]
2025-07-01 17:49:09.267 cruncher.set_seq2(bj)
2025-07-01 17:49:09.267 for i in range(alo, ahi):
2025-07-01 17:49:09.268 ai = a[i]
2025-07-01 17:49:09.268 if ai == bj:
2025-07-01 17:49:09.268 if eqi is None:
2025-07-01 17:49:09.268 eqi, eqj = i, j
2025-07-01 17:49:09.268 continue
2025-07-01 17:49:09.268 cruncher.set_seq1(ai)
2025-07-01 17:49:09.268 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.268 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.268 # compares by a factor of 3.
2025-07-01 17:49:09.268 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.268 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.268 # of the computation is cached by cruncher
2025-07-01 17:49:09.268 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.268 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.268 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.268 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.268 if best_ratio < cutoff:
2025-07-01 17:49:09.268 # no non-identical "pretty close" pair
2025-07-01 17:49:09.268 if eqi is None:
2025-07-01 17:49:09.269 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.269 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.269 return
2025-07-01 17:49:09.269 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.269 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.270 else:
2025-07-01 17:49:09.270 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.270 eqi = None
2025-07-01 17:49:09.270
2025-07-01 17:49:09.270 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.270 # identical
2025-07-01 17:49:09.270
2025-07-01 17:49:09.270 # pump out diffs from before the synch point
2025-07-01 17:49:09.270 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.270
2025-07-01 17:49:09.270 # do intraline marking on the synch pair
2025-07-01 17:49:09.270 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.270 if eqi is None:
2025-07-01 17:49:09.270 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.270 atags = btags = ""
2025-07-01 17:49:09.270 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.270 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.270 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.270 if tag == 'replace':
2025-07-01 17:49:09.270 atags += '^' * la
2025-07-01 17:49:09.271 btags += '^' * lb
2025-07-01 17:49:09.271 elif tag == 'delete':
2025-07-01 17:49:09.271 atags += '-' * la
2025-07-01 17:49:09.271 elif tag == 'insert':
2025-07-01 17:49:09.271 btags += '+' * lb
2025-07-01 17:49:09.271 elif tag == 'equal':
2025-07-01 17:49:09.271 atags += ' ' * la
2025-07-01 17:49:09.271 btags += ' ' * lb
2025-07-01 17:49:09.271 else:
2025-07-01 17:49:09.271 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.271 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.271 else:
2025-07-01 17:49:09.271 # the synch pair is identical
2025-07-01 17:49:09.271 yield ' ' + aelt
2025-07-01 17:49:09.271
2025-07-01 17:49:09.271 # pump out diffs from after the synch point
2025-07-01 17:49:09.271 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.271
2025-07-01 17:49:09.271 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.271 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.271
2025-07-01 17:49:09.272 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.272 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.272 alo = 504, ahi = 1101
2025-07-01 17:49:09.272 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.272 blo = 504, bhi = 1101
2025-07-01 17:49:09.272
2025-07-01 17:49:09.272 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.272 g = []
2025-07-01 17:49:09.272 if alo < ahi:
2025-07-01 17:49:09.272 if blo < bhi:
2025-07-01 17:49:09.272 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.272 else:
2025-07-01 17:49:09.272 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.272 elif blo < bhi:
2025-07-01 17:49:09.272 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.272
2025-07-01 17:49:09.272 > yield from g
2025-07-01 17:49:09.272
2025-07-01 17:49:09.272 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.272 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.272
2025-07-01 17:49:09.273 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.273 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.273 alo = 504, ahi = 1101
2025-07-01 17:49:09.273 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.273 blo = 504, bhi = 1101
2025-07-01 17:49:09.273
2025-07-01 17:49:09.273 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.273 r"""
2025-07-01 17:49:09.273 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.273 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.273 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.273 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.273
2025-07-01 17:49:09.273 Example:
2025-07-01 17:49:09.273
2025-07-01 17:49:09.273 >>> d = Differ()
2025-07-01 17:49:09.273 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.273 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.273 >>> print(''.join(results), end="")
2025-07-01 17:49:09.273 - abcDefghiJkl
2025-07-01 17:49:09.274 + abcdefGhijkl
2025-07-01 17:49:09.276 """
2025-07-01 17:49:09.277
2025-07-01 17:49:09.277 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.277 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.277 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.277 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.277 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.277
2025-07-01 17:49:09.277 # search for the pair that matches best without being identical
2025-07-01 17:49:09.277 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.277 # on junk -- unless we have to)
2025-07-01 17:49:09.277 for j in range(blo, bhi):
2025-07-01 17:49:09.277 bj = b[j]
2025-07-01 17:49:09.277 cruncher.set_seq2(bj)
2025-07-01 17:49:09.277 for i in range(alo, ahi):
2025-07-01 17:49:09.277 ai = a[i]
2025-07-01 17:49:09.277 if ai == bj:
2025-07-01 17:49:09.277 if eqi is None:
2025-07-01 17:49:09.277 eqi, eqj = i, j
2025-07-01 17:49:09.277 continue
2025-07-01 17:49:09.277 cruncher.set_seq1(ai)
2025-07-01 17:49:09.278 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.278 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.278 # compares by a factor of 3.
2025-07-01 17:49:09.278 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.278 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.278 # of the computation is cached by cruncher
2025-07-01 17:49:09.278 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.278 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.278 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.278 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.278 if best_ratio < cutoff:
2025-07-01 17:49:09.278 # no non-identical "pretty close" pair
2025-07-01 17:49:09.278 if eqi is None:
2025-07-01 17:49:09.278 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.278 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.278 return
2025-07-01 17:49:09.278 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.278 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.278 else:
2025-07-01 17:49:09.278 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.278 eqi = None
2025-07-01 17:49:09.279
2025-07-01 17:49:09.279 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.279 # identical
2025-07-01 17:49:09.279
2025-07-01 17:49:09.279 # pump out diffs from before the synch point
2025-07-01 17:49:09.279 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.279
2025-07-01 17:49:09.279 # do intraline marking on the synch pair
2025-07-01 17:49:09.279 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.279 if eqi is None:
2025-07-01 17:49:09.279 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.279 atags = btags = ""
2025-07-01 17:49:09.279 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.279 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.279 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.279 if tag == 'replace':
2025-07-01 17:49:09.279 atags += '^' * la
2025-07-01 17:49:09.279 btags += '^' * lb
2025-07-01 17:49:09.279 elif tag == 'delete':
2025-07-01 17:49:09.279 atags += '-' * la
2025-07-01 17:49:09.279 elif tag == 'insert':
2025-07-01 17:49:09.280 btags += '+' * lb
2025-07-01 17:49:09.280 elif tag == 'equal':
2025-07-01 17:49:09.280 atags += ' ' * la
2025-07-01 17:49:09.280 btags += ' ' * lb
2025-07-01 17:49:09.280 else:
2025-07-01 17:49:09.280 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.280 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.280 else:
2025-07-01 17:49:09.280 # the synch pair is identical
2025-07-01 17:49:09.280 yield ' ' + aelt
2025-07-01 17:49:09.280
2025-07-01 17:49:09.280 # pump out diffs from after the synch point
2025-07-01 17:49:09.280 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.280
2025-07-01 17:49:09.280 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.280 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.280
2025-07-01 17:49:09.280 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.280 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.281 alo = 505, ahi = 1101
2025-07-01 17:49:09.281 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.281 blo = 505, bhi = 1101
2025-07-01 17:49:09.281
2025-07-01 17:49:09.281 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.281 g = []
2025-07-01 17:49:09.281 if alo < ahi:
2025-07-01 17:49:09.281 if blo < bhi:
2025-07-01 17:49:09.281 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.281 else:
2025-07-01 17:49:09.281 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.281 elif blo < bhi:
2025-07-01 17:49:09.281 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.281
2025-07-01 17:49:09.281 > yield from g
2025-07-01 17:49:09.281
2025-07-01 17:49:09.281 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.281 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.281
2025-07-01 17:49:09.281 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.281 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.282 alo = 505, ahi = 1101
2025-07-01 17:49:09.282 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.282 blo = 505, bhi = 1101
2025-07-01 17:49:09.282
2025-07-01 17:49:09.282 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.282 r"""
2025-07-01 17:49:09.282 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.282 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.282 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.282 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.282
2025-07-01 17:49:09.282 Example:
2025-07-01 17:49:09.282
2025-07-01 17:49:09.282 >>> d = Differ()
2025-07-01 17:49:09.282 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.282 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.282 >>> print(''.join(results), end="")
2025-07-01 17:49:09.282 - abcDefghiJkl
2025-07-01 17:49:09.282 + abcdefGhijkl
2025-07-01 17:49:09.283 """
2025-07-01 17:49:09.283
2025-07-01 17:49:09.283 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.283 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.283 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.283 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.283 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.283
2025-07-01 17:49:09.283 # search for the pair that matches best without being identical
2025-07-01 17:49:09.283 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.283 # on junk -- unless we have to)
2025-07-01 17:49:09.283 for j in range(blo, bhi):
2025-07-01 17:49:09.283 bj = b[j]
2025-07-01 17:49:09.283 cruncher.set_seq2(bj)
2025-07-01 17:49:09.283 for i in range(alo, ahi):
2025-07-01 17:49:09.283 ai = a[i]
2025-07-01 17:49:09.283 if ai == bj:
2025-07-01 17:49:09.283 if eqi is None:
2025-07-01 17:49:09.283 eqi, eqj = i, j
2025-07-01 17:49:09.283 continue
2025-07-01 17:49:09.283 cruncher.set_seq1(ai)
2025-07-01 17:49:09.284 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.284 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.284 # compares by a factor of 3.
2025-07-01 17:49:09.284 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.284 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.284 # of the computation is cached by cruncher
2025-07-01 17:49:09.284 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.284 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.284 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.284 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.284 if best_ratio < cutoff:
2025-07-01 17:49:09.284 # no non-identical "pretty close" pair
2025-07-01 17:49:09.284 if eqi is None:
2025-07-01 17:49:09.284 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.284 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.284 return
2025-07-01 17:49:09.284 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.284 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.284 else:
2025-07-01 17:49:09.284 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.284 eqi = None
2025-07-01 17:49:09.285
2025-07-01 17:49:09.285 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.285 # identical
2025-07-01 17:49:09.285
2025-07-01 17:49:09.285 # pump out diffs from before the synch point
2025-07-01 17:49:09.285 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.285
2025-07-01 17:49:09.285 # do intraline marking on the synch pair
2025-07-01 17:49:09.285 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.285 if eqi is None:
2025-07-01 17:49:09.285 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.285 atags = btags = ""
2025-07-01 17:49:09.285 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.285 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.285 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.285 if tag == 'replace':
2025-07-01 17:49:09.285 atags += '^' * la
2025-07-01 17:49:09.285 btags += '^' * lb
2025-07-01 17:49:09.285 elif tag == 'delete':
2025-07-01 17:49:09.285 atags += '-' * la
2025-07-01 17:49:09.285 elif tag == 'insert':
2025-07-01 17:49:09.285 btags += '+' * lb
2025-07-01 17:49:09.286 elif tag == 'equal':
2025-07-01 17:49:09.286 atags += ' ' * la
2025-07-01 17:49:09.286 btags += ' ' * lb
2025-07-01 17:49:09.286 else:
2025-07-01 17:49:09.286 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.286 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.286 else:
2025-07-01 17:49:09.286 # the synch pair is identical
2025-07-01 17:49:09.286 yield ' ' + aelt
2025-07-01 17:49:09.286
2025-07-01 17:49:09.286 # pump out diffs from after the synch point
2025-07-01 17:49:09.286 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.286
2025-07-01 17:49:09.286 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.286 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.286
2025-07-01 17:49:09.286 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.286 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.286 alo = 506, ahi = 1101
2025-07-01 17:49:09.286 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.286 blo = 506, bhi = 1101
2025-07-01 17:49:09.286
2025-07-01 17:49:09.287 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.287 g = []
2025-07-01 17:49:09.287 if alo < ahi:
2025-07-01 17:49:09.287 if blo < bhi:
2025-07-01 17:49:09.287 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.287 else:
2025-07-01 17:49:09.287 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.287 elif blo < bhi:
2025-07-01 17:49:09.287 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.287
2025-07-01 17:49:09.287 > yield from g
2025-07-01 17:49:09.287
2025-07-01 17:49:09.287 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.287 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.287
2025-07-01 17:49:09.287 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.287 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.287 alo = 506, ahi = 1101
2025-07-01 17:49:09.287 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.287 blo = 506, bhi = 1101
2025-07-01 17:49:09.287
2025-07-01 17:49:09.288 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.288 r"""
2025-07-01 17:49:09.288 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.288 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.288 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.288 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.288
2025-07-01 17:49:09.288 Example:
2025-07-01 17:49:09.288
2025-07-01 17:49:09.288 >>> d = Differ()
2025-07-01 17:49:09.288 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.288 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.288 >>> print(''.join(results), end="")
2025-07-01 17:49:09.288 - abcDefghiJkl
2025-07-01 17:49:09.288 + abcdefGhijkl
2025-07-01 17:49:09.288 """
2025-07-01 17:49:09.288
2025-07-01 17:49:09.288 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.288 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.289 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.289 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.289 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.289
2025-07-01 17:49:09.289 # search for the pair that matches best without being identical
2025-07-01 17:49:09.289 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.289 # on junk -- unless we have to)
2025-07-01 17:49:09.289 for j in range(blo, bhi):
2025-07-01 17:49:09.289 bj = b[j]
2025-07-01 17:49:09.289 cruncher.set_seq2(bj)
2025-07-01 17:49:09.289 for i in range(alo, ahi):
2025-07-01 17:49:09.289 ai = a[i]
2025-07-01 17:49:09.289 if ai == bj:
2025-07-01 17:49:09.289 if eqi is None:
2025-07-01 17:49:09.289 eqi, eqj = i, j
2025-07-01 17:49:09.289 continue
2025-07-01 17:49:09.289 cruncher.set_seq1(ai)
2025-07-01 17:49:09.289 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.289 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.289 # compares by a factor of 3.
2025-07-01 17:49:09.290 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.295 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.295 # of the computation is cached by cruncher
2025-07-01 17:49:09.295 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.295 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.295 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.295 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.295 if best_ratio < cutoff:
2025-07-01 17:49:09.295 # no non-identical "pretty close" pair
2025-07-01 17:49:09.295 if eqi is None:
2025-07-01 17:49:09.295 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.295 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.295 return
2025-07-01 17:49:09.295 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.295 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.295 else:
2025-07-01 17:49:09.295 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.295 eqi = None
2025-07-01 17:49:09.295
2025-07-01 17:49:09.295 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.296 # identical
2025-07-01 17:49:09.296
2025-07-01 17:49:09.296 # pump out diffs from before the synch point
2025-07-01 17:49:09.296 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.296
2025-07-01 17:49:09.296 # do intraline marking on the synch pair
2025-07-01 17:49:09.296 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.296 if eqi is None:
2025-07-01 17:49:09.296 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.296 atags = btags = ""
2025-07-01 17:49:09.296 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.296 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.296 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.296 if tag == 'replace':
2025-07-01 17:49:09.296 atags += '^' * la
2025-07-01 17:49:09.296 btags += '^' * lb
2025-07-01 17:49:09.296 elif tag == 'delete':
2025-07-01 17:49:09.296 atags += '-' * la
2025-07-01 17:49:09.296 elif tag == 'insert':
2025-07-01 17:49:09.296 btags += '+' * lb
2025-07-01 17:49:09.296 elif tag == 'equal':
2025-07-01 17:49:09.297 atags += ' ' * la
2025-07-01 17:49:09.297 btags += ' ' * lb
2025-07-01 17:49:09.297 else:
2025-07-01 17:49:09.297 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.297 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.297 else:
2025-07-01 17:49:09.297 # the synch pair is identical
2025-07-01 17:49:09.297 yield ' ' + aelt
2025-07-01 17:49:09.297
2025-07-01 17:49:09.297 # pump out diffs from after the synch point
2025-07-01 17:49:09.297 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.297
2025-07-01 17:49:09.297 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.297 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.297
2025-07-01 17:49:09.297 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.297 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.297 alo = 507, ahi = 1101
2025-07-01 17:49:09.297 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.297 blo = 507, bhi = 1101
2025-07-01 17:49:09.297
2025-07-01 17:49:09.298 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.298 g = []
2025-07-01 17:49:09.298 if alo < ahi:
2025-07-01 17:49:09.298 if blo < bhi:
2025-07-01 17:49:09.298 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.298 else:
2025-07-01 17:49:09.298 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.298 elif blo < bhi:
2025-07-01 17:49:09.298 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.298
2025-07-01 17:49:09.298 > yield from g
2025-07-01 17:49:09.298
2025-07-01 17:49:09.298 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.298 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.298
2025-07-01 17:49:09.298 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.298 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.298 alo = 507, ahi = 1101
2025-07-01 17:49:09.298 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.298 blo = 507, bhi = 1101
2025-07-01 17:49:09.298
2025-07-01 17:49:09.299 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.299 r"""
2025-07-01 17:49:09.299 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.299 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.299 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.299 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.299
2025-07-01 17:49:09.299 Example:
2025-07-01 17:49:09.299
2025-07-01 17:49:09.299 >>> d = Differ()
2025-07-01 17:49:09.299 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.299 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.299 >>> print(''.join(results), end="")
2025-07-01 17:49:09.299 - abcDefghiJkl
2025-07-01 17:49:09.299 + abcdefGhijkl
2025-07-01 17:49:09.299 """
2025-07-01 17:49:09.299
2025-07-01 17:49:09.299 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.299 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.300 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.300 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.300 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.300
2025-07-01 17:49:09.300 # search for the pair that matches best without being identical
2025-07-01 17:49:09.300 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.300 # on junk -- unless we have to)
2025-07-01 17:49:09.300 for j in range(blo, bhi):
2025-07-01 17:49:09.300 bj = b[j]
2025-07-01 17:49:09.300 cruncher.set_seq2(bj)
2025-07-01 17:49:09.300 for i in range(alo, ahi):
2025-07-01 17:49:09.300 ai = a[i]
2025-07-01 17:49:09.300 if ai == bj:
2025-07-01 17:49:09.300 if eqi is None:
2025-07-01 17:49:09.300 eqi, eqj = i, j
2025-07-01 17:49:09.300 continue
2025-07-01 17:49:09.300 cruncher.set_seq1(ai)
2025-07-01 17:49:09.300 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.300 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.300 # compares by a factor of 3.
2025-07-01 17:49:09.300 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.300 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.301 # of the computation is cached by cruncher
2025-07-01 17:49:09.301 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.301 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.301 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.301 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.301 if best_ratio < cutoff:
2025-07-01 17:49:09.301 # no non-identical "pretty close" pair
2025-07-01 17:49:09.301 if eqi is None:
2025-07-01 17:49:09.301 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.301 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.301 return
2025-07-01 17:49:09.301 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.301 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.301 else:
2025-07-01 17:49:09.301 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.301 eqi = None
2025-07-01 17:49:09.301
2025-07-01 17:49:09.301 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.301 # identical
2025-07-01 17:49:09.302
2025-07-01 17:49:09.302 # pump out diffs from before the synch point
2025-07-01 17:49:09.302 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.302
2025-07-01 17:49:09.302 # do intraline marking on the synch pair
2025-07-01 17:49:09.302 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.302 if eqi is None:
2025-07-01 17:49:09.302 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.302 atags = btags = ""
2025-07-01 17:49:09.302 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.302 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.302 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.302 if tag == 'replace':
2025-07-01 17:49:09.302 atags += '^' * la
2025-07-01 17:49:09.302 btags += '^' * lb
2025-07-01 17:49:09.302 elif tag == 'delete':
2025-07-01 17:49:09.302 atags += '-' * la
2025-07-01 17:49:09.302 elif tag == 'insert':
2025-07-01 17:49:09.302 btags += '+' * lb
2025-07-01 17:49:09.302 elif tag == 'equal':
2025-07-01 17:49:09.303 atags += ' ' * la
2025-07-01 17:49:09.303 btags += ' ' * lb
2025-07-01 17:49:09.303 else:
2025-07-01 17:49:09.303 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.303 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.303 else:
2025-07-01 17:49:09.303 # the synch pair is identical
2025-07-01 17:49:09.303 yield ' ' + aelt
2025-07-01 17:49:09.303
2025-07-01 17:49:09.303 # pump out diffs from after the synch point
2025-07-01 17:49:09.303 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.303
2025-07-01 17:49:09.303 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.303 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.303
2025-07-01 17:49:09.303 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.303 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.303 alo = 510, ahi = 1101
2025-07-01 17:49:09.303 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.303 blo = 510, bhi = 1101
2025-07-01 17:49:09.304
2025-07-01 17:49:09.304 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.304 g = []
2025-07-01 17:49:09.304 if alo < ahi:
2025-07-01 17:49:09.304 if blo < bhi:
2025-07-01 17:49:09.304 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.304 else:
2025-07-01 17:49:09.304 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.304 elif blo < bhi:
2025-07-01 17:49:09.304 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.304
2025-07-01 17:49:09.304 > yield from g
2025-07-01 17:49:09.304
2025-07-01 17:49:09.304 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.304 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.304
2025-07-01 17:49:09.304 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.304 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.304 alo = 510, ahi = 1101
2025-07-01 17:49:09.304 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.304 blo = 510, bhi = 1101
2025-07-01 17:49:09.305
2025-07-01 17:49:09.308 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.308 r"""
2025-07-01 17:49:09.308 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.308 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.308 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.308 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.308
2025-07-01 17:49:09.308 Example:
2025-07-01 17:49:09.308
2025-07-01 17:49:09.308 >>> d = Differ()
2025-07-01 17:49:09.308 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.308 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.308 >>> print(''.join(results), end="")
2025-07-01 17:49:09.308 - abcDefghiJkl
2025-07-01 17:49:09.308 + abcdefGhijkl
2025-07-01 17:49:09.308 """
2025-07-01 17:49:09.309
2025-07-01 17:49:09.309 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.309 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.309 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.309 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.309 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.309
2025-07-01 17:49:09.309 # search for the pair that matches best without being identical
2025-07-01 17:49:09.309 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.309 # on junk -- unless we have to)
2025-07-01 17:49:09.309 for j in range(blo, bhi):
2025-07-01 17:49:09.309 bj = b[j]
2025-07-01 17:49:09.309 cruncher.set_seq2(bj)
2025-07-01 17:49:09.309 for i in range(alo, ahi):
2025-07-01 17:49:09.309 ai = a[i]
2025-07-01 17:49:09.309 if ai == bj:
2025-07-01 17:49:09.309 if eqi is None:
2025-07-01 17:49:09.309 eqi, eqj = i, j
2025-07-01 17:49:09.309 continue
2025-07-01 17:49:09.309 cruncher.set_seq1(ai)
2025-07-01 17:49:09.309 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.310 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.310 # compares by a factor of 3.
2025-07-01 17:49:09.310 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.310 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.310 # of the computation is cached by cruncher
2025-07-01 17:49:09.310 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.310 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.310 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.310 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.310 if best_ratio < cutoff:
2025-07-01 17:49:09.310 # no non-identical "pretty close" pair
2025-07-01 17:49:09.310 if eqi is None:
2025-07-01 17:49:09.310 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.310 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.310 return
2025-07-01 17:49:09.310 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.310 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.310 else:
2025-07-01 17:49:09.310 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.310 eqi = None
2025-07-01 17:49:09.311
2025-07-01 17:49:09.311 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.311 # identical
2025-07-01 17:49:09.311
2025-07-01 17:49:09.311 # pump out diffs from before the synch point
2025-07-01 17:49:09.311 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.311
2025-07-01 17:49:09.311 # do intraline marking on the synch pair
2025-07-01 17:49:09.311 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.311 if eqi is None:
2025-07-01 17:49:09.311 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.311 atags = btags = ""
2025-07-01 17:49:09.311 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.311 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.311 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.311 if tag == 'replace':
2025-07-01 17:49:09.311 atags += '^' * la
2025-07-01 17:49:09.311 btags += '^' * lb
2025-07-01 17:49:09.311 elif tag == 'delete':
2025-07-01 17:49:09.311 atags += '-' * la
2025-07-01 17:49:09.312 elif tag == 'insert':
2025-07-01 17:49:09.312 btags += '+' * lb
2025-07-01 17:49:09.312 elif tag == 'equal':
2025-07-01 17:49:09.312 atags += ' ' * la
2025-07-01 17:49:09.312 btags += ' ' * lb
2025-07-01 17:49:09.312 else:
2025-07-01 17:49:09.312 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.312 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.312 else:
2025-07-01 17:49:09.312 # the synch pair is identical
2025-07-01 17:49:09.312 yield ' ' + aelt
2025-07-01 17:49:09.312
2025-07-01 17:49:09.312 # pump out diffs from after the synch point
2025-07-01 17:49:09.312 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.312
2025-07-01 17:49:09.312 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.312 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.312
2025-07-01 17:49:09.312 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.312 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.312 alo = 511, ahi = 1101
2025-07-01 17:49:09.313 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.313 blo = 511, bhi = 1101
2025-07-01 17:49:09.313
2025-07-01 17:49:09.313 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.313 g = []
2025-07-01 17:49:09.313 if alo < ahi:
2025-07-01 17:49:09.313 if blo < bhi:
2025-07-01 17:49:09.313 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.313 else:
2025-07-01 17:49:09.313 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.313 elif blo < bhi:
2025-07-01 17:49:09.313 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.313
2025-07-01 17:49:09.313 > yield from g
2025-07-01 17:49:09.313
2025-07-01 17:49:09.313 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.313 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.313
2025-07-01 17:49:09.313 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.314 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.314 alo = 511, ahi = 1101
2025-07-01 17:49:09.314 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.314 blo = 511, bhi = 1101
2025-07-01 17:49:09.314
2025-07-01 17:49:09.314 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.314 r"""
2025-07-01 17:49:09.314 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.314 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.314 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.314 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.314
2025-07-01 17:49:09.314 Example:
2025-07-01 17:49:09.314
2025-07-01 17:49:09.314 >>> d = Differ()
2025-07-01 17:49:09.314 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.314 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.314 >>> print(''.join(results), end="")
2025-07-01 17:49:09.314 - abcDefghiJkl
2025-07-01 17:49:09.315 + abcdefGhijkl
2025-07-01 17:49:09.315 """
2025-07-01 17:49:09.315
2025-07-01 17:49:09.315 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.315 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.315 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.315 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.315 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.315
2025-07-01 17:49:09.315 # search for the pair that matches best without being identical
2025-07-01 17:49:09.315 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.315 # on junk -- unless we have to)
2025-07-01 17:49:09.315 for j in range(blo, bhi):
2025-07-01 17:49:09.315 bj = b[j]
2025-07-01 17:49:09.315 cruncher.set_seq2(bj)
2025-07-01 17:49:09.315 for i in range(alo, ahi):
2025-07-01 17:49:09.315 ai = a[i]
2025-07-01 17:49:09.315 if ai == bj:
2025-07-01 17:49:09.315 if eqi is None:
2025-07-01 17:49:09.316 eqi, eqj = i, j
2025-07-01 17:49:09.316 continue
2025-07-01 17:49:09.316 cruncher.set_seq1(ai)
2025-07-01 17:49:09.316 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.316 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.316 # compares by a factor of 3.
2025-07-01 17:49:09.316 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.316 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.316 # of the computation is cached by cruncher
2025-07-01 17:49:09.316 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.316 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.316 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.316 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.316 if best_ratio < cutoff:
2025-07-01 17:49:09.316 # no non-identical "pretty close" pair
2025-07-01 17:49:09.316 if eqi is None:
2025-07-01 17:49:09.316 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.316 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.316 return
2025-07-01 17:49:09.316 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.316 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.317 else:
2025-07-01 17:49:09.317 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.317 eqi = None
2025-07-01 17:49:09.317
2025-07-01 17:49:09.317 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.317 # identical
2025-07-01 17:49:09.317
2025-07-01 17:49:09.317 # pump out diffs from before the synch point
2025-07-01 17:49:09.317 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.317
2025-07-01 17:49:09.317 # do intraline marking on the synch pair
2025-07-01 17:49:09.317 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.317 if eqi is None:
2025-07-01 17:49:09.317 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.317 atags = btags = ""
2025-07-01 17:49:09.317 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.317 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.317 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.317 if tag == 'replace':
2025-07-01 17:49:09.317 atags += '^' * la
2025-07-01 17:49:09.318 btags += '^' * lb
2025-07-01 17:49:09.318 elif tag == 'delete':
2025-07-01 17:49:09.318 atags += '-' * la
2025-07-01 17:49:09.318 elif tag == 'insert':
2025-07-01 17:49:09.318 btags += '+' * lb
2025-07-01 17:49:09.318 elif tag == 'equal':
2025-07-01 17:49:09.318 atags += ' ' * la
2025-07-01 17:49:09.318 btags += ' ' * lb
2025-07-01 17:49:09.318 else:
2025-07-01 17:49:09.318 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.318 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.318 else:
2025-07-01 17:49:09.318 # the synch pair is identical
2025-07-01 17:49:09.318 yield ' ' + aelt
2025-07-01 17:49:09.318
2025-07-01 17:49:09.318 # pump out diffs from after the synch point
2025-07-01 17:49:09.318 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.318
2025-07-01 17:49:09.318 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.318 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.318
2025-07-01 17:49:09.319 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.319 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.319 alo = 512, ahi = 1101
2025-07-01 17:49:09.319 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.319 blo = 512, bhi = 1101
2025-07-01 17:49:09.319
2025-07-01 17:49:09.319 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.319 g = []
2025-07-01 17:49:09.319 if alo < ahi:
2025-07-01 17:49:09.319 if blo < bhi:
2025-07-01 17:49:09.319 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.319 else:
2025-07-01 17:49:09.319 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.319 elif blo < bhi:
2025-07-01 17:49:09.319 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.319
2025-07-01 17:49:09.319 > yield from g
2025-07-01 17:49:09.319
2025-07-01 17:49:09.319 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.319 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.319
2025-07-01 17:49:09.320 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.320 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.320 alo = 512, ahi = 1101
2025-07-01 17:49:09.320 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.320 blo = 512, bhi = 1101
2025-07-01 17:49:09.320
2025-07-01 17:49:09.320 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.320 r"""
2025-07-01 17:49:09.320 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.320 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.320 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.320 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.320
2025-07-01 17:49:09.320 Example:
2025-07-01 17:49:09.320
2025-07-01 17:49:09.320 >>> d = Differ()
2025-07-01 17:49:09.320 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.320 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.320 >>> print(''.join(results), end="")
2025-07-01 17:49:09.320 - abcDefghiJkl
2025-07-01 17:49:09.326 + abcdefGhijkl
2025-07-01 17:49:09.326 """
2025-07-01 17:49:09.326
2025-07-01 17:49:09.326 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.326 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.326 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.326 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.326 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.326
2025-07-01 17:49:09.326 # search for the pair that matches best without being identical
2025-07-01 17:49:09.326 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.326 # on junk -- unless we have to)
2025-07-01 17:49:09.326 for j in range(blo, bhi):
2025-07-01 17:49:09.326 bj = b[j]
2025-07-01 17:49:09.326 cruncher.set_seq2(bj)
2025-07-01 17:49:09.326 for i in range(alo, ahi):
2025-07-01 17:49:09.326 ai = a[i]
2025-07-01 17:49:09.327 if ai == bj:
2025-07-01 17:49:09.327 if eqi is None:
2025-07-01 17:49:09.327 eqi, eqj = i, j
2025-07-01 17:49:09.327 continue
2025-07-01 17:49:09.327 cruncher.set_seq1(ai)
2025-07-01 17:49:09.327 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.327 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.327 # compares by a factor of 3.
2025-07-01 17:49:09.327 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.327 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.327 # of the computation is cached by cruncher
2025-07-01 17:49:09.327 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.327 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.327 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.327 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.327 if best_ratio < cutoff:
2025-07-01 17:49:09.327 # no non-identical "pretty close" pair
2025-07-01 17:49:09.327 if eqi is None:
2025-07-01 17:49:09.327 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.327 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.328 return
2025-07-01 17:49:09.328 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.328 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.328 else:
2025-07-01 17:49:09.328 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.328 eqi = None
2025-07-01 17:49:09.328
2025-07-01 17:49:09.328 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.328 # identical
2025-07-01 17:49:09.328
2025-07-01 17:49:09.328 # pump out diffs from before the synch point
2025-07-01 17:49:09.328 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.328
2025-07-01 17:49:09.328 # do intraline marking on the synch pair
2025-07-01 17:49:09.328 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.328 if eqi is None:
2025-07-01 17:49:09.328 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.328 atags = btags = ""
2025-07-01 17:49:09.328 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.328 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.328 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.329 if tag == 'replace':
2025-07-01 17:49:09.329 atags += '^' * la
2025-07-01 17:49:09.329 btags += '^' * lb
2025-07-01 17:49:09.329 elif tag == 'delete':
2025-07-01 17:49:09.329 atags += '-' * la
2025-07-01 17:49:09.329 elif tag == 'insert':
2025-07-01 17:49:09.329 btags += '+' * lb
2025-07-01 17:49:09.329 elif tag == 'equal':
2025-07-01 17:49:09.329 atags += ' ' * la
2025-07-01 17:49:09.329 btags += ' ' * lb
2025-07-01 17:49:09.329 else:
2025-07-01 17:49:09.329 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.329 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.329 else:
2025-07-01 17:49:09.329 # the synch pair is identical
2025-07-01 17:49:09.329 yield ' ' + aelt
2025-07-01 17:49:09.329
2025-07-01 17:49:09.329 # pump out diffs from after the synch point
2025-07-01 17:49:09.329 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.329
2025-07-01 17:49:09.329 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.329 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.330
2025-07-01 17:49:09.330 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.330 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.330 alo = 513, ahi = 1101
2025-07-01 17:49:09.330 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.330 blo = 513, bhi = 1101
2025-07-01 17:49:09.330
2025-07-01 17:49:09.330 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.330 g = []
2025-07-01 17:49:09.330 if alo < ahi:
2025-07-01 17:49:09.330 if blo < bhi:
2025-07-01 17:49:09.330 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.330 else:
2025-07-01 17:49:09.330 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.330 elif blo < bhi:
2025-07-01 17:49:09.330 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.330
2025-07-01 17:49:09.330 > yield from g
2025-07-01 17:49:09.330
2025-07-01 17:49:09.330 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.330 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.331
2025-07-01 17:49:09.331 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.331 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.331 alo = 513, ahi = 1101
2025-07-01 17:49:09.331 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.331 blo = 513, bhi = 1101
2025-07-01 17:49:09.331
2025-07-01 17:49:09.331 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.331 r"""
2025-07-01 17:49:09.331 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.331 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.331 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.331 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.331
2025-07-01 17:49:09.331 Example:
2025-07-01 17:49:09.331
2025-07-01 17:49:09.331 >>> d = Differ()
2025-07-01 17:49:09.331 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.331 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.331 >>> print(''.join(results), end="")
2025-07-01 17:49:09.331 - abcDefghiJkl
2025-07-01 17:49:09.332 + abcdefGhijkl
2025-07-01 17:49:09.332 """
2025-07-01 17:49:09.332
2025-07-01 17:49:09.332 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.332 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.332 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.332 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.332 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.332
2025-07-01 17:49:09.332 # search for the pair that matches best without being identical
2025-07-01 17:49:09.332 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.332 # on junk -- unless we have to)
2025-07-01 17:49:09.332 for j in range(blo, bhi):
2025-07-01 17:49:09.332 bj = b[j]
2025-07-01 17:49:09.332 cruncher.set_seq2(bj)
2025-07-01 17:49:09.332 for i in range(alo, ahi):
2025-07-01 17:49:09.332 ai = a[i]
2025-07-01 17:49:09.332 if ai == bj:
2025-07-01 17:49:09.332 if eqi is None:
2025-07-01 17:49:09.332 eqi, eqj = i, j
2025-07-01 17:49:09.333 continue
2025-07-01 17:49:09.333 cruncher.set_seq1(ai)
2025-07-01 17:49:09.333 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.333 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.333 # compares by a factor of 3.
2025-07-01 17:49:09.333 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.333 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.333 # of the computation is cached by cruncher
2025-07-01 17:49:09.333 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.333 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.333 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.333 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.333 if best_ratio < cutoff:
2025-07-01 17:49:09.333 # no non-identical "pretty close" pair
2025-07-01 17:49:09.333 if eqi is None:
2025-07-01 17:49:09.333 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.333 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.333 return
2025-07-01 17:49:09.333 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.333 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.333 else:
2025-07-01 17:49:09.333 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.334 eqi = None
2025-07-01 17:49:09.334
2025-07-01 17:49:09.334 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.334 # identical
2025-07-01 17:49:09.334
2025-07-01 17:49:09.334 # pump out diffs from before the synch point
2025-07-01 17:49:09.334 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.334
2025-07-01 17:49:09.334 # do intraline marking on the synch pair
2025-07-01 17:49:09.334 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.334 if eqi is None:
2025-07-01 17:49:09.334 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.334 atags = btags = ""
2025-07-01 17:49:09.334 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.334 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.334 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.334 if tag == 'replace':
2025-07-01 17:49:09.334 atags += '^' * la
2025-07-01 17:49:09.334 btags += '^' * lb
2025-07-01 17:49:09.334 elif tag == 'delete':
2025-07-01 17:49:09.334 atags += '-' * la
2025-07-01 17:49:09.334 elif tag == 'insert':
2025-07-01 17:49:09.335 btags += '+' * lb
2025-07-01 17:49:09.335 elif tag == 'equal':
2025-07-01 17:49:09.335 atags += ' ' * la
2025-07-01 17:49:09.335 btags += ' ' * lb
2025-07-01 17:49:09.335 else:
2025-07-01 17:49:09.335 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.335 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.335 else:
2025-07-01 17:49:09.335 # the synch pair is identical
2025-07-01 17:49:09.335 yield ' ' + aelt
2025-07-01 17:49:09.335
2025-07-01 17:49:09.335 # pump out diffs from after the synch point
2025-07-01 17:49:09.335 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.335
2025-07-01 17:49:09.335 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.335 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.335
2025-07-01 17:49:09.335 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.335 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.335 alo = 514, ahi = 1101
2025-07-01 17:49:09.339 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.339 blo = 514, bhi = 1101
2025-07-01 17:49:09.339
2025-07-01 17:49:09.339 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.339 g = []
2025-07-01 17:49:09.339 if alo < ahi:
2025-07-01 17:49:09.339 if blo < bhi:
2025-07-01 17:49:09.339 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.339 else:
2025-07-01 17:49:09.339 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.339 elif blo < bhi:
2025-07-01 17:49:09.339 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.339
2025-07-01 17:49:09.339 > yield from g
2025-07-01 17:49:09.339
2025-07-01 17:49:09.339 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.339 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.339
2025-07-01 17:49:09.339 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.339 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.340 alo = 514, ahi = 1101
2025-07-01 17:49:09.340 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.340 blo = 514, bhi = 1101
2025-07-01 17:49:09.340
2025-07-01 17:49:09.340 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.340 r"""
2025-07-01 17:49:09.340 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.340 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.340 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.340 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.340
2025-07-01 17:49:09.340 Example:
2025-07-01 17:49:09.340
2025-07-01 17:49:09.340 >>> d = Differ()
2025-07-01 17:49:09.340 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.340 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.340 >>> print(''.join(results), end="")
2025-07-01 17:49:09.340 - abcDefghiJkl
2025-07-01 17:49:09.340 + abcdefGhijkl
2025-07-01 17:49:09.341 """
2025-07-01 17:49:09.341
2025-07-01 17:49:09.341 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.341 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.341 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.341 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.341 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.341
2025-07-01 17:49:09.341 # search for the pair that matches best without being identical
2025-07-01 17:49:09.341 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.341 # on junk -- unless we have to)
2025-07-01 17:49:09.341 for j in range(blo, bhi):
2025-07-01 17:49:09.341 bj = b[j]
2025-07-01 17:49:09.341 cruncher.set_seq2(bj)
2025-07-01 17:49:09.341 for i in range(alo, ahi):
2025-07-01 17:49:09.341 ai = a[i]
2025-07-01 17:49:09.341 if ai == bj:
2025-07-01 17:49:09.341 if eqi is None:
2025-07-01 17:49:09.341 eqi, eqj = i, j
2025-07-01 17:49:09.341 continue
2025-07-01 17:49:09.341 cruncher.set_seq1(ai)
2025-07-01 17:49:09.342 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.342 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.342 # compares by a factor of 3.
2025-07-01 17:49:09.342 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.342 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.342 # of the computation is cached by cruncher
2025-07-01 17:49:09.342 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.342 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.343 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.343 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.343 if best_ratio < cutoff:
2025-07-01 17:49:09.343 # no non-identical "pretty close" pair
2025-07-01 17:49:09.343 if eqi is None:
2025-07-01 17:49:09.343 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.343 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.343 return
2025-07-01 17:49:09.343 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.343 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.343 else:
2025-07-01 17:49:09.343 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.343 eqi = None
2025-07-01 17:49:09.343
2025-07-01 17:49:09.343 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.343 # identical
2025-07-01 17:49:09.343
2025-07-01 17:49:09.343 # pump out diffs from before the synch point
2025-07-01 17:49:09.343 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.343
2025-07-01 17:49:09.344 # do intraline marking on the synch pair
2025-07-01 17:49:09.344 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.344 if eqi is None:
2025-07-01 17:49:09.344 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.344 atags = btags = ""
2025-07-01 17:49:09.344 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.344 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.344 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.344 if tag == 'replace':
2025-07-01 17:49:09.344 atags += '^' * la
2025-07-01 17:49:09.344 btags += '^' * lb
2025-07-01 17:49:09.344 elif tag == 'delete':
2025-07-01 17:49:09.344 atags += '-' * la
2025-07-01 17:49:09.344 elif tag == 'insert':
2025-07-01 17:49:09.344 btags += '+' * lb
2025-07-01 17:49:09.344 elif tag == 'equal':
2025-07-01 17:49:09.344 atags += ' ' * la
2025-07-01 17:49:09.344 btags += ' ' * lb
2025-07-01 17:49:09.344 else:
2025-07-01 17:49:09.344 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.344 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.344 else:
2025-07-01 17:49:09.345 # the synch pair is identical
2025-07-01 17:49:09.345 yield ' ' + aelt
2025-07-01 17:49:09.345
2025-07-01 17:49:09.345 # pump out diffs from after the synch point
2025-07-01 17:49:09.345 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.345
2025-07-01 17:49:09.345 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.345 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.345
2025-07-01 17:49:09.345 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.345 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.345 alo = 515, ahi = 1101
2025-07-01 17:49:09.345 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.345 blo = 515, bhi = 1101
2025-07-01 17:49:09.345
2025-07-01 17:49:09.345 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.345 g = []
2025-07-01 17:49:09.345 if alo < ahi:
2025-07-01 17:49:09.345 if blo < bhi:
2025-07-01 17:49:09.345 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.345 else:
2025-07-01 17:49:09.346 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.346 elif blo < bhi:
2025-07-01 17:49:09.346 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.346
2025-07-01 17:49:09.346 > yield from g
2025-07-01 17:49:09.346
2025-07-01 17:49:09.346 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.346 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.346
2025-07-01 17:49:09.346 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.346 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.346 alo = 515, ahi = 1101
2025-07-01 17:49:09.346 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.346 blo = 515, bhi = 1101
2025-07-01 17:49:09.346
2025-07-01 17:49:09.346 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.346 r"""
2025-07-01 17:49:09.346 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.346 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.347 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.347 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.347
2025-07-01 17:49:09.347 Example:
2025-07-01 17:49:09.347
2025-07-01 17:49:09.347 >>> d = Differ()
2025-07-01 17:49:09.347 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.347 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.347 >>> print(''.join(results), end="")
2025-07-01 17:49:09.347 - abcDefghiJkl
2025-07-01 17:49:09.347 + abcdefGhijkl
2025-07-01 17:49:09.347 """
2025-07-01 17:49:09.347
2025-07-01 17:49:09.347 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.347 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.347 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.347 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.347 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.348
2025-07-01 17:49:09.348 # search for the pair that matches best without being identical
2025-07-01 17:49:09.348 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.348 # on junk -- unless we have to)
2025-07-01 17:49:09.348 for j in range(blo, bhi):
2025-07-01 17:49:09.348 bj = b[j]
2025-07-01 17:49:09.348 cruncher.set_seq2(bj)
2025-07-01 17:49:09.348 for i in range(alo, ahi):
2025-07-01 17:49:09.348 ai = a[i]
2025-07-01 17:49:09.348 if ai == bj:
2025-07-01 17:49:09.348 if eqi is None:
2025-07-01 17:49:09.348 eqi, eqj = i, j
2025-07-01 17:49:09.348 continue
2025-07-01 17:49:09.348 cruncher.set_seq1(ai)
2025-07-01 17:49:09.348 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.348 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.348 # compares by a factor of 3.
2025-07-01 17:49:09.348 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.348 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.348 # of the computation is cached by cruncher
2025-07-01 17:49:09.348 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.349 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.349 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.349 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.349 if best_ratio < cutoff:
2025-07-01 17:49:09.349 # no non-identical "pretty close" pair
2025-07-01 17:49:09.349 if eqi is None:
2025-07-01 17:49:09.349 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.349 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.349 return
2025-07-01 17:49:09.349 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.349 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.349 else:
2025-07-01 17:49:09.349 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.349 eqi = None
2025-07-01 17:49:09.349
2025-07-01 17:49:09.349 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.349 # identical
2025-07-01 17:49:09.349
2025-07-01 17:49:09.349 # pump out diffs from before the synch point
2025-07-01 17:49:09.349 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.350
2025-07-01 17:49:09.350 # do intraline marking on the synch pair
2025-07-01 17:49:09.350 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.350 if eqi is None:
2025-07-01 17:49:09.350 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.350 atags = btags = ""
2025-07-01 17:49:09.350 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.350 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.350 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.350 if tag == 'replace':
2025-07-01 17:49:09.350 atags += '^' * la
2025-07-01 17:49:09.350 btags += '^' * lb
2025-07-01 17:49:09.350 elif tag == 'delete':
2025-07-01 17:49:09.350 atags += '-' * la
2025-07-01 17:49:09.350 elif tag == 'insert':
2025-07-01 17:49:09.350 btags += '+' * lb
2025-07-01 17:49:09.350 elif tag == 'equal':
2025-07-01 17:49:09.350 atags += ' ' * la
2025-07-01 17:49:09.350 btags += ' ' * lb
2025-07-01 17:49:09.350 else:
2025-07-01 17:49:09.350 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.351 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.351 else:
2025-07-01 17:49:09.351 # the synch pair is identical
2025-07-01 17:49:09.351 yield ' ' + aelt
2025-07-01 17:49:09.351
2025-07-01 17:49:09.351 # pump out diffs from after the synch point
2025-07-01 17:49:09.351 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.351
2025-07-01 17:49:09.351 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.351 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.351
2025-07-01 17:49:09.351 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.351 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.351 alo = 516, ahi = 1101
2025-07-01 17:49:09.351 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.351 blo = 516, bhi = 1101
2025-07-01 17:49:09.351
2025-07-01 17:49:09.351 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.351 g = []
2025-07-01 17:49:09.351 if alo < ahi:
2025-07-01 17:49:09.352 if blo < bhi:
2025-07-01 17:49:09.357 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.357 else:
2025-07-01 17:49:09.357 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.357 elif blo < bhi:
2025-07-01 17:49:09.357 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.357
2025-07-01 17:49:09.357 > yield from g
2025-07-01 17:49:09.357
2025-07-01 17:49:09.357 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.357 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.357
2025-07-01 17:49:09.357 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.357 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.357 alo = 516, ahi = 1101
2025-07-01 17:49:09.357 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.357 blo = 516, bhi = 1101
2025-07-01 17:49:09.357
2025-07-01 17:49:09.357 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.357 r"""
2025-07-01 17:49:09.358 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.358 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.358 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.358 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.358
2025-07-01 17:49:09.358 Example:
2025-07-01 17:49:09.358
2025-07-01 17:49:09.358 >>> d = Differ()
2025-07-01 17:49:09.358 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.358 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.358 >>> print(''.join(results), end="")
2025-07-01 17:49:09.358 - abcDefghiJkl
2025-07-01 17:49:09.358 + abcdefGhijkl
2025-07-01 17:49:09.358 """
2025-07-01 17:49:09.358
2025-07-01 17:49:09.358 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.358 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.358 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.358 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.359 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.359
2025-07-01 17:49:09.359 # search for the pair that matches best without being identical
2025-07-01 17:49:09.359 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.359 # on junk -- unless we have to)
2025-07-01 17:49:09.359 for j in range(blo, bhi):
2025-07-01 17:49:09.359 bj = b[j]
2025-07-01 17:49:09.359 cruncher.set_seq2(bj)
2025-07-01 17:49:09.359 for i in range(alo, ahi):
2025-07-01 17:49:09.359 ai = a[i]
2025-07-01 17:49:09.359 if ai == bj:
2025-07-01 17:49:09.359 if eqi is None:
2025-07-01 17:49:09.359 eqi, eqj = i, j
2025-07-01 17:49:09.359 continue
2025-07-01 17:49:09.359 cruncher.set_seq1(ai)
2025-07-01 17:49:09.359 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.359 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.359 # compares by a factor of 3.
2025-07-01 17:49:09.359 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.359 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.359 # of the computation is cached by cruncher
2025-07-01 17:49:09.360 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.360 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.360 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.360 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.360 if best_ratio < cutoff:
2025-07-01 17:49:09.360 # no non-identical "pretty close" pair
2025-07-01 17:49:09.360 if eqi is None:
2025-07-01 17:49:09.360 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.360 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.360 return
2025-07-01 17:49:09.360 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.360 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.360 else:
2025-07-01 17:49:09.360 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.360 eqi = None
2025-07-01 17:49:09.360
2025-07-01 17:49:09.360 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.360 # identical
2025-07-01 17:49:09.360
2025-07-01 17:49:09.361 # pump out diffs from before the synch point
2025-07-01 17:49:09.361 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.361
2025-07-01 17:49:09.361 # do intraline marking on the synch pair
2025-07-01 17:49:09.361 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.361 if eqi is None:
2025-07-01 17:49:09.361 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.361 atags = btags = ""
2025-07-01 17:49:09.361 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.361 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.361 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.361 if tag == 'replace':
2025-07-01 17:49:09.361 atags += '^' * la
2025-07-01 17:49:09.361 btags += '^' * lb
2025-07-01 17:49:09.361 elif tag == 'delete':
2025-07-01 17:49:09.361 atags += '-' * la
2025-07-01 17:49:09.361 elif tag == 'insert':
2025-07-01 17:49:09.361 btags += '+' * lb
2025-07-01 17:49:09.361 elif tag == 'equal':
2025-07-01 17:49:09.361 atags += ' ' * la
2025-07-01 17:49:09.362 btags += ' ' * lb
2025-07-01 17:49:09.362 else:
2025-07-01 17:49:09.362 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.362 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.362 else:
2025-07-01 17:49:09.362 # the synch pair is identical
2025-07-01 17:49:09.362 yield ' ' + aelt
2025-07-01 17:49:09.362
2025-07-01 17:49:09.362 # pump out diffs from after the synch point
2025-07-01 17:49:09.362 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.362
2025-07-01 17:49:09.362 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.362 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.362
2025-07-01 17:49:09.362 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.362 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.362 alo = 517, ahi = 1101
2025-07-01 17:49:09.362 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.362 blo = 517, bhi = 1101
2025-07-01 17:49:09.362
2025-07-01 17:49:09.362 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.363 g = []
2025-07-01 17:49:09.363 if alo < ahi:
2025-07-01 17:49:09.363 if blo < bhi:
2025-07-01 17:49:09.363 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.363 else:
2025-07-01 17:49:09.363 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.363 elif blo < bhi:
2025-07-01 17:49:09.363 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.363
2025-07-01 17:49:09.363 > yield from g
2025-07-01 17:49:09.363
2025-07-01 17:49:09.363 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.363 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.363
2025-07-01 17:49:09.363 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.363 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.363 alo = 517, ahi = 1101
2025-07-01 17:49:09.363 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.363 blo = 517, bhi = 1101
2025-07-01 17:49:09.363
2025-07-01 17:49:09.364 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.364 r"""
2025-07-01 17:49:09.364 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.364 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.364 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.364 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.364
2025-07-01 17:49:09.364 Example:
2025-07-01 17:49:09.364
2025-07-01 17:49:09.364 >>> d = Differ()
2025-07-01 17:49:09.364 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.364 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.364 >>> print(''.join(results), end="")
2025-07-01 17:49:09.364 - abcDefghiJkl
2025-07-01 17:49:09.364 + abcdefGhijkl
2025-07-01 17:49:09.364 """
2025-07-01 17:49:09.364
2025-07-01 17:49:09.364 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.364 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.365 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.365 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.365 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.365
2025-07-01 17:49:09.365 # search for the pair that matches best without being identical
2025-07-01 17:49:09.365 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.365 # on junk -- unless we have to)
2025-07-01 17:49:09.365 for j in range(blo, bhi):
2025-07-01 17:49:09.365 bj = b[j]
2025-07-01 17:49:09.365 cruncher.set_seq2(bj)
2025-07-01 17:49:09.365 for i in range(alo, ahi):
2025-07-01 17:49:09.365 ai = a[i]
2025-07-01 17:49:09.365 if ai == bj:
2025-07-01 17:49:09.365 if eqi is None:
2025-07-01 17:49:09.365 eqi, eqj = i, j
2025-07-01 17:49:09.365 continue
2025-07-01 17:49:09.365 cruncher.set_seq1(ai)
2025-07-01 17:49:09.365 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.365 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.365 # compares by a factor of 3.
2025-07-01 17:49:09.365 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.366 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.366 # of the computation is cached by cruncher
2025-07-01 17:49:09.366 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.366 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.366 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.366 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.366 if best_ratio < cutoff:
2025-07-01 17:49:09.366 # no non-identical "pretty close" pair
2025-07-01 17:49:09.366 if eqi is None:
2025-07-01 17:49:09.366 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.366 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.366 return
2025-07-01 17:49:09.366 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.366 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.366 else:
2025-07-01 17:49:09.366 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.366 eqi = None
2025-07-01 17:49:09.366
2025-07-01 17:49:09.366 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.366 # identical
2025-07-01 17:49:09.366
2025-07-01 17:49:09.369 # pump out diffs from before the synch point
2025-07-01 17:49:09.369 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.370
2025-07-01 17:49:09.370 # do intraline marking on the synch pair
2025-07-01 17:49:09.370 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.370 if eqi is None:
2025-07-01 17:49:09.370 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.370 atags = btags = ""
2025-07-01 17:49:09.370 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.370 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.370 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.370 if tag == 'replace':
2025-07-01 17:49:09.370 atags += '^' * la
2025-07-01 17:49:09.370 btags += '^' * lb
2025-07-01 17:49:09.370 elif tag == 'delete':
2025-07-01 17:49:09.370 atags += '-' * la
2025-07-01 17:49:09.370 elif tag == 'insert':
2025-07-01 17:49:09.370 btags += '+' * lb
2025-07-01 17:49:09.370 elif tag == 'equal':
2025-07-01 17:49:09.370 atags += ' ' * la
2025-07-01 17:49:09.370 btags += ' ' * lb
2025-07-01 17:49:09.370 else:
2025-07-01 17:49:09.370 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.370 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.371 else:
2025-07-01 17:49:09.371 # the synch pair is identical
2025-07-01 17:49:09.371 yield ' ' + aelt
2025-07-01 17:49:09.371
2025-07-01 17:49:09.371 # pump out diffs from after the synch point
2025-07-01 17:49:09.371 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.371
2025-07-01 17:49:09.371 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.371 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.371
2025-07-01 17:49:09.371 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.371 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.371 alo = 518, ahi = 1101
2025-07-01 17:49:09.371 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.371 blo = 518, bhi = 1101
2025-07-01 17:49:09.371
2025-07-01 17:49:09.371 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.371 g = []
2025-07-01 17:49:09.372 if alo < ahi:
2025-07-01 17:49:09.372 if blo < bhi:
2025-07-01 17:49:09.372 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.372 else:
2025-07-01 17:49:09.372 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.372 elif blo < bhi:
2025-07-01 17:49:09.372 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.372
2025-07-01 17:49:09.372 > yield from g
2025-07-01 17:49:09.372
2025-07-01 17:49:09.372 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.372 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.372
2025-07-01 17:49:09.372 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.372 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.372 alo = 518, ahi = 1101
2025-07-01 17:49:09.372 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.372 blo = 518, bhi = 1101
2025-07-01 17:49:09.372
2025-07-01 17:49:09.372 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.372 r"""
2025-07-01 17:49:09.373 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.373 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.373 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.373 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.373
2025-07-01 17:49:09.373 Example:
2025-07-01 17:49:09.373
2025-07-01 17:49:09.373 >>> d = Differ()
2025-07-01 17:49:09.373 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.373 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.373 >>> print(''.join(results), end="")
2025-07-01 17:49:09.373 - abcDefghiJkl
2025-07-01 17:49:09.373 + abcdefGhijkl
2025-07-01 17:49:09.373 """
2025-07-01 17:49:09.373
2025-07-01 17:49:09.373 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.373 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.373 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.374 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.374 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.374
2025-07-01 17:49:09.374 # search for the pair that matches best without being identical
2025-07-01 17:49:09.374 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.374 # on junk -- unless we have to)
2025-07-01 17:49:09.374 for j in range(blo, bhi):
2025-07-01 17:49:09.374 bj = b[j]
2025-07-01 17:49:09.374 cruncher.set_seq2(bj)
2025-07-01 17:49:09.374 for i in range(alo, ahi):
2025-07-01 17:49:09.374 ai = a[i]
2025-07-01 17:49:09.374 if ai == bj:
2025-07-01 17:49:09.374 if eqi is None:
2025-07-01 17:49:09.374 eqi, eqj = i, j
2025-07-01 17:49:09.374 continue
2025-07-01 17:49:09.374 cruncher.set_seq1(ai)
2025-07-01 17:49:09.374 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.374 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.374 # compares by a factor of 3.
2025-07-01 17:49:09.374 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.374 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.375 # of the computation is cached by cruncher
2025-07-01 17:49:09.375 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.375 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.375 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.375 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.375 if best_ratio < cutoff:
2025-07-01 17:49:09.375 # no non-identical "pretty close" pair
2025-07-01 17:49:09.375 if eqi is None:
2025-07-01 17:49:09.375 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.375 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.375 return
2025-07-01 17:49:09.375 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.375 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.375 else:
2025-07-01 17:49:09.375 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.375 eqi = None
2025-07-01 17:49:09.375
2025-07-01 17:49:09.375 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.375 # identical
2025-07-01 17:49:09.376
2025-07-01 17:49:09.376 # pump out diffs from before the synch point
2025-07-01 17:49:09.376 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.376
2025-07-01 17:49:09.376 # do intraline marking on the synch pair
2025-07-01 17:49:09.376 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.376 if eqi is None:
2025-07-01 17:49:09.376 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.376 atags = btags = ""
2025-07-01 17:49:09.376 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.376 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.376 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.376 if tag == 'replace':
2025-07-01 17:49:09.376 atags += '^' * la
2025-07-01 17:49:09.376 btags += '^' * lb
2025-07-01 17:49:09.376 elif tag == 'delete':
2025-07-01 17:49:09.376 atags += '-' * la
2025-07-01 17:49:09.376 elif tag == 'insert':
2025-07-01 17:49:09.376 btags += '+' * lb
2025-07-01 17:49:09.376 elif tag == 'equal':
2025-07-01 17:49:09.376 atags += ' ' * la
2025-07-01 17:49:09.377 btags += ' ' * lb
2025-07-01 17:49:09.377 else:
2025-07-01 17:49:09.377 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.377 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.377 else:
2025-07-01 17:49:09.377 # the synch pair is identical
2025-07-01 17:49:09.377 yield ' ' + aelt
2025-07-01 17:49:09.377
2025-07-01 17:49:09.377 # pump out diffs from after the synch point
2025-07-01 17:49:09.377 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.377
2025-07-01 17:49:09.377 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.377 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.377
2025-07-01 17:49:09.377 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.377 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.377 alo = 519, ahi = 1101
2025-07-01 17:49:09.377 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.377 blo = 519, bhi = 1101
2025-07-01 17:49:09.377
2025-07-01 17:49:09.377 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.378 g = []
2025-07-01 17:49:09.378 if alo < ahi:
2025-07-01 17:49:09.378 if blo < bhi:
2025-07-01 17:49:09.378 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.378 else:
2025-07-01 17:49:09.378 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.378 elif blo < bhi:
2025-07-01 17:49:09.378 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.378
2025-07-01 17:49:09.378 > yield from g
2025-07-01 17:49:09.378
2025-07-01 17:49:09.378 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.378 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.378
2025-07-01 17:49:09.378 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.378 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.378 alo = 519, ahi = 1101
2025-07-01 17:49:09.378 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.378 blo = 519, bhi = 1101
2025-07-01 17:49:09.378
2025-07-01 17:49:09.379 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.379 r"""
2025-07-01 17:49:09.379 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.379 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.379 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.379 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.379
2025-07-01 17:49:09.379 Example:
2025-07-01 17:49:09.379
2025-07-01 17:49:09.379 >>> d = Differ()
2025-07-01 17:49:09.379 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.379 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.379 >>> print(''.join(results), end="")
2025-07-01 17:49:09.379 - abcDefghiJkl
2025-07-01 17:49:09.379 + abcdefGhijkl
2025-07-01 17:49:09.379 """
2025-07-01 17:49:09.379
2025-07-01 17:49:09.379 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.379 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.380 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.380 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.380 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.380
2025-07-01 17:49:09.380 # search for the pair that matches best without being identical
2025-07-01 17:49:09.380 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.380 # on junk -- unless we have to)
2025-07-01 17:49:09.380 for j in range(blo, bhi):
2025-07-01 17:49:09.380 bj = b[j]
2025-07-01 17:49:09.380 cruncher.set_seq2(bj)
2025-07-01 17:49:09.380 for i in range(alo, ahi):
2025-07-01 17:49:09.380 ai = a[i]
2025-07-01 17:49:09.380 if ai == bj:
2025-07-01 17:49:09.380 if eqi is None:
2025-07-01 17:49:09.380 eqi, eqj = i, j
2025-07-01 17:49:09.380 continue
2025-07-01 17:49:09.380 cruncher.set_seq1(ai)
2025-07-01 17:49:09.380 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.380 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.380 # compares by a factor of 3.
2025-07-01 17:49:09.381 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.381 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.381 # of the computation is cached by cruncher
2025-07-01 17:49:09.381 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.381 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.381 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.381 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.381 if best_ratio < cutoff:
2025-07-01 17:49:09.381 # no non-identical "pretty close" pair
2025-07-01 17:49:09.381 if eqi is None:
2025-07-01 17:49:09.381 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.381 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.381 return
2025-07-01 17:49:09.381 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.381 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.381 else:
2025-07-01 17:49:09.381 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.381 eqi = None
2025-07-01 17:49:09.381
2025-07-01 17:49:09.381 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.382 # identical
2025-07-01 17:49:09.382
2025-07-01 17:49:09.382 # pump out diffs from before the synch point
2025-07-01 17:49:09.382 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.382
2025-07-01 17:49:09.382 # do intraline marking on the synch pair
2025-07-01 17:49:09.382 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.382 if eqi is None:
2025-07-01 17:49:09.382 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.382 atags = btags = ""
2025-07-01 17:49:09.382 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.382 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.382 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.382 if tag == 'replace':
2025-07-01 17:49:09.382 atags += '^' * la
2025-07-01 17:49:09.382 btags += '^' * lb
2025-07-01 17:49:09.382 elif tag == 'delete':
2025-07-01 17:49:09.382 atags += '-' * la
2025-07-01 17:49:09.382 elif tag == 'insert':
2025-07-01 17:49:09.382 btags += '+' * lb
2025-07-01 17:49:09.382 elif tag == 'equal':
2025-07-01 17:49:09.383 atags += ' ' * la
2025-07-01 17:49:09.387 btags += ' ' * lb
2025-07-01 17:49:09.387 else:
2025-07-01 17:49:09.387 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.388 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.388 else:
2025-07-01 17:49:09.388 # the synch pair is identical
2025-07-01 17:49:09.388 yield ' ' + aelt
2025-07-01 17:49:09.388
2025-07-01 17:49:09.388 # pump out diffs from after the synch point
2025-07-01 17:49:09.388 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.388
2025-07-01 17:49:09.388 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.388 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.388
2025-07-01 17:49:09.388 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.388 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.388 alo = 520, ahi = 1101
2025-07-01 17:49:09.388 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.388 blo = 520, bhi = 1101
2025-07-01 17:49:09.388
2025-07-01 17:49:09.388 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.388 g = []
2025-07-01 17:49:09.389 if alo < ahi:
2025-07-01 17:49:09.389 if blo < bhi:
2025-07-01 17:49:09.389 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.389 else:
2025-07-01 17:49:09.389 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.389 elif blo < bhi:
2025-07-01 17:49:09.389 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.389
2025-07-01 17:49:09.389 > yield from g
2025-07-01 17:49:09.389
2025-07-01 17:49:09.389 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.389 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.389
2025-07-01 17:49:09.389 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.389 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.389 alo = 520, ahi = 1101
2025-07-01 17:49:09.389 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.389 blo = 520, bhi = 1101
2025-07-01 17:49:09.389
2025-07-01 17:49:09.389 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.390 r"""
2025-07-01 17:49:09.390 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.390 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.390 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.390 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.390
2025-07-01 17:49:09.390 Example:
2025-07-01 17:49:09.390
2025-07-01 17:49:09.390 >>> d = Differ()
2025-07-01 17:49:09.390 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.390 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.390 >>> print(''.join(results), end="")
2025-07-01 17:49:09.390 - abcDefghiJkl
2025-07-01 17:49:09.391 + abcdefGhijkl
2025-07-01 17:49:09.391 """
2025-07-01 17:49:09.391
2025-07-01 17:49:09.391 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.391 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.391 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.391 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.391 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.391
2025-07-01 17:49:09.391 # search for the pair that matches best without being identical
2025-07-01 17:49:09.391 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.391 # on junk -- unless we have to)
2025-07-01 17:49:09.391 for j in range(blo, bhi):
2025-07-01 17:49:09.391 bj = b[j]
2025-07-01 17:49:09.391 cruncher.set_seq2(bj)
2025-07-01 17:49:09.395 for i in range(alo, ahi):
2025-07-01 17:49:09.395 ai = a[i]
2025-07-01 17:49:09.395 if ai == bj:
2025-07-01 17:49:09.395 if eqi is None:
2025-07-01 17:49:09.395 eqi, eqj = i, j
2025-07-01 17:49:09.395 continue
2025-07-01 17:49:09.396 cruncher.set_seq1(ai)
2025-07-01 17:49:09.396 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.396 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.396 # compares by a factor of 3.
2025-07-01 17:49:09.396 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.396 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.396 # of the computation is cached by cruncher
2025-07-01 17:49:09.396 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.396 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.396 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.396 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.396 if best_ratio < cutoff:
2025-07-01 17:49:09.396 # no non-identical "pretty close" pair
2025-07-01 17:49:09.396 if eqi is None:
2025-07-01 17:49:09.396 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.396 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.396 return
2025-07-01 17:49:09.397 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.397 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.397 else:
2025-07-01 17:49:09.397 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.397 eqi = None
2025-07-01 17:49:09.397
2025-07-01 17:49:09.397 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.397 # identical
2025-07-01 17:49:09.397
2025-07-01 17:49:09.397 # pump out diffs from before the synch point
2025-07-01 17:49:09.397 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.397
2025-07-01 17:49:09.397 # do intraline marking on the synch pair
2025-07-01 17:49:09.397 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.397 if eqi is None:
2025-07-01 17:49:09.397 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.397 atags = btags = ""
2025-07-01 17:49:09.397 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.397 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.401 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.401 if tag == 'replace':
2025-07-01 17:49:09.401 atags += '^' * la
2025-07-01 17:49:09.401 btags += '^' * lb
2025-07-01 17:49:09.401 elif tag == 'delete':
2025-07-01 17:49:09.401 atags += '-' * la
2025-07-01 17:49:09.401 elif tag == 'insert':
2025-07-01 17:49:09.401 btags += '+' * lb
2025-07-01 17:49:09.401 elif tag == 'equal':
2025-07-01 17:49:09.401 atags += ' ' * la
2025-07-01 17:49:09.401 btags += ' ' * lb
2025-07-01 17:49:09.401 else:
2025-07-01 17:49:09.401 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.401 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.402 else:
2025-07-01 17:49:09.402 # the synch pair is identical
2025-07-01 17:49:09.402 yield ' ' + aelt
2025-07-01 17:49:09.402
2025-07-01 17:49:09.402 # pump out diffs from after the synch point
2025-07-01 17:49:09.402 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.402
2025-07-01 17:49:09.402 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.402 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.402
2025-07-01 17:49:09.402 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.402 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.402 alo = 521, ahi = 1101
2025-07-01 17:49:09.402 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.402 blo = 521, bhi = 1101
2025-07-01 17:49:09.402
2025-07-01 17:49:09.403 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.403 g = []
2025-07-01 17:49:09.403 if alo < ahi:
2025-07-01 17:49:09.403 if blo < bhi:
2025-07-01 17:49:09.403 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.403 else:
2025-07-01 17:49:09.403 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.403 elif blo < bhi:
2025-07-01 17:49:09.403 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.403
2025-07-01 17:49:09.403 > yield from g
2025-07-01 17:49:09.403
2025-07-01 17:49:09.403 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.403 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.403
2025-07-01 17:49:09.403 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.403 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.403 alo = 521, ahi = 1101
2025-07-01 17:49:09.404 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.404 blo = 521, bhi = 1101
2025-07-01 17:49:09.404
2025-07-01 17:49:09.404 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.404 r"""
2025-07-01 17:49:09.404 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.404 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.404 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.404 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.404
2025-07-01 17:49:09.404 Example:
2025-07-01 17:49:09.404
2025-07-01 17:49:09.404 >>> d = Differ()
2025-07-01 17:49:09.404 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.404 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.404 >>> print(''.join(results), end="")
2025-07-01 17:49:09.404 - abcDefghiJkl
2025-07-01 17:49:09.405 + abcdefGhijkl
2025-07-01 17:49:09.405 """
2025-07-01 17:49:09.405
2025-07-01 17:49:09.405 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.405 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.405 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.405 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.405 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 17:49:09.405
2025-07-01 17:49:09.405 # search for the pair that matches best without being identical
2025-07-01 17:49:09.405 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 17:49:09.405 # on junk -- unless we have to)
2025-07-01 17:49:09.405 for j in range(blo, bhi):
2025-07-01 17:49:09.405 bj = b[j]
2025-07-01 17:49:09.405 cruncher.set_seq2(bj)
2025-07-01 17:49:09.406 for i in range(alo, ahi):
2025-07-01 17:49:09.406 ai = a[i]
2025-07-01 17:49:09.406 if ai == bj:
2025-07-01 17:49:09.406 if eqi is None:
2025-07-01 17:49:09.406 eqi, eqj = i, j
2025-07-01 17:49:09.406 continue
2025-07-01 17:49:09.406 cruncher.set_seq1(ai)
2025-07-01 17:49:09.406 # computing similarity is expensive, so use the quick
2025-07-01 17:49:09.406 # upper bounds first -- have seen this speed up messy
2025-07-01 17:49:09.406 # compares by a factor of 3.
2025-07-01 17:49:09.406 # note that ratio() is only expensive to compute the first
2025-07-01 17:49:09.406 # time it's called on a sequence pair; the expensive part
2025-07-01 17:49:09.406 # of the computation is cached by cruncher
2025-07-01 17:49:09.406 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 17:49:09.406 cruncher.quick_ratio() > best_ratio and \
2025-07-01 17:49:09.406 cruncher.ratio() > best_ratio:
2025-07-01 17:49:09.406 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 17:49:09.406 if best_ratio < cutoff:
2025-07-01 17:49:09.407 # no non-identical "pretty close" pair
2025-07-01 17:49:09.407 if eqi is None:
2025-07-01 17:49:09.407 # no identical pair either -- treat it as a straight replace
2025-07-01 17:49:09.407 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.407 return
2025-07-01 17:49:09.407 # no close pair, but an identical pair -- synch up on that
2025-07-01 17:49:09.407 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 17:49:09.407 else:
2025-07-01 17:49:09.407 # there's a close pair, so forget the identical pair (if any)
2025-07-01 17:49:09.407 eqi = None
2025-07-01 17:49:09.407
2025-07-01 17:49:09.407 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 17:49:09.407 # identical
2025-07-01 17:49:09.407
2025-07-01 17:49:09.407 # pump out diffs from before the synch point
2025-07-01 17:49:09.407 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 17:49:09.407
2025-07-01 17:49:09.407 # do intraline marking on the synch pair
2025-07-01 17:49:09.407 aelt, belt = a[best_i], b[best_j]
2025-07-01 17:49:09.408 if eqi is None:
2025-07-01 17:49:09.408 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 17:49:09.408 atags = btags = ""
2025-07-01 17:49:09.408 cruncher.set_seqs(aelt, belt)
2025-07-01 17:49:09.408 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 17:49:09.408 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 17:49:09.408 if tag == 'replace':
2025-07-01 17:49:09.408 atags += '^' * la
2025-07-01 17:49:09.408 btags += '^' * lb
2025-07-01 17:49:09.408 elif tag == 'delete':
2025-07-01 17:49:09.408 atags += '-' * la
2025-07-01 17:49:09.408 elif tag == 'insert':
2025-07-01 17:49:09.408 btags += '+' * lb
2025-07-01 17:49:09.408 elif tag == 'equal':
2025-07-01 17:49:09.408 atags += ' ' * la
2025-07-01 17:49:09.408 btags += ' ' * lb
2025-07-01 17:49:09.408 else:
2025-07-01 17:49:09.408 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 17:49:09.409 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 17:49:09.409 else:
2025-07-01 17:49:09.409 # the synch pair is identical
2025-07-01 17:49:09.409 yield ' ' + aelt
2025-07-01 17:49:09.409
2025-07-01 17:49:09.409 # pump out diffs from after the synch point
2025-07-01 17:49:09.409 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 17:49:09.409
2025-07-01 17:49:09.409 C:\Python3x\Lib\difflib.py:985:
2025-07-01 17:49:09.409 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.409
2025-07-01 17:49:09.409 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.409 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.409 alo = 522, ahi = 1101
2025-07-01 17:49:09.409 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.409 blo = 522, bhi = 1101
2025-07-01 17:49:09.409
2025-07-01 17:49:09.409 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.409 g = []
2025-07-01 17:49:09.410 if alo < ahi:
2025-07-01 17:49:09.410 if blo < bhi:
2025-07-01 17:49:09.410 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 17:49:09.410 else:
2025-07-01 17:49:09.410 g = self._dump('-', a, alo, ahi)
2025-07-01 17:49:09.410 elif blo < bhi:
2025-07-01 17:49:09.410 g = self._dump('+', b, blo, bhi)
2025-07-01 17:49:09.410
2025-07-01 17:49:09.410 > yield from g
2025-07-01 17:49:09.410
2025-07-01 17:49:09.410 C:\Python3x\Lib\difflib.py:997:
2025-07-01 17:49:09.410 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.410
2025-07-01 17:49:09.410 self = <difflib.Differ object at [hex]>
2025-07-01 17:49:09.410 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 17:49:09.410 alo = 522, ahi = 1101
2025-07-01 17:49:09.410 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 17:49:09.411 blo = 522, bhi = 1101
2025-07-01 17:49:09.411
2025-07-01 17:49:09.411 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 17:49:09.411 r"""
2025-07-01 17:49:09.411 When replacing one block of lines with another, search the blocks
2025-07-01 17:49:09.411 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 17:49:09.411 synch point, and intraline difference marking is done on the
2025-07-01 17:49:09.411 similar pair. Lots of work, but often worth it.
2025-07-01 17:49:09.411
2025-07-01 17:49:09.411 Example:
2025-07-01 17:49:09.411
2025-07-01 17:49:09.411 >>> d = Differ()
2025-07-01 17:49:09.411 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 17:49:09.411 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 17:49:09.411 >>> print(''.join(results), end="")
2025-07-01 17:49:09.411 - abcDefghiJkl
2025-07-01 17:49:09.412 + abcdefGhijkl
2025-07-01 17:49:09.412 """
2025-07-01 17:49:09.412
2025-07-01 17:49:09.412 # don't synch up unless the lines have a similarity score of at
2025-07-01 17:49:09.412 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 17:49:09.412 best_ratio, cutoff = 0.74, 0.75
2025-07-01 17:49:09.412 > cruncher = SequenceMatcher(self.charjunk)
2025-07-01 17:49:09.412
2025-07-01 17:49:09.412 C:\Python3x\Lib\difflib.py:915:
2025-07-01 17:49:09.412 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.412
2025-07-01 17:49:09.412 self = <difflib.SequenceMatcher object at [hex]>
2025-07-01 17:49:09.412 isjunk = <function IS_CHARACTER_JUNK at 0x000001A97A5ADC60>, a = '', b = ''
2025-07-01 17:49:09.412 autojunk = True
2025-07-01 17:49:09.412
2025-07-01 17:49:09.412 def __init__(self, isjunk=None, a='', b='', autojunk=True):
2025-07-01 17:49:09.413 """Construct a SequenceMatcher.
2025-07-01 17:49:09.413
2025-07-01 17:49:09.413 Optional arg isjunk is None (the default), or a one-argument
2025-07-01 17:49:09.413 function that takes a sequence element and returns true iff the
2025-07-01 17:49:09.413 element is junk. None is equivalent to passing "lambda x: 0", i.e.
2025-07-01 17:49:09.413 no elements are considered to be junk. For example, pass
2025-07-01 17:49:09.413 lambda x: x in " \\t"
2025-07-01 17:49:09.413 if you're comparing lines as sequences of characters, and don't
2025-07-01 17:49:09.413 want to synch up on blanks or hard tabs.
2025-07-01 17:49:09.413
2025-07-01 17:49:09.413 Optional arg a is the first of two sequences to be compared. By
2025-07-01 17:49:09.413 default, an empty string. The elements of a must be hashable. See
2025-07-01 17:49:09.413 also .set_seqs() and .set_seq1().
2025-07-01 17:49:09.413
2025-07-01 17:49:09.413 Optional arg b is the second of two sequences to be compared. By
2025-07-01 17:49:09.413 default, an empty string. The elements of b must be hashable. See
2025-07-01 17:49:09.413 also .set_seqs() and .set_seq2().
2025-07-01 17:49:09.413
2025-07-01 17:49:09.413 Optional arg autojunk should be set to False to disable the
2025-07-01 17:49:09.419 "automatic junk heuristic" that treats popular elements as junk
2025-07-01 17:49:09.419 (see module documentation for more information).
2025-07-01 17:49:09.419 """
2025-07-01 17:49:09.419
2025-07-01 17:49:09.419 # Members:
2025-07-01 17:49:09.419 # a
2025-07-01 17:49:09.419 # first sequence
2025-07-01 17:49:09.419 # b
2025-07-01 17:49:09.419 # second sequence; differences are computed as "what do
2025-07-01 17:49:09.419 # we need to do to 'a' to change it into 'b'?"
2025-07-01 17:49:09.419 # b2j
2025-07-01 17:49:09.419 # for x in b, b2j[x] is a list of the indices (into b)
2025-07-01 17:49:09.419 # at which x appears; junk and popular elements do not appear
2025-07-01 17:49:09.419 # fullbcount
2025-07-01 17:49:09.420 # for x in b, fullbcount[x] == the number of times x
2025-07-01 17:49:09.420 # appears in b; only materialized if really needed (used
2025-07-01 17:49:09.420 # only for computing quick_ratio())
2025-07-01 17:49:09.420 # matching_blocks
2025-07-01 17:49:09.420 # a list of (i, j, k) triples, where a[i:i+k] == b[j:j+k];
2025-07-01 17:49:09.420 # ascending & non-overlapping in i and in j; terminated by
2025-07-01 17:49:09.420 # a dummy (len(a), len(b), 0) sentinel
2025-07-01 17:49:09.420 # opcodes
2025-07-01 17:49:09.420 # a list of (tag, i1, i2, j1, j2) tuples, where tag is
2025-07-01 17:49:09.420 # one of
2025-07-01 17:49:09.420 # 'replace' a[i1:i2] should be replaced by b[j1:j2]
2025-07-01 17:49:09.420 # 'delete' a[i1:i2] should be deleted
2025-07-01 17:49:09.420 # 'insert' b[j1:j2] should be inserted
2025-07-01 17:49:09.420 # 'equal' a[i1:i2] == b[j1:j2]
2025-07-01 17:49:09.420 # isjunk
2025-07-01 17:49:09.420 # a user-supplied function taking a sequence element and
2025-07-01 17:49:09.420 # returning true iff the element is "junk" -- this has
2025-07-01 17:49:09.421 # subtle but helpful effects on the algorithm, which I'll
2025-07-01 17:49:09.421 # get around to writing up someday <0.9 wink>.
2025-07-01 17:49:09.421 # DON'T USE! Only __chain_b uses this. Use "in self.bjunk".
2025-07-01 17:49:09.421 # bjunk
2025-07-01 17:49:09.421 # the items in b for which isjunk is True.
2025-07-01 17:49:09.421 # bpopular
2025-07-01 17:49:09.421 # nonjunk items in b treated as junk by the heuristic (if used).
2025-07-01 17:49:09.421
2025-07-01 17:49:09.421 self.isjunk = isjunk
2025-07-01 17:49:09.421 self.a = self.b = None
2025-07-01 17:49:09.421 self.autojunk = autojunk
2025-07-01 17:49:09.421 > self.set_seqs(a, b)
2025-07-01 17:49:09.421
2025-07-01 17:49:09.421 C:\Python3x\Lib\difflib.py:182:
2025-07-01 17:49:09.421 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.421
2025-07-01 17:49:09.421 self = <difflib.SequenceMatcher object at [hex]>, a = '', b = ''
2025-07-01 17:49:09.421
2025-07-01 17:49:09.421 def set_seqs(self, a, b):
2025-07-01 17:49:09.421 """Set the two sequences to be compared.
2025-07-01 17:49:09.422
2025-07-01 17:49:09.422 >>> s = SequenceMatcher()
2025-07-01 17:49:09.422 >>> s.set_seqs("abcd", "bcde")
2025-07-01 17:49:09.422 >>> s.ratio()
2025-07-01 17:49:09.422 0.75
2025-07-01 17:49:09.422 """
2025-07-01 17:49:09.422
2025-07-01 17:49:09.422 self.set_seq1(a)
2025-07-01 17:49:09.422 > self.set_seq2(b)
2025-07-01 17:49:09.422
2025-07-01 17:49:09.422 C:\Python3x\Lib\difflib.py:194:
2025-07-01 17:49:09.422 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.422
2025-07-01 17:49:09.422 self = <difflib.SequenceMatcher object at [hex]>, b = ''
2025-07-01 17:49:09.422
2025-07-01 17:49:09.422 def set_seq2(self, b):
2025-07-01 17:49:09.422 """Set the second sequence to be compared.
2025-07-01 17:49:09.422
2025-07-01 17:49:09.422 The first sequence to be compared is not changed.
2025-07-01 17:49:09.422
2025-07-01 17:49:09.423 >>> s = SequenceMatcher(None, "abcd", "bcde")
2025-07-01 17:49:09.423 >>> s.ratio()
2025-07-01 17:49:09.423 0.75
2025-07-01 17:49:09.423 >>> s.set_seq2("abcd")
2025-07-01 17:49:09.423 >>> s.ratio()
2025-07-01 17:49:09.423 1.0
2025-07-01 17:49:09.423 >>>
2025-07-01 17:49:09.423
2025-07-01 17:49:09.423 SequenceMatcher computes and caches detailed information about the
2025-07-01 17:49:09.423 second sequence, so if you want to compare one sequence S against
2025-07-01 17:49:09.423 many sequences, use .set_seq2(S) once and call .set_seq1(x)
2025-07-01 17:49:09.423 repeatedly for each of the other sequences.
2025-07-01 17:49:09.423
2025-07-01 17:49:09.423 See also set_seqs() and set_seq1().
2025-07-01 17:49:09.423 """
2025-07-01 17:49:09.423
2025-07-01 17:49:09.423 if b is self.b:
2025-07-01 17:49:09.423 return
2025-07-01 17:49:09.424 self.b = b
2025-07-01 17:49:09.424 self.matching_blocks = self.opcodes = None
2025-07-01 17:49:09.424 self.fullbcount = None
2025-07-01 17:49:09.424 > self.__chain_b()
2025-07-01 17:49:09.424
2025-07-01 17:49:09.424 C:\Python3x\Lib\difflib.py:248:
2025-07-01 17:49:09.424 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 17:49:09.424
2025-07-01 17:49:09.424 self = <difflib.SequenceMatcher object at [hex]>
2025-07-01 17:49:09.424
2025-07-01 17:49:09.424 def __chain_b(self):
2025-07-01 17:49:09.424 # Because isjunk is a user-defined (not C) function, and we test
2025-07-01 17:49:09.424 # for junk a LOT, it's important to minimize the number of calls.
2025-07-01 17:49:09.424 # Before the tricks described here, __chain_b was by far the most
2025-07-01 17:49:09.424 # time-consuming routine in the whole module! If anyone sees
2025-07-01 17:49:09.424 # Jim Roskind, thank him again for profile.py -- I never would
2025-07-01 17:49:09.424 # have guessed that.
2025-07-01 17:49:09.424 # The first trick is to build b2j ignoring the possibility
2025-07-01 17:49:09.425 # of junk. I.e., we don't call isjunk at all yet. Throwing
2025-07-01 17:49:09.425 # out the junk later is much cheaper than building b2j "right"
2025-07-01 17:49:09.425 # from the start.
2025-07-01 17:49:09.425 b = self.b
2025-07-01 17:49:09.425 self.b2j = b2j = {}
2025-07-01 17:49:09.425
2025-07-01 17:49:09.425 for i, elt in enumerate(b):
2025-07-01 17:49:09.425 indices = b2j.setdefault(elt, [])
2025-07-01 17:49:09.425 indices.append(i)
2025-07-01 17:49:09.425
2025-07-01 17:49:09.425 # Purge junk elements
2025-07-01 17:49:09.425 self.bjunk = junk = set()
2025-07-01 17:49:09.425 isjunk = self.isjunk
2025-07-01 17:49:09.425 if isjunk:
2025-07-01 17:49:09.425 > for elt in b2j.keys():
2025-07-01 17:49:09.425 E RecursionError: maximum recursion depth exceeded while calling a Python object
2025-07-01 17:49:09.425
2025-07-01 17:49:09.425 C:\Python3x\Lib\difflib.py:288: RecursionError
2025-07-01 17:49:09.425 ---------------------------- Captured stdout setup ----------------------------
2025-07-01 17:49:09.425 Creating db: localhost:H:\QA\temp\qa2024.tmp\fbqa\test_1508\test.fdb [page_size=None, sql_dialect=None, charset='NONE', user=SYSDBA, password=masterkey]
|
3 #text |
act = <firebird.qa.plugin.Action pytest object at [hex]>
@pytest.mark.version('>=3')
def test_1(act: Action):
act.expected_stdout = expected_stdout
act.execute()
> assert act.clean_stdout == act.clean_expected_stdout
tests\bugs\core_2969_test.py:1211:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
ops = ('==',), results = (False,)
expls = ('%(py2)s\n{%(py2)s = %(py0)s.clean_stdout\n} == %(py6)s\n{%(py6)s = %(py4)s.clean_expected_stdout\n}',)
each_obj = ('WAS_OVERWRITTEN CTX_KEY CTX_VAL\n=============== ============================== =======\n1var...yyy\n1 var_997 yyy\n1 var_998 yyy\n1 var_999 yyy')
def _call_reprcompare(
ops: Sequence[str],
results: Sequence[bool],
expls: Sequence[str],
each_obj: Sequence[object],
) -> str:
for i, res, expl in zip(range(len(ops)), results, expls):
try:
done = not res
except Exception:
done = True
if done:
break
if util._reprcompare is not None:
> custom = util._reprcompare(ops[i], each_obj[i], each_obj[i + 1])
C:\Python3x\Lib\site-packages\_pytest\assertion\rewrite.py:499:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
op = '=='
left = 'WAS_OVERWRITTEN CTX_KEY CTX_VAL\n=============== ============================== =======\n1var_... yyy\n1var_997 yyy\n1var_998 yyy\n1var_999 yyy'
right = 'WAS_OVERWRITTEN CTX_KEY CTX_VAL\n=============== ============================== =======\n1 var... yyy\n1 var_997 yyy\n1 var_998 yyy\n1 var_999 yyy'
def callbinrepr(op, left: object, right: object) -> Optional[str]:
"""Call the pytest_assertrepr_compare hook and prepare the result.
This uses the first result from the hook and then ensures the
following:
* Overly verbose explanations are truncated unless configured otherwise
(eg. if running in verbose mode).
* Embedded newlines are escaped to help util.format_explanation()
later.
* If the rewrite mode is used embedded %-characters are replaced
to protect later % formatting.
The result can be formatted by util.format_explanation() for
pretty printing.
"""
> hook_result = ihook.pytest_assertrepr_compare(
config=item.config, op=op, left=left, right=right
)
C:\Python3x\Lib\site-packages\_pytest\assertion\__init__.py:141:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <HookCaller 'pytest_assertrepr_compare'>
kwargs = {'config': <_pytest.config.Config pytest object at [hex]>, 'left': 'WAS_OVERWRITTEN CTX_KEY ...yyy\n1 var_997 yyy\n1 var_998 yyy\n1 var_999 yyy'}
firstresult = False
def __call__(self, **kwargs: object) -> Any:
"""Call the hook.
Only accepts keyword arguments, which should match the hook
specification.
Returns the result(s) of calling all registered plugins, see
:ref:`calling`.
"""
assert (
not self.is_historic()
), "Cannot directly call a historic hook - use call_historic instead."
self._verify_all_args_are_provided(kwargs)
firstresult = self.spec.opts.get("firstresult", False) if self.spec else False
# Copy because plugins may register other plugins during iteration (#438).
> return self._hookexec(self.name, self._hookimpls.copy(), kwargs, firstresult)
C:\Python3x\Lib\site-packages\pluggy\_hooks.py:501:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <_pytest.config.PytestPluginManager pytest object at [hex]>
hook_name = 'pytest_assertrepr_compare'
methods = [<HookImpl plugin_name='assertion', plugin=<module '_pytest.assertion' from 'C:\\Python3x\\Lib\\site-packages\\_pytest...plugin_name='firebird', plugin=<module 'firebird.qa.plugin' from 'H:\\QA\\firebird-qa\\src\\firebird\\qa\\plugin.py'>>]
kwargs = {'config': <_pytest.config.Config pytest object at [hex]>, 'left': 'WAS_OVERWRITTEN CTX_KEY ...yyy\n1 var_997 yyy\n1 var_998 yyy\n1 var_999 yyy'}
firstresult = False
def _hookexec(
self,
hook_name: str,
methods: Sequence[HookImpl],
kwargs: Mapping[str, object],
firstresult: bool,
) -> object | list[object]:
# called from all hookcaller instances.
# enable_tracing will set its own wrapping function at self._inner_hookexec
> return self._inner_hookexec(hook_name, methods, kwargs, firstresult)
C:\Python3x\Lib\site-packages\pluggy\_manager.py:119:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
config = <_pytest.config.Config pytest object at [hex]>, op = '=='
left = 'WAS_OVERWRITTEN CTX_KEY CTX_VAL\n=============== ============================== =======\n1var_... yyy\n1var_997 yyy\n1var_998 yyy\n1var_999 yyy'
right = 'WAS_OVERWRITTEN CTX_KEY CTX_VAL\n=============== ============================== =======\n1 var... yyy\n1 var_997 yyy\n1 var_998 yyy\n1 var_999 yyy'
def pytest_assertrepr_compare(config: Config, op: str, left: object, right: object) -> Optional[List[str]]:
"""Returns explanation for comparisons in failing assert expressions.
If both objects are `str`, uses `difflib.ndiff` to provide explanation.
"""
if isinstance(left, str) and isinstance(right, str) and op == "==":
# 16.11.2023, pzotov: we have to put empty string at the beginning of each comparing lists.
# Otherwise first diff will be at the same line as 'assert' phrase, which causes readability be poor.
#
left_lines = ['']
left_lines.extend(left.splitlines())
right_lines = ['']
right_lines.extend(right.splitlines())
# 16.11.2023, pzotov
# ndiff output must be interpreted as following:
# * "E - <some text>" ==> MISSED line (it was in EXPECTED text but absent in actual one).
# * "E + <some_text>" ==> EXCESSIVE line (it is not in EXPECTED text but did appear in actual).
# But for QA-purposes, this output must answer the question:
# "what must be changed in ACTUAL output so that it became equal to EXPECTED"
# (i.e. how to "REVERT" actual back to expected).
# In order to see such result, we have to specify 'right_lines' to the 1st argument that is passed to ndiff().
# ::: NB :::
# We assume that all tests are written so that ACTUAL output is left side in 'assert' statement and EXPECTED
# is right side, e.g: assert act.clean_stdout == act.clean_expected_stdout
# This requirement is CRUCIAL if we use ndiff() instead of default pytest comparison method!
#
> return list(ndiff(right_lines, left_lines))
src\firebird\qa\plugin.py:608:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
def compare(self, a, b):
r"""
Compare two sequences of lines; generate the resulting delta.
Each sequence must contain individual single-line strings ending with
newlines. Such sequences can be obtained from the `readlines()` method
of file-like objects. The delta generated also consists of newline-
terminated strings, ready to be printed as-is via the writelines()
method of a file-like object.
Example:
>>> print(''.join(Differ().compare('one\ntwo\nthree\n'.splitlines(True),
... 'ore\ntree\nemu\n'.splitlines(True))),
... end="")
- one
+ ore
- two
- three
+ tree
+ emu
"""
cruncher = SequenceMatcher(self.linejunk, a, b)
for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
if tag == 'replace':
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
elif tag == 'delete':
g = self._dump('-', a, alo, ahi)
elif tag == 'insert':
g = self._dump('+', b, blo, bhi)
elif tag == 'equal':
g = self._dump(' ', a, alo, ahi)
else:
raise ValueError('unknown tag %r' % (tag,))
> yield from g
C:\Python3x\Lib\difflib.py:872:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 3, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 3, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 4, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 4, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 4, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 4, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 5, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 5, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 5, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 5, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 6, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 6, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 6, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 6, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 7, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 7, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 7, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 7, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 8, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 8, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 8, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 8, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 9, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 9, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 9, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 9, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 10, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 10, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 10, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 10, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 11, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 11, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 11, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 11, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 12, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 12, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 12, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 12, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 13, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 13, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 13, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 13, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 14, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 14, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 14, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 14, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 15, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 15, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 15, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 15, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 16, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 16, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 16, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 16, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 17, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 17, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 17, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 17, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 18, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 18, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 18, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 18, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 19, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 19, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 19, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 19, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 20, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 20, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 20, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 20, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 21, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 21, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 21, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 21, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 22, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 22, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 22, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 22, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 23, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 23, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 23, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 23, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 26, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 26, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 26, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 26, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 27, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 27, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 27, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 27, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 28, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 28, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 28, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 28, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 29, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 29, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 29, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 29, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 30, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 30, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 30, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 30, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 31, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 31, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 31, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 31, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 32, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 32, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 32, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 32, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 33, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 33, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 33, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 33, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 34, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 34, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 34, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 34, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 35, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 35, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 35, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 35, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 36, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 36, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 36, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 36, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 37, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 37, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 37, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 37, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 38, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 38, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 38, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 38, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 39, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 39, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 39, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 39, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 40, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 40, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 40, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 40, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 41, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 41, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 41, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 41, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 42, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 42, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 42, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 42, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 43, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 43, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 43, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 43, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 44, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 44, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 44, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 44, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 45, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 45, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 45, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 45, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 48, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 48, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 48, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 48, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 49, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 49, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 49, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 49, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 50, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 50, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 50, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 50, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 51, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 51, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 51, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 51, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 52, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 52, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 52, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 52, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 53, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 53, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 53, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 53, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 54, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 54, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 54, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 54, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 55, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 55, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 55, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 55, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 56, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 56, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 56, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 56, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 57, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 57, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 57, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 57, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 58, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 58, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 58, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 58, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 59, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 59, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 59, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 59, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 60, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 60, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 60, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 60, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 61, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 61, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 61, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 61, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 62, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 62, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 62, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 62, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 63, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 63, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 63, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 63, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 64, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 64, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 64, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 64, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 65, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 65, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 65, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 65, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 66, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 66, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 66, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 66, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 67, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 67, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 67, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 67, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 70, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 70, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 70, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 70, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 71, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 71, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 71, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 71, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 72, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 72, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 72, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 72, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 73, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 73, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 73, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 73, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 74, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 74, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 74, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 74, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 75, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 75, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 75, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 75, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 76, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 76, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 76, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 76, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 77, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 77, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 77, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 77, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 78, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 78, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 78, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 78, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 79, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 79, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 79, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 79, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 80, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 80, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 80, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 80, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 81, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 81, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 81, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 81, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 82, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 82, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 82, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 82, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 83, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 83, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 83, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 83, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 84, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 84, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 84, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 84, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 85, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 85, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 85, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 85, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 86, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 86, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 86, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 86, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 87, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 87, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 87, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 87, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 88, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 88, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 88, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 88, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 89, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 89, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 89, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 89, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 92, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 92, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 92, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 92, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 93, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 93, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 93, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 93, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 94, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 94, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 94, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 94, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 95, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 95, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 95, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 95, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 96, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 96, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 96, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 96, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 97, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 97, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 97, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 97, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 98, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 98, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 98, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 98, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 99, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 99, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 99, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 99, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 100, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 100, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 100, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 100, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 101, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 101, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 101, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 101, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 102, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 102, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 102, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 102, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 103, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 103, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 103, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 103, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 104, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 104, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 104, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 104, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 105, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 105, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 105, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 105, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 106, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 106, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 106, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 106, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 107, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 107, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 107, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 107, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 108, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 108, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 108, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 108, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 109, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 109, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 109, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 109, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 110, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 110, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 110, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 110, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 111, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 111, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 111, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 111, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 114, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 114, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 114, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 114, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 115, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 115, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 115, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 115, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 116, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 116, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 116, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 116, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 117, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 117, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 117, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 117, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 118, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 118, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 118, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 118, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 119, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 119, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 119, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 119, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 120, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 120, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 120, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 120, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 121, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 121, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 121, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 121, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 122, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 122, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 122, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 122, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 123, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 123, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 123, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 123, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 124, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 124, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 124, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 124, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 125, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 125, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 125, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 125, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 126, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 126, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 126, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 126, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 127, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 127, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 127, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 127, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 128, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 128, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 128, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 128, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 129, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 129, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 129, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 129, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 130, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 130, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 130, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 130, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 131, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 131, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 131, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 131, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 132, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 132, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 132, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 132, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 133, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 133, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 133, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 133, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 136, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 136, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 136, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 136, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 137, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 137, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 137, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 137, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 138, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 138, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 138, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 138, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 139, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 139, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 139, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 139, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 140, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 140, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 140, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 140, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 141, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 141, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 141, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 141, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 142, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 142, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 142, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 142, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 143, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 143, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 143, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 143, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 144, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 144, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 144, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 144, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 145, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 145, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 145, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 145, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 146, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 146, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 146, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 146, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 147, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 147, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 147, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 147, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 148, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 148, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 148, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 148, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 149, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 149, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 149, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 149, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 150, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 150, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 150, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 150, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 151, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 151, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 151, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 151, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 152, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 152, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 152, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 152, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 153, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 153, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 153, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 153, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 154, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 154, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 154, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 154, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 155, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 155, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 155, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 155, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 158, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 158, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 158, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 158, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 159, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 159, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 159, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 159, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 160, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 160, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 160, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 160, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 161, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 161, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 161, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 161, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 162, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 162, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 162, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 162, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 163, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 163, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 163, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 163, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 164, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 164, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 164, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 164, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 165, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 165, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 165, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 165, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 166, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 166, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 166, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 166, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 167, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 167, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 167, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 167, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 168, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 168, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 168, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 168, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 169, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 169, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 169, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 169, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 170, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 170, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 170, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 170, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 171, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 171, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 171, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 171, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 172, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 172, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 172, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 172, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 173, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 173, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 173, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 173, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 174, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 174, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 174, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 174, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 175, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 175, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 175, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 175, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 176, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 176, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 176, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 176, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 177, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 177, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 177, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 177, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 180, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 180, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 180, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 180, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 181, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 181, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 181, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 181, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 182, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 182, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 182, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 182, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 183, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 183, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 183, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 183, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 184, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 184, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 184, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 184, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 185, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 185, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 185, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 185, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 186, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 186, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 186, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 186, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 187, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 187, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 187, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 187, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 188, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 188, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 188, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 188, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 189, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 189, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 189, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 189, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 190, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 190, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 190, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 190, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 191, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 191, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 191, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 191, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 192, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 192, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 192, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 192, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 193, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 193, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 193, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 193, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 194, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 194, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 194, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 194, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 195, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 195, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 195, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 195, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 196, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 196, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 196, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 196, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 197, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 197, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 197, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 197, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 198, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 198, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 198, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 198, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 199, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 199, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 199, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 199, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 202, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 202, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 202, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 202, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 203, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 203, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 203, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 203, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 204, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 204, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 204, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 204, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 205, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 205, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 205, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 205, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 206, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 206, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 206, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 206, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 207, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 207, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 207, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 207, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 208, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 208, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 208, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 208, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 209, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 209, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 209, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 209, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 210, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 210, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 210, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 210, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 211, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 211, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 211, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 211, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 212, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 212, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 212, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 212, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 213, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 213, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 213, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 213, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 214, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 214, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 214, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 214, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 215, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 215, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 215, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 215, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 216, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 216, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 216, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 216, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 217, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 217, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 217, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 217, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 218, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 218, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 218, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 218, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 219, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 219, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 219, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 219, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 220, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 220, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 220, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 220, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 221, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 221, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 221, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 221, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 224, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 224, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 224, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 224, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 225, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 225, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 225, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 225, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 226, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 226, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 226, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 226, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 227, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 227, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 227, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 227, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 228, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 228, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 228, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 228, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 229, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 229, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 229, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 229, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 230, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 230, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 230, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 230, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 231, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 231, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 231, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 231, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 232, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 232, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 232, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 232, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 233, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 233, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 233, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 233, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 234, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 234, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 234, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 234, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 235, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 235, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 235, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 235, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 236, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 236, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 236, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 236, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 237, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 237, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 237, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 237, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 238, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 238, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 238, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 238, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 239, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 239, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 239, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 239, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 240, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 240, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 240, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 240, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 241, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 241, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 241, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 241, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 242, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 242, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 242, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 242, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 243, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 243, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 243, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 243, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 246, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 246, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 246, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 246, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 247, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 247, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 247, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 247, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 248, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 248, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 248, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 248, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 249, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 249, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 249, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 249, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 250, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 250, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 250, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 250, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 251, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 251, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 251, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 251, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 252, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 252, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 252, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 252, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 253, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 253, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 253, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 253, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 254, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 254, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 254, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 254, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 255, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 255, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 255, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 255, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 256, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 256, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 256, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 256, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 257, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 257, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 257, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 257, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 258, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 258, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 258, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 258, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 259, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 259, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 259, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 259, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 260, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 260, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 260, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 260, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 261, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 261, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 261, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 261, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 262, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 262, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 262, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 262, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 263, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 263, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 263, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 263, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 264, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 264, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 264, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 264, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 265, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 265, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 265, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 265, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 268, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 268, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 268, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 268, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 269, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 269, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 269, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 269, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 270, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 270, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 270, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 270, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 271, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 271, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 271, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 271, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 272, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 272, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 272, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 272, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 273, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 273, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 273, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 273, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 274, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 274, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 274, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 274, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 275, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 275, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 275, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 275, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 276, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 276, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 276, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 276, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 277, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 277, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 277, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 277, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 278, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 278, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 278, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 278, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 279, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 279, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 279, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 279, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 280, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 280, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 280, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 280, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 281, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 281, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 281, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 281, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 282, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 282, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 282, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 282, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 283, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 283, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 283, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 283, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 284, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 284, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 284, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 284, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 285, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 285, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 285, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 285, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 286, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 286, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 286, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 286, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 287, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 287, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 287, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 287, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 290, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 290, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 290, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 290, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 291, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 291, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 291, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 291, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 292, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 292, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 292, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 292, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 293, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 293, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 293, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 293, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 294, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 294, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 294, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 294, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 295, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 295, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 295, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 295, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 296, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 296, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 296, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 296, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 297, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 297, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 297, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 297, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 298, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 298, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 298, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 298, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 299, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 299, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 299, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 299, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 300, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 300, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 300, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 300, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 301, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 301, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 301, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 301, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 302, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 302, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 302, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 302, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 303, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 303, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 303, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 303, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 304, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 304, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 304, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 304, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 305, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 305, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 305, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 305, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 306, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 306, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 306, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 306, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 307, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 307, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 307, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 307, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 308, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 308, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 308, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 308, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 309, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 309, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 309, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 309, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 312, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 312, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 312, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 312, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 313, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 313, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 313, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 313, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 314, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 314, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 314, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 314, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 315, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 315, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 315, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 315, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 316, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 316, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 316, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 316, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 317, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 317, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 317, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 317, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 318, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 318, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 318, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 318, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 319, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 319, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 319, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 319, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 320, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 320, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 320, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 320, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 321, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 321, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 321, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 321, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 322, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 322, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 322, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 322, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 323, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 323, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 323, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 323, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 324, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 324, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 324, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 324, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 325, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 325, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 325, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 325, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 326, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 326, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 326, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 326, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 327, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 327, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 327, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 327, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 328, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 328, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 328, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 328, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 329, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 329, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 329, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 329, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 330, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 330, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 330, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 330, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 331, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 331, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 331, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 331, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 334, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 334, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 334, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 334, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 335, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 335, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 335, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 335, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 336, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 336, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 336, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 336, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 337, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 337, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 337, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 337, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 338, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 338, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 338, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 338, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 339, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 339, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 339, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 339, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 340, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 340, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 340, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 340, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 341, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 341, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 341, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 341, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 342, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 342, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 342, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 342, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 343, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 343, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 343, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 343, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 344, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 344, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 344, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 344, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 345, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 345, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 345, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 345, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 346, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 346, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 346, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 346, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 347, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 347, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 347, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 347, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 348, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 348, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 348, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 348, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 349, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 349, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 349, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 349, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 350, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 350, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 350, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 350, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 351, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 351, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 351, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 351, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 352, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 352, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 352, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 352, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 353, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 353, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 353, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 353, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 356, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 356, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 356, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 356, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 357, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 357, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 357, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 357, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 358, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 358, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 358, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 358, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 359, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 359, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 359, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 359, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 360, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 360, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 360, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 360, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 361, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 361, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 361, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 361, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 362, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 362, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 362, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 362, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 363, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 363, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 363, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 363, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 364, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 364, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 364, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 364, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 365, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 365, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 365, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 365, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 366, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 366, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 366, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 366, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 367, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 367, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 367, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 367, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 368, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 368, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 368, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 368, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 369, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 369, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 369, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 369, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 370, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 370, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 370, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 370, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 371, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 371, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 371, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 371, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 372, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 372, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 372, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 372, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 373, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 373, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 373, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 373, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 374, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 374, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 374, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 374, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 375, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 375, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 375, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 375, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 378, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 378, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 378, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 378, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 379, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 379, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 379, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 379, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 380, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 380, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 380, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 380, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 381, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 381, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 381, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 381, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 382, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 382, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 382, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 382, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 383, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 383, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 383, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 383, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 384, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 384, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 384, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 384, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 385, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 385, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 385, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 385, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 386, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 386, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 386, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 386, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 387, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 387, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 387, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 387, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 388, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 388, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 388, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 388, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 389, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 389, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 389, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 389, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 390, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 390, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 390, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 390, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 391, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 391, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 391, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 391, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 392, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 392, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 392, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 392, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 393, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 393, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 393, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 393, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 394, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 394, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 394, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 394, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 395, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 395, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 395, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 395, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 396, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 396, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 396, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 396, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 397, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 397, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 397, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 397, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 400, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 400, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 400, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 400, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 401, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 401, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 401, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 401, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 402, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 402, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 402, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 402, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 403, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 403, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 403, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 403, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 404, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 404, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 404, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 404, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 405, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 405, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 405, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 405, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 406, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 406, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 406, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 406, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 407, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 407, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 407, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 407, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 408, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 408, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 408, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 408, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 409, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 409, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 409, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 409, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 410, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 410, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 410, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 410, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 411, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 411, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 411, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 411, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 412, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 412, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 412, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 412, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 413, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 413, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 413, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 413, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 414, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 414, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 414, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 414, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 415, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 415, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 415, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 415, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 416, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 416, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 416, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 416, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 417, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 417, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 417, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 417, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 418, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 418, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 418, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 418, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 419, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 419, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 419, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 419, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 422, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 422, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 422, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 422, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 423, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 423, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 423, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 423, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 424, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 424, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 424, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 424, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 425, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 425, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 425, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 425, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 426, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 426, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 426, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 426, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 427, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 427, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 427, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 427, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 428, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 428, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 428, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 428, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 429, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 429, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 429, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 429, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 430, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 430, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 430, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 430, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 431, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 431, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 431, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 431, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 432, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 432, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 432, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 432, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 433, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 433, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 433, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 433, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 434, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 434, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 434, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 434, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 435, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 435, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 435, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 435, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 436, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 436, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 436, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 436, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 437, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 437, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 437, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 437, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 438, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 438, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 438, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 438, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 439, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 439, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 439, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 439, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 440, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 440, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 440, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 440, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 441, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 441, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 441, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 441, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 444, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 444, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 444, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 444, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 445, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 445, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 445, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 445, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 446, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 446, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 446, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 446, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 447, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 447, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 447, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 447, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 448, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 448, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 448, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 448, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 449, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 449, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 449, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 449, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 450, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 450, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 450, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 450, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 451, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 451, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 451, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 451, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 452, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 452, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 452, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 452, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 453, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 453, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 453, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 453, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 454, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 454, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 454, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 454, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 455, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 455, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 455, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 455, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 456, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 456, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 456, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 456, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 457, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 457, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 457, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 457, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 458, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 458, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 458, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 458, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 459, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 459, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 459, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 459, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 460, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 460, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 460, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 460, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 461, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 461, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 461, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 461, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 462, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 462, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 462, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 462, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 463, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 463, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 463, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 463, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 466, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 466, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 466, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 466, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 467, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 467, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 467, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 467, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 468, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 468, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 468, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 468, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 469, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 469, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 469, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 469, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 470, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 470, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 470, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 470, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 471, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 471, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 471, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 471, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 472, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 472, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 472, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 472, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 473, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 473, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 473, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 473, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 474, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 474, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 474, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 474, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 475, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 475, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 475, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 475, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 476, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 476, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 476, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 476, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 477, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 477, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 477, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 477, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 478, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 478, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 478, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 478, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 479, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 479, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 479, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 479, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 480, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 480, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 480, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 480, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 481, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 481, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 481, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 481, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 482, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 482, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 482, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 482, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 483, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 483, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 483, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 483, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 484, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 484, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 484, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 484, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 485, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 485, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 485, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 485, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 488, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 488, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 488, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 488, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 489, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 489, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 489, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 489, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 490, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 490, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 490, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 490, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 491, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 491, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 491, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 491, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 492, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 492, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 492, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 492, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 493, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 493, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 493, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 493, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 494, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 494, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 494, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 494, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 495, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 495, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 495, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 495, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 496, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 496, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 496, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 496, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 497, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 497, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 497, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 497, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 498, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 498, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 498, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 498, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 499, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 499, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 499, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 499, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 500, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 500, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 500, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 500, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 501, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 501, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 501, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 501, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 502, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 502, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 502, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 502, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 503, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 503, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 503, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 503, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 504, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 504, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 504, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 504, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 505, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 505, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 505, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 505, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 506, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 506, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 506, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 506, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 507, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 507, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 507, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 507, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 510, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 510, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 510, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 510, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 511, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 511, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 511, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 511, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 512, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 512, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 512, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 512, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 513, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 513, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 513, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 513, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 514, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 514, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 514, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 514, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 515, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 515, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 515, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 515, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 516, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 516, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 516, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 516, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 517, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 517, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 517, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 517, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 518, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 518, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 518, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 518, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 519, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 519, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 519, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 519, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 520, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 520, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 520, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 520, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 521, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 521, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 521, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 521, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
C:\Python3x\Lib\difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 522, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 522, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
C:\Python3x\Lib\difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 522, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 522, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
> cruncher = SequenceMatcher(self.charjunk)
C:\Python3x\Lib\difflib.py:915:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.SequenceMatcher pytest object at [hex]>
isjunk = <function IS_CHARACTER_JUNK at 0x000001A97A5ADC60>, a = '', b = ''
autojunk = True
def __init__(self, isjunk=None, a='', b='', autojunk=True):
"""Construct a SequenceMatcher.
Optional arg isjunk is None (the default), or a one-argument
function that takes a sequence element and returns true iff the
element is junk. None is equivalent to passing "lambda x: 0", i.e.
no elements are considered to be junk. For example, pass
lambda x: x in " \\t"
if you're comparing lines as sequences of characters, and don't
want to synch up on blanks or hard tabs.
Optional arg a is the first of two sequences to be compared. By
default, an empty string. The elements of a must be hashable. See
also .set_seqs() and .set_seq1().
Optional arg b is the second of two sequences to be compared. By
default, an empty string. The elements of b must be hashable. See
also .set_seqs() and .set_seq2().
Optional arg autojunk should be set to False to disable the
"automatic junk heuristic" that treats popular elements as junk
(see module documentation for more information).
"""
# Members:
# a
# first sequence
# b
# second sequence; differences are computed as "what do
# we need to do to 'a' to change it into 'b'?"
# b2j
# for x in b, b2j[x] is a list of the indices (into b)
# at which x appears; junk and popular elements do not appear
# fullbcount
# for x in b, fullbcount[x] == the number of times x
# appears in b; only materialized if really needed (used
# only for computing quick_ratio())
# matching_blocks
# a list of (i, j, k) triples, where a[i:i+k] == b[j:j+k];
# ascending & non-overlapping in i and in j; terminated by
# a dummy (len(a), len(b), 0) sentinel
# opcodes
# a list of (tag, i1, i2, j1, j2) tuples, where tag is
# one of
# 'replace' a[i1:i2] should be replaced by b[j1:j2]
# 'delete' a[i1:i2] should be deleted
# 'insert' b[j1:j2] should be inserted
# 'equal' a[i1:i2] == b[j1:j2]
# isjunk
# a user-supplied function taking a sequence element and
# returning true iff the element is "junk" -- this has
# subtle but helpful effects on the algorithm, which I'll
# get around to writing up someday <0.9 wink>.
# DON'T USE! Only __chain_b uses this. Use "in self.bjunk".
# bjunk
# the items in b for which isjunk is True.
# bpopular
# nonjunk items in b treated as junk by the heuristic (if used).
self.isjunk = isjunk
self.a = self.b = None
self.autojunk = autojunk
> self.set_seqs(a, b)
C:\Python3x\Lib\difflib.py:182:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.SequenceMatcher pytest object at [hex]>, a = '', b = ''
def set_seqs(self, a, b):
"""Set the two sequences to be compared.
>>> s = SequenceMatcher()
>>> s.set_seqs("abcd", "bcde")
>>> s.ratio()
0.75
"""
self.set_seq1(a)
> self.set_seq2(b)
C:\Python3x\Lib\difflib.py:194:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.SequenceMatcher pytest object at [hex]>, b = ''
def set_seq2(self, b):
"""Set the second sequence to be compared.
The first sequence to be compared is not changed.
>>> s = SequenceMatcher(None, "abcd", "bcde")
>>> s.ratio()
0.75
>>> s.set_seq2("abcd")
>>> s.ratio()
1.0
>>>
SequenceMatcher computes and caches detailed information about the
second sequence, so if you want to compare one sequence S against
many sequences, use .set_seq2(S) once and call .set_seq1(x)
repeatedly for each of the other sequences.
See also set_seqs() and set_seq1().
"""
if b is self.b:
return
self.b = b
self.matching_blocks = self.opcodes = None
self.fullbcount = None
> self.__chain_b()
C:\Python3x\Lib\difflib.py:248:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.SequenceMatcher pytest object at [hex]>
def __chain_b(self):
# Because isjunk is a user-defined (not C) function, and we test
# for junk a LOT, it's important to minimize the number of calls.
# Before the tricks described here, __chain_b was by far the most
# time-consuming routine in the whole module! If anyone sees
# Jim Roskind, thank him again for profile.py -- I never would
# have guessed that.
# The first trick is to build b2j ignoring the possibility
# of junk. I.e., we don't call isjunk at all yet. Throwing
# out the junk later is much cheaper than building b2j "right"
# from the start.
b = self.b
self.b2j = b2j = {}
for i, elt in enumerate(b):
indices = b2j.setdefault(elt, [])
indices.append(i)
# Purge junk elements
self.bjunk = junk = set()
isjunk = self.isjunk
if isjunk:
> for elt in b2j.keys():
E RecursionError: maximum recursion depth exceeded while calling a Python object
C:\Python3x\Lib\difflib.py:288: RecursionError
|