Check if the code commented as a copy in a file matches the original. Args: filename (`str`): The name of the file to check. overwrite (`bool`, *optional*, defaults to `False`): Whether or not to overwrite the copies when they don't match. buffer (`dict`, *optional*): The buffer used to store the previous results in order to speed up the process.
(
filename: str, overwrite: bool = False, buffer: dict | None = None
)
| 643 | |
| 644 | |
| 645 | def is_copy_consistent( |
| 646 | filename: str, overwrite: bool = False, buffer: dict | None = None |
| 647 | ) -> list[tuple[str, int]] | None: |
| 648 | """ |
| 649 | Check if the code commented as a copy in a file matches the original. |
| 650 | |
| 651 | Args: |
| 652 | filename (`str`): |
| 653 | The name of the file to check. |
| 654 | overwrite (`bool`, *optional*, defaults to `False`): |
| 655 | Whether or not to overwrite the copies when they don't match. |
| 656 | buffer (`dict`, *optional*): |
| 657 | The buffer used to store the previous results in order to speed up the process. |
| 658 | |
| 659 | Returns: |
| 660 | `Optional[List[Tuple[str, int]]]`: If `overwrite=False`, returns the list of differences as tuples `(str, int)` |
| 661 | with the name of the object having a diff and the line number where there is the first diff. |
| 662 | """ |
| 663 | base_path = TRANSFORMERS_PATH if not filename.startswith("tests") else MODEL_TEST_PATH |
| 664 | |
| 665 | with open(filename, "r", encoding="utf-8", newline="\n") as f: |
| 666 | lines = f.readlines() |
| 667 | diffs = [] |
| 668 | line_index = 0 |
| 669 | # Not a for loop cause `lines` is going to change (if `overwrite=True`). |
| 670 | search_re = _re_copy_warning_for_test_file if filename.startswith("tests") else _re_copy_warning |
| 671 | while line_index < len(lines): |
| 672 | search = search_re.search(lines[line_index]) |
| 673 | if search is None: |
| 674 | line_index += 1 |
| 675 | continue |
| 676 | |
| 677 | # There is some copied code here, let's retrieve the original. |
| 678 | indent, object_name, replace_pattern = search.groups() |
| 679 | |
| 680 | # Find the file lines, the object's code, and its blocks |
| 681 | try: |
| 682 | target_lines, theoretical_code, theoretical_code_splits = find_code_and_splits( |
| 683 | object_name, base_path, buffer=buffer |
| 684 | ) |
| 685 | except Exception as exc: |
| 686 | logger.error(f"[31mError while trying to find source code for {filename}.\n\n" + str(exc) + "[0") |
| 687 | return [] |
| 688 | |
| 689 | # code replaced by the patterns |
| 690 | theoretical_code_blocks = OrderedDict() |
| 691 | for name, start, end in theoretical_code_splits: |
| 692 | name = replace_code(name, replace_pattern) |
| 693 | code = "".join(target_lines[start:end]) |
| 694 | code = replace_code(code, replace_pattern) |
| 695 | theoretical_code_blocks[name] = code |
| 696 | |
| 697 | theoretical_indent = get_indent(theoretical_code) |
| 698 | |
| 699 | # `start_index` is the index of the first line (the definition header) after `# Copied from`. |
| 700 | # (`indent != theoretical_indent` doesn't seem to occur so far, not sure what this case is for.) |
| 701 | start_index = line_index + 1 if indent == theoretical_indent else line_index |
| 702 | # enter the block body |