MCPcopy
hub / github.com/huggingface/transformers / is_copy_consistent

Function is_copy_consistent

utils/check_copies.py:645–839  ·  view source on GitHub ↗

Check if the code commented as a copy in a file matches the original. Args: filename (`str`): The name of the file to check. overwrite (`bool`, *optional*, defaults to `False`): Whether or not to overwrite the copies when they don't match. buffer (`dict`, *optional*): The buffer used to store the previous results in order to speed up the process.

(
    filename: str, overwrite: bool = False, buffer: dict | None = None
)

Source from the content-addressed store, hash-verified

643
644
645def is_copy_consistent(
646 filename: str, overwrite: bool = False, buffer: dict | None = None
647) -> list[tuple[str, int]] | None:
648 """
649 Check if the code commented as a copy in a file matches the original.
650
651 Args:
652 filename (`str`):
653 The name of the file to check.
654 overwrite (`bool`, *optional*, defaults to `False`):
655 Whether or not to overwrite the copies when they don't match.
656 buffer (`dict`, *optional*):
657 The buffer used to store the previous results in order to speed up the process.
658
659 Returns:
660 `Optional[List[Tuple[str, int]]]`: If `overwrite=False`, returns the list of differences as tuples `(str, int)`
661 with the name of the object having a diff and the line number where there is the first diff.
662 """
663 base_path = TRANSFORMERS_PATH if not filename.startswith("tests") else MODEL_TEST_PATH
664
665 with open(filename, "r", encoding="utf-8", newline="\n") as f:
666 lines = f.readlines()
667 diffs = []
668 line_index = 0
669 # Not a for loop cause `lines` is going to change (if `overwrite=True`).
670 search_re = _re_copy_warning_for_test_file if filename.startswith("tests") else _re_copy_warning
671 while line_index < len(lines):
672 search = search_re.search(lines[line_index])
673 if search is None:
674 line_index += 1
675 continue
676
677 # There is some copied code here, let's retrieve the original.
678 indent, object_name, replace_pattern = search.groups()
679
680 # Find the file lines, the object's code, and its blocks
681 try:
682 target_lines, theoretical_code, theoretical_code_splits = find_code_and_splits(
683 object_name, base_path, buffer=buffer
684 )
685 except Exception as exc:
686 logger.error(f"Error while trying to find source code for {filename}.\n\n" + str(exc) + "[0")
687 return []
688
689 # code replaced by the patterns
690 theoretical_code_blocks = OrderedDict()
691 for name, start, end in theoretical_code_splits:
692 name = replace_code(name, replace_pattern)
693 code = "".join(target_lines[start:end])
694 code = replace_code(code, replace_pattern)
695 theoretical_code_blocks[name] = code
696
697 theoretical_indent = get_indent(theoretical_code)
698
699 # `start_index` is the index of the first line (the definition header) after `# Copied from`.
700 # (`indent != theoretical_indent` doesn't seem to occur so far, not sure what this case is for.)
701 start_index = line_index + 1 if indent == theoretical_indent else line_index
702 # enter the block body

Calls 14

find_code_and_splitsFunction · 0.85
replace_codeFunction · 0.85
_should_continueFunction · 0.85
split_code_into_blocksFunction · 0.85
_sanity_check_splitsFunction · 0.85
stylifyFunction · 0.85
check_codes_matchFunction · 0.85
joinMethod · 0.80
splitMethod · 0.80
get_indentFunction · 0.70
errorMethod · 0.45
keysMethod · 0.45