Push model and checkpoint files to the Hub from a checkpoint folder.
`_push_from_checkpoint(self, checkpoint_folder: str) -> None`
| 4102 | ) |
| 4103 | |
| 4104 | def _push_from_checkpoint(self, checkpoint_folder: str) -> None: |
| 4105 | """Push model and checkpoint files to the Hub from a checkpoint folder.""" |
| 4106 | if not self.is_world_process_zero() or self.args.hub_strategy == HubStrategy.END: |
| 4107 | return |
| 4108 | # If we haven't finished the last push, we don't do this one unless args.hub_always_push=True. |
| 4109 | if not self.args.hub_always_push and self.push_in_progress is not None and not self.push_in_progress.is_done(): |
| 4110 | return |
| 4111 | |
| 4112 | self.callback_handler.on_push_begin(self.args, self.state, self.control) |
| 4113 | output_dir = self.args.output_dir |
| 4114 | # To avoid a new synchronization of all model weights, we just copy the file from the checkpoint folder |
| 4115 | modeling_files = [CONFIG_NAME, GENERATION_CONFIG_NAME, WEIGHTS_NAME, SAFE_WEIGHTS_NAME] |
| 4116 | # Add sharded checkpoints if we have an index |
| 4117 | for index_file in [WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_INDEX_NAME]: |
| 4118 | index_path = os.path.join(checkpoint_folder, index_file) |
| 4119 | if os.path.isfile(index_path): |
| 4120 | modeling_files.append(index_file) |
| 4121 | with open(index_path) as f: |
| 4122 | index = json.loads(f.read()) |
| 4123 | shard_files = list(set(index["weight_map"].values())) |
| 4124 | modeling_files.extend(shard_files) |
| 4125 | if is_peft_available(): |
| 4126 | modeling_files.extend([ADAPTER_CONFIG_NAME, ADAPTER_WEIGHTS_NAME, ADAPTER_SAFE_WEIGHTS_NAME]) |
| 4127 | for modeling_file in modeling_files: |
| 4128 | if os.path.isfile(os.path.join(checkpoint_folder, modeling_file)): |
| 4129 | shutil.copy(os.path.join(checkpoint_folder, modeling_file), os.path.join(output_dir, modeling_file)) |
| 4130 | # Saving the processing class is fast and we don't know how many files it may have spawned, so we resave it to be sure. |
| 4131 | if self.processing_class is not None: |
| 4132 | self.processing_class.save_pretrained(output_dir) |
| 4133 | # Same for the training arguments |
| 4134 | torch.save(self.args, os.path.join(output_dir, TRAINING_ARGS_NAME)) |
| 4135 | |
| 4136 | if self.args.save_strategy == SaveStrategy.STEPS: |
| 4137 | commit_message = f"Training in progress, step {self.state.global_step}" |
| 4138 | else: |
| 4139 | commit_message = f"Training in progress, epoch {int(self.state.epoch)}" |
| 4140 | |
| 4141 | model_push_job = hf_api().upload_folder( |
| 4142 | repo_id=self.hub_model_id, |
| 4143 | folder_path=output_dir, |
| 4144 | commit_message=commit_message, |
| 4145 | token=self.args.hub_token, |
| 4146 | run_as_future=True, |
| 4147 | ignore_patterns=["_*", f"{PREFIX_CHECKPOINT_DIR}-*"], |
| 4148 | revision=self.args.hub_revision, |
| 4149 | ) |
| 4150 | |
| 4151 | push_jobs = [model_push_job] |
| 4152 | |
| 4153 | if self.args.hub_strategy in [HubStrategy.CHECKPOINT, HubStrategy.ALL_CHECKPOINTS]: |
| 4154 | path_in_repo = ( |
| 4155 | "last-checkpoint" if self.args.hub_strategy == HubStrategy.CHECKPOINT else Path(checkpoint_folder).name |
| 4156 | ) |
| 4157 | checkpoint_push = hf_api().upload_folder( |
| 4158 | repo_id=self.hub_model_id, |
| 4159 | folder_path=checkpoint_folder, |
| 4160 | path_in_repo=path_in_repo, |
| 4161 | commit_message=commit_message + ", checkpoint", |
no test coverage detected