MCPcopy
hub / github.com/huggingface/transformers / _finalize_training

Method _finalize_training

src/transformers/trainer.py:1827–1874  ·  view source on GitHub ↗

Finalize training: metrics, best-model loading, cleanup. Returns TrainOutput.

(self, trial, num_train_samples, start_time)

Source from the content-addressed store, hash-verified

1825 )
1826
def _finalize_training(self, trial, num_train_samples, start_time):
    """
    Wrap up a training run and build its `TrainOutput`.

    In order: fold the pending loss into the running total, assemble and log the
    final training metrics, optionally reload the best checkpoint, prune extra
    checkpoints when only one may be kept, fire the `on_train_end` callbacks, wait
    for any in-flight push, and remove the NEFTune hook if one was installed.

    Args:
        trial: Forwarded to `self._get_output_dir` to resolve the checkpoint directory.
        num_train_samples: Forwarded to `speed_metrics` as `num_samples`.
        start_time: Forwarded to `speed_metrics` as the start of the timed interval.

    Returns:
        `TrainOutput` built from the final global step, the average training loss
        and the metrics dictionary.
    """
    logger.info("\n\nTraining completed. Do not forget to share your model on huggingface.co/models =)\n\n")

    # Fold whatever loss is still pending in `_tr_loss` into the running total.
    self._total_loss_scalar += self._tr_loss.item()
    # Floor the divisor at a small positive value so a run with zero completed
    # steps does not raise ZeroDivisionError.
    step_divisor = max(self.state.global_step, 0.001)
    mean_loss = self._total_loss_scalar / step_divisor

    metrics = speed_metrics("train", start_time, num_samples=num_train_samples, num_steps=self.state.max_steps)
    # `store_flos` runs before `total_flos` is read back from the state.
    self.store_flos()
    metrics["total_flos"] = self.state.total_flos
    metrics["train_loss"] = mean_loss

    self._memory_tracker.stop_and_update_metrics(metrics)
    self.log(metrics)

    wants_best_model = self.args.load_best_model_at_end
    if wants_best_model and self.state.best_model_checkpoint is not None:
        self._load_best_model()

    # Read the best-checkpoint path once, after any best-model loading, and reuse it below.
    best_checkpoint = self.state.best_model_checkpoint
    checkpoints_sorted = sort_checkpoints(
        output_dir=self._get_output_dir(trial),
        best_model_checkpoint=best_checkpoint,
    )

    # With save_total_limit=1, only the best checkpoint is kept: every other checkpoint
    # is removed, provided this process is allowed to save.
    prune_extra_checkpoints = (
        self.args.should_save and best_checkpoint is not None and self.args.save_total_limit == 1
    )
    if prune_extra_checkpoints:
        for checkpoint in checkpoints_sorted:
            if os.path.samefile(checkpoint, best_checkpoint):
                # This entry is the best checkpoint itself; keep it.
                continue
            logger.info(f"Deleting older checkpoint [{checkpoint}] due to args.save_total_limit")
            shutil.rmtree(checkpoint, ignore_errors=True)

    self.control = self.callback_handler.on_train_end(self.args, self.state, self.control)

    # Block until the checkpoint upload in progress (if any) has finished.
    self._finish_current_push()

    # NEFTune installs a forward post hook on the embedding layer; remove it so the
    # model gets its original forward pass back once training is over.
    if self.neftune_noise_alpha is not None:
        deactivate_neftune(self.model, self.neftune_hook_handle, self.accelerator)
    self.is_in_train = False

    return TrainOutput(self.state.global_step, mean_loss, metrics)
1875
1876 def training_step(
1877 self,

Callers 1

_inner_training_loop — Method · 0.95

Calls 12

store_flos — Method · 0.95
log — Method · 0.95
_load_best_model — Method · 0.95
_get_output_dir — Method · 0.95
_finish_current_push — Method · 0.95
speed_metrics — Function · 0.85
sort_checkpoints — Function · 0.85
deactivate_neftune — Function · 0.85
TrainOutput — Class · 0.85
info — Method · 0.45
on_train_end — Method · 0.45

Tested by

no test coverage detected