hub / github.com/huggingface/transformers / _finalize_training

Method _finalize_training

src/transformers/trainer.py:1827–1874 · view source on GitHub ↗

Finalize training: metrics, best-model loading, cleanup. Returns TrainOutput.

(self, trial, num_train_samples, start_time)

Source from the content-addressed store, hash-verified

1825	)
1826
def _finalize_training(self, trial, num_train_samples, start_time):
    """
    Wrap up a finished training run and build its result.

    Folds the outstanding loss into the running total, assembles and logs the final train metrics, optionally
    reloads the best checkpoint, prunes non-best checkpoints when `save_total_limit == 1`, fires the
    `on_train_end` callback event, waits for any pending push, removes the NEFTune hook when one is configured
    and clears the in-training flag.

    Args:
        trial: Forwarded to `self._get_output_dir` to resolve the directory whose checkpoints are listed.
        num_train_samples: Forwarded to `speed_metrics` as `num_samples`.
        start_time: Forwarded to `speed_metrics` as the start reference of the run.

    Returns:
        `TrainOutput` built from `(self.state.global_step, train_loss, metrics)`.
    """
    logger.info("\n\nTraining completed. Do not forget to share your model on huggingface.co/models =)\n\n")

    # Fold in whatever loss has accumulated since the last flush. The divisor is floored at 0.001 so that a
    # run which never advanced `global_step` does not raise ZeroDivisionError.
    self._total_loss_scalar += self._tr_loss.item()
    train_loss = self._total_loss_scalar / max(self.state.global_step, 0.001)

    metrics = speed_metrics("train", start_time, num_samples=num_train_samples, num_steps=self.state.max_steps)
    # `store_flos` runs before `total_flos` is read so the reported value reflects its update.
    self.store_flos()
    metrics.update({"total_flos": self.state.total_flos, "train_loss": train_loss})

    self._memory_tracker.stop_and_update_metrics(metrics)
    self.log(metrics)

    if self.args.load_best_model_at_end and self.state.best_model_checkpoint is not None:
        self._load_best_model()

    run_dir = self._get_output_dir(trial)
    checkpoints_sorted = sort_checkpoints(output_dir=run_dir, best_model_checkpoint=self.state.best_model_checkpoint)

    # With save_total_limit=1, only the best checkpoint is kept: every other checkpoint directory is removed,
    # provided this process is the one allowed to save.
    best_checkpoint = self.state.best_model_checkpoint
    keep_only_best = self.args.should_save and best_checkpoint is not None and self.args.save_total_limit == 1
    if keep_only_best:
        for checkpoint in checkpoints_sorted:
            if os.path.samefile(checkpoint, best_checkpoint):
                continue
            logger.info(f"Deleting older checkpoint [{checkpoint}] due to args.save_total_limit")
            shutil.rmtree(checkpoint, ignore_errors=True)

    self.control = self.callback_handler.on_train_end(self.args, self.state, self.control)

    # Block until the checkpoint upload in progress (if any) has finished.
    self._finish_current_push()

    # Restore the embedding layer's original forward pass by removing the NEFTune forward post hook.
    if self.neftune_noise_alpha is not None:
        deactivate_neftune(self.model, self.neftune_hook_handle, self.accelerator)
    self.is_in_train = False

    return TrainOutput(self.state.global_step, train_loss, metrics)
1875
1876	def training_step(
1877	self,

Callers 1

_inner_training_loopMethod · 0.95

Calls 12

store_flosMethod · 0.95

logMethod · 0.95

_load_best_modelMethod · 0.95

_get_output_dirMethod · 0.95

_finish_current_pushMethod · 0.95

speed_metricsFunction · 0.85

sort_checkpointsFunction · 0.85

deactivate_neftuneFunction · 0.85

TrainOutputClass · 0.85

stop_and_update_metricsMethod · 0.80

infoMethod · 0.45

on_train_endMethod · 0.45

Tested by

no test coverage detected