hub / github.com/huggingface/transformers / _run_epoch

Method _run_epoch

src/transformers/trainer.py:1662–1825 · view source on GitHub ↗

Run one full pass over the dataloader.

(
        self,
        model,
        epoch,
        train_dataloader,
        steps_in_epoch,
        num_update_steps_per_epoch,
        trial,
        ignore_keys_for_eval,
        start_time,
        resume_from_checkpoint,
        epochs_trained,
        steps_trained_in_current_epoch,
    )

Source from the content-addressed store, hash-verified

1660	return model, train_dataloader
1661
1662	def _run_epoch(
1663	self,
1664	model,
1665	epoch,
1666	train_dataloader,
1667	steps_in_epoch,
1668	num_update_steps_per_epoch,
1669	trial,
1670	ignore_keys_for_eval,
1671	start_time,
1672	resume_from_checkpoint,
1673	epochs_trained,
1674	steps_trained_in_current_epoch,
1675	):
1676	"""Run one full pass over the dataloader."""
1677
1678	step = -1
1679	grad_norm = None
1680	learning_rate = None
1681	rng_to_sync = False
1682
1683	# Handle resumption from checkpoint: skip already-trained batches in the resumed epoch
1684	num_update_steps_trained = 0
1685	if epoch == epochs_trained and resume_from_checkpoint is not None:
1686	if steps_trained_in_current_epoch > 0 and not self.args.ignore_data_skip:
1687	train_dataloader = skip_first_batches(train_dataloader, steps_trained_in_current_epoch)
1688	step = steps_trained_in_current_epoch - 1
1689	num_update_steps_trained = steps_trained_in_current_epoch // self.args.gradient_accumulation_steps
1690	rng_to_sync = True
1691	elif steps_trained_in_current_epoch == 0:
1692	self._load_rng_state(resume_from_checkpoint)
1693
1694	if hasattr(train_dataloader, "set_epoch"):
1695	train_dataloader.set_epoch(epoch)
1696	epoch_iterator = iter(train_dataloader)
1697
1698	# We chunkify the epoch iterator into gradient accumulation steps `n` batches
1699	remainder = steps_in_epoch % self.args.gradient_accumulation_steps
1700	if remainder == 0:
1701	remainder = self.args.gradient_accumulation_steps
1702
1703	# Outer loop: one iteration per optimizer step. Each iteration prefetches
1704	# `gradient_accumulation_steps` batches (fewer for the last step if the epoch
1705	# doesn't divide evenly).
1706	for update_step in range(num_update_steps_trained, num_update_steps_per_epoch):
1707	num_batches = (
1708	self.args.gradient_accumulation_steps if update_step != (num_update_steps_per_epoch - 1) else remainder
1709	)
1710	batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches, self.args.device)
1711
1712	# This is used to correctly scale the loss when the last accumulation step has fewer batches.
1713	# Not used if `num_items_in_batch` is not None.
1714	self.current_gradient_accumulation_steps = len(batch_samples)
1715
1716	# need to sync after if we skipped the batches in `get_batch_samples` for shuffle order reason
1717	if rng_to_sync:
1718	self._load_rng_state(resume_from_checkpoint)
1719	rng_to_sync = False

Callers 1

_inner_training_loop · Method · 0.95

Calls 15

_load_rng_state · Method · 0.95

get_batch_samples · Method · 0.95

training_step · Method · 0.95

floating_point_ops · Method · 0.95

_track_num_input_tokens · Method · 0.95

_clip_grad_norm · Method · 0.95

_get_grad_norm · Method · 0.95

_get_learning_rate · Method · 0.95

_maybe_log_save_evaluate · Method · 0.95

is_torch_xla_available · Function · 0.85

set_epoch · Method · 0.80

zero_grad · Method · 0.80

Tested by

no test coverage detected