MCPcopy
hub / github.com/huggingface/transformers / _run_epoch

Method _run_epoch

src/transformers/trainer.py:1662–1825  ·  view source on GitHub ↗

Run one full pass over the dataloader.

(
        self,
        model,
        epoch,
        train_dataloader,
        steps_in_epoch,
        num_update_steps_per_epoch,
        trial,
        ignore_keys_for_eval,
        start_time,
        resume_from_checkpoint,
        epochs_trained,
        steps_trained_in_current_epoch,
    )

Source from the content-addressed store, hash-verified

1660 return model, train_dataloader
1661
1662 def _run_epoch(
1663 self,
1664 model,
1665 epoch,
1666 train_dataloader,
1667 steps_in_epoch,
1668 num_update_steps_per_epoch,
1669 trial,
1670 ignore_keys_for_eval,
1671 start_time,
1672 resume_from_checkpoint,
1673 epochs_trained,
1674 steps_trained_in_current_epoch,
1675 ):
1676 """Run one full pass over the dataloader."""
1677
1678 step = -1
1679 grad_norm = None
1680 learning_rate = None
1681 rng_to_sync = False
1682
1683 # Handle resumption from checkpoint: skip already-trained batches in the resumed epoch
1684 num_update_steps_trained = 0
1685 if epoch == epochs_trained and resume_from_checkpoint is not None:
1686 if steps_trained_in_current_epoch > 0 and not self.args.ignore_data_skip:
1687 train_dataloader = skip_first_batches(train_dataloader, steps_trained_in_current_epoch)
1688 step = steps_trained_in_current_epoch - 1
1689 num_update_steps_trained = steps_trained_in_current_epoch // self.args.gradient_accumulation_steps
1690 rng_to_sync = True
1691 elif steps_trained_in_current_epoch == 0:
1692 self._load_rng_state(resume_from_checkpoint)
1693
1694 if hasattr(train_dataloader, "set_epoch"):
1695 train_dataloader.set_epoch(epoch)
1696 epoch_iterator = iter(train_dataloader)
1697
1698 # We split the epoch iterator into chunks of `gradient_accumulation_steps` batches (the last chunk may be smaller)
1699 remainder = steps_in_epoch % self.args.gradient_accumulation_steps
1700 if remainder == 0:
1701 remainder = self.args.gradient_accumulation_steps
1702
1703 # Outer loop: one iteration per optimizer step. Each iteration prefetches
1704 # `gradient_accumulation_steps` batches (fewer for the last step if the epoch
1705 # doesn't divide evenly).
1706 for update_step in range(num_update_steps_trained, num_update_steps_per_epoch):
1707 num_batches = (
1708 self.args.gradient_accumulation_steps if update_step != (num_update_steps_per_epoch - 1) else remainder
1709 )
1710 batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches, self.args.device)
1711
1712 # This is used to correctly scale the loss when the last accumulation step has fewer batches.
1713 # Not used if `num_items_in_batch` is not None.
1714 self.current_gradient_accumulation_steps = len(batch_samples)
1715
1716 # If batches were skipped on resume, the RNG state must be synced only after `get_batch_samples`, for shuffle-order reasons
1717 if rng_to_sync:
1718 self._load_rng_state(resume_from_checkpoint)
1719 rng_to_sync = False

Callers 1

_inner_training_loopMethod · 0.95

Calls 15

_load_rng_stateMethod · 0.95
get_batch_samplesMethod · 0.95
training_stepMethod · 0.95
floating_point_opsMethod · 0.95
_clip_grad_normMethod · 0.95
_get_grad_normMethod · 0.95
_get_learning_rateMethod · 0.95
is_torch_xla_availableFunction · 0.85
set_epochMethod · 0.80
zero_gradMethod · 0.80

Tested by

no test coverage detected