Main training entry point. Args: resume_from_checkpoint (`str` or `bool`, *optional*): If a `str`, local path to a saved checkpoint as saved by a previous instance of [`Trainer`]. If a `bool` and equals `True`, load the last checkpoint in *args.output_dir* as saved by a previous instance of [`Trainer`].
(
self,
resume_from_checkpoint: str | bool | None = None,
trial: "optuna.Trial | dict[str, Any] | None" = None,
ignore_keys_for_eval: list[str] | None = None,
)
| 1329 | # ---- Training ---- |
| 1330 | |
| 1331 | def train( |
| 1332 | self, |
| 1333 | resume_from_checkpoint: str | bool | None = None, |
| 1334 | trial: "optuna.Trial | dict[str, Any] | None" = None, |
| 1335 | ignore_keys_for_eval: list[str] | None = None, |
| 1336 | ) -> TrainOutput: |
| 1337 | """ |
| 1338 | Main training entry point. |
| 1339 | |
| 1340 | Args: |
| 1341 | resume_from_checkpoint (`str` or `bool`, *optional*): |
| 1342 | If a `str`, local path to a saved checkpoint as saved by a previous instance of [`Trainer`]. If a |
| 1343 | `bool` and equals `True`, load the last checkpoint in *args.output_dir* as saved by a previous instance |
| 1344 | of [`Trainer`]. If present, training will resume from the model/optimizer/scheduler states loaded here. |
| 1345 | trial (`optuna.Trial` or `dict[str, Any]`, *optional*): |
| 1346 | The trial run or the hyperparameter dictionary for hyperparameter search. |
| 1347 | ignore_keys_for_eval (`list[str]`, *optional*): |
| 1348 | A list of keys in the output of your model (if it is a dictionary) that should be ignored when |
| 1349 | gathering predictions for evaluation during the training. |
| 1350 | |
| 1351 | Returns: |
| 1352 | [`~trainer_utils.TrainOutput`]: Object containing the global step count, training loss, and metrics. |
| 1353 | """ |
| 1354 | if resume_from_checkpoint is False: |
| 1355 | resume_from_checkpoint = None |
| 1356 | |
| 1357 | # memory metrics - must set up as early as possible |
| 1358 | self._memory_tracker.start() |
| 1359 | |
| 1360 | args = self.args |
| 1361 | |
| 1362 | self.is_in_train = True |
| 1363 | |
| 1364 | # Model re-init |
| 1365 | if self.model_init is not None: |
| 1366 | # Seed must be set before instantiating the model when using model_init. |
| 1367 | enable_full_determinism(args.seed) if args.full_determinism else set_seed(args.seed) |
| 1368 | self.model = self.call_model_init(trial) |
| 1369 | # Reinitializes optimizer and scheduler |
| 1370 | self.optimizer, self.lr_scheduler = None, None |
| 1371 | if self.place_model_on_device: |
| 1372 | self._move_model_to_device(self.model, args.device) |
| 1373 | self.model_wrapped = self.model |
| 1374 | |
| 1375 | if self.args.use_liger_kernel: |
| 1376 | apply_liger_kernel(self.model, self.args.liger_kernel_config) |
| 1377 | |
| 1378 | # When fp16/bf16 full eval is enabled, __init__ skips device placement so that |
| 1379 | # evaluation_loop can cast dtype and move in one step. Move the model now for training. |
| 1380 | if (args.fp16_full_eval or args.bf16_full_eval) and not self.is_model_parallel and self.model_init is None: |
| 1381 | self._move_model_to_device(self.model, args.device) |
| 1382 | |
| 1383 | # Activate gradient checkpointing if needed |
| 1384 | if args.gradient_checkpointing: |
| 1385 | self.model.gradient_checkpointing_enable(gradient_checkpointing_kwargs=args.gradient_checkpointing_kwargs) |
| 1386 | |
| 1387 | # If the model uses a tokenizer, it may have new tokens for fine-tuning purposes. |
| 1388 | if isinstance(self.processing_class, (PreTrainedTokenizerBase, ProcessorMixin)) and hasattr( |