MCPcopy
hub / github.com/huggingface/transformers / train

Method train

src/transformers/trainer.py:1331–1438  ·  view source on GitHub ↗

Main training entry point. Args: resume_from_checkpoint (`str` or `bool`, *optional*): If a `str`, local path to a saved checkpoint as saved by a previous instance of [`Trainer`]. If a `bool` and equals `True`, load the last checkpoint in *args.output_dir* as saved by a previous instance of [`Trainer`].

(
        self,
        resume_from_checkpoint: str | bool | None = None,
        trial: "optuna.Trial | dict[str, Any] | None" = None,
        ignore_keys_for_eval: list[str] | None = None,
    )

Source from the content-addressed store, hash-verified

1329 # ---- Training ----
1330
1331 def train(
1332 self,
1333 resume_from_checkpoint: str | bool | None = None,
1334 trial: "optuna.Trial | dict[str, Any] | None" = None,
1335 ignore_keys_for_eval: list[str] | None = None,
1336 ) -> TrainOutput:
1337 """
1338 Main training entry point.
1339
1340 Args:
1341 resume_from_checkpoint (`str` or `bool`, *optional*):
1342 If a `str`, local path to a saved checkpoint as saved by a previous instance of [`Trainer`]. If a
1343 `bool` and equals `True`, load the last checkpoint in *args.output_dir* as saved by a previous instance
1344 of [`Trainer`]. If present, training will resume from the model/optimizer/scheduler states loaded here.
1345 trial (`optuna.Trial` or `dict[str, Any]`, *optional*):
1346 The trial run or the hyperparameter dictionary for hyperparameter search.
1347 ignore_keys_for_eval (`list[str]`, *optional*):
1348 A list of keys in the output of your model (if it is a dictionary) that should be ignored when
1349 gathering predictions for evaluation during the training.
1350
1351 Returns:
1352 [`~trainer_utils.TrainOutput`]: Object containing the global step count, training loss, and metrics.
1353 """
1354 if resume_from_checkpoint is False:
1355 resume_from_checkpoint = None
1356
1357 # memory metrics - must set up as early as possible
1358 self._memory_tracker.start()
1359
1360 args = self.args
1361
1362 self.is_in_train = True
1363
1364 # Model re-init
1365 if self.model_init is not None:
1366 # Seed must be set before instantiating the model when using model_init.
1367 enable_full_determinism(args.seed) if args.full_determinism else set_seed(args.seed)
1368 self.model = self.call_model_init(trial)
1369 # Reinitializes optimizer and scheduler
1370 self.optimizer, self.lr_scheduler = None, None
1371 if self.place_model_on_device:
1372 self._move_model_to_device(self.model, args.device)
1373 self.model_wrapped = self.model
1374
1375 if self.args.use_liger_kernel:
1376 apply_liger_kernel(self.model, self.args.liger_kernel_config)
1377
1378 # When fp16/bf16 full eval is enabled, __init__ skips device placement so that
1379 # evaluation_loop can cast dtype and move in one step. Move the model now for training.
1380 if (args.fp16_full_eval or args.bf16_full_eval) and not self.is_model_parallel and self.model_init is None:
1381 self._move_model_to_device(self.model, args.device)
1382
1383 # Activate gradient checkpointing if needed
1384 if args.gradient_checkpointing:
1385 self.model.gradient_checkpointing_enable(gradient_checkpointing_kwargs=args.gradient_checkpointing_kwargs)
1386
1387 # If the model uses a tokenizer, it may have new tokens for fine-tuning purposes.
1388 if isinstance(self.processing_class, (PreTrainedTokenizerBase, ProcessorMixin)) and hasattr(

Calls 15

call_model_initMethod · 0.95
_move_model_to_deviceMethod · 0.95
_hp_search_setupMethod · 0.95
_load_from_checkpointMethod · 0.95
enable_full_determinismFunction · 0.85
set_seedFunction · 0.85
apply_liger_kernelFunction · 0.85
align_special_tokensFunction · 0.85
activate_neftuneFunction · 0.85
get_last_checkpointFunction · 0.85
is_sagemaker_mp_enabledFunction · 0.85