Prediction/evaluation loop, shared by `Trainer.evaluate()` and `Trainer.predict()`. Works both with and without labels.
(
self,
dataloader: DataLoader,
description: str,
prediction_loss_only: bool | None = None,
ignore_keys: list[str] | None = None,
metric_key_prefix: str = "eval",
)
| 2634 | return output.metrics |
| 2635 | |
| 2636 | def evaluation_loop( |
| 2637 | self, |
| 2638 | dataloader: DataLoader, |
| 2639 | description: str, |
| 2640 | prediction_loss_only: bool | None = None, |
| 2641 | ignore_keys: list[str] | None = None, |
| 2642 | metric_key_prefix: str = "eval", |
| 2643 | ) -> EvalLoopOutput: |
| 2644 | """ |
| 2645 | Prediction/evaluation loop, shared by `Trainer.evaluate()` and `Trainer.predict()`. |
| 2646 | |
| 2647 | Works both with or without labels. |
| 2648 | """ |
| 2649 | args = self.args |
| 2650 | |
| 2651 | prediction_loss_only = prediction_loss_only if prediction_loss_only is not None else args.prediction_loss_only |
| 2652 | |
| 2653 | # if eval is called w/o train, handle model prep here |
| 2654 | if self.is_deepspeed_enabled and self.deepspeed is None: |
| 2655 | _, _ = deepspeed_init(self, num_training_steps=0, inference=True) |
| 2656 | |
| 2657 | model = self._wrap_model(self.model, training=False) |
| 2658 | |
| 2659 | if len(self.accelerator._models) == 0 and model is self.model: |
| 2660 | start_time = time.time() |
| 2661 | model = ( |
| 2662 | self.accelerator.prepare(model) |
| 2663 | if self.is_deepspeed_enabled or (self.is_fsdp_enabled and not self.args.torch_compile) |
| 2664 | else self.accelerator.prepare_model(model, evaluation_mode=True) |
| 2665 | ) |
| 2666 | self.model_preparation_time = round(time.time() - start_time, 4) |
| 2667 | |
| 2668 | if self.is_fsdp_enabled: |
| 2669 | self.model = model |
| 2670 | |
| 2671 | # for the rest of this function `model` is the outside model, whether it was wrapped or not |
| 2672 | if model is not self.model: |
| 2673 | self.model_wrapped = model |
| 2674 | |
| 2675 | # backward compatibility |
| 2676 | if self.is_deepspeed_enabled: |
| 2677 | self.deepspeed = self.model_wrapped |
| 2678 | |
| 2679 | # if full fp16 or bf16 eval is wanted and this ``evaluation`` or ``predict`` isn't called |
| 2680 | # while ``train`` is running, cast it to the right dtype first and then put on device |
| 2681 | if not self.is_in_train: |
| 2682 | if args.fp16_full_eval: |
| 2683 | model = model.to(dtype=torch.float16, device=args.device) |
| 2684 | elif args.bf16_full_eval: |
| 2685 | model = model.to(dtype=torch.bfloat16, device=args.device) |
| 2686 | |
| 2687 | batch_size = self.args.eval_batch_size |
| 2688 | |
| 2689 | logger.info(f"\n***** Running {description} *****") |
| 2690 | if has_length(dataloader): |
| 2691 | logger.info(f" Num examples = {self.num_examples(dataloader)}") |
| 2692 | else: |
| 2693 | logger.info(" Num examples: Unknown") |
no test coverage detected