Main benchmark runner that coordinates benchmark execution.
| 137 | |
| 138 | |
| 139 | class BenchmarkRunner: |
| 140 | """Main benchmark runner that coordinates benchmark execution.""" |
| 141 | |
def __init__(
    self,
    logger: logging.Logger,
    output_dir: str | None = None,
    branch_name: str | None = None,
    commit_id: str | None = None,
    commit_message: str | None = None,
) -> None:
    """Record run-wide settings and make sure the results directory exists.

    Args:
        logger: Logger stored on the instance as ``self.logger``.
        output_dir: Directory for results. When ``None``, a
            ``benchmark_results`` folder in the parent of this file's
            directory is used. The directory is created if missing.
        branch_name: Stored unchanged on the instance.
        commit_id: Stored on the instance; when ``None``, the value returned
            by ``get_git_revision()`` is used instead.
        commit_message: Stored unchanged on the instance.
    """
    # --- State that stays fixed for the whole run ---
    self.logger = logger

    if output_dir is None:
        parent_dir = os.path.dirname(os.path.dirname(__file__))
        output_dir = os.path.join(parent_dir, "benchmark_results")
    self.output_dir = output_dir

    self.branch_name = branch_name
    if commit_id is None:
        commit_id = get_git_revision()
    self.commit_id = commit_id
    self.commit_message = commit_message

    os.makedirs(output_dir, exist_ok=True)
    self.profile_dir = None

    # --- State that is reset for each model ---
    self._setup_for = ""

    # --- State that is reset for each run ---
    self.model: GenerationMixin | None = None

    # Use the current accelerator's type when one is available, else "cuda".
    if is_torch_accelerator_available():
        accelerator_name = torch.accelerator.current_accelerator().type
    else:
        accelerator_name = "cuda"
    self.device_type = accelerator_name
    # Look up the torch submodule named after the device; default to torch.cuda.
    self.torch_accelerator_module = getattr(torch, accelerator_name, torch.cuda)
| 166 | |
def cleanup(self) -> None:
    """Drop the current model reference and flush memory.

    The ``model`` attribute is deleted, recreated as ``None``, and then
    ``flush_memory()`` is called.
    """
    # Remove the attribute first, then restore it as an empty placeholder.
    delattr(self, "model")
    self.model = None
    flush_memory()
| 171 | |
| 172 | @staticmethod |
| 173 | def _is_primary_process() -> bool: |
| 174 | if not torch.distributed.is_available() or not torch.distributed.is_initialized(): |
| 175 | return True |
| 176 | return torch.distributed.get_rank() == 0 |
| 177 | |
| 178 | def setup_benchmark(self, model_id: str, config: BenchmarkConfig) -> None: |
| 179 | # Some attributes only need to be set once per model |
| 180 | if self._setup_for != model_id: |
| 181 | self.tokenizer = AutoTokenizer.from_pretrained(model_id) |
| 182 | # We set the EOS token to the padding token for open-ended generation |
| 183 | self.tokenizer.eos_token = self.tokenizer.pad_token |
| 184 | self._setup_for = model_id |
| 185 | |
| 186 | # Prepare inputs |
| 187 | self.inputs = self.tokenizer( |
| 188 | [DEFAULT_PROMPT for _ in range(config.batch_size)], |
| 189 | return_tensors="pt", |
| 190 | max_length=config.sequence_length, |
| 191 | truncation=True, |
| 192 | return_attention_mask=True, |
| 193 | ) |
| 194 | self.inputs["use_cache"] = True |
| 195 | |
| 196 | # Prepare generation config |