(cls, data: dict[str, Any])
| 137 | |
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "BenchmarkResult":
    """Rebuild a result object from its dictionary representation.

    Args:
        data: Mapping with the required keys ``"e2e_latency"``,
            ``"time_to_first_token"``, ``"inter_token_latency"`` and
            ``"shape_and_decoded_outputs"``. The ``"gpu_metrics"`` and
            ``"timestamps"`` entries are optional: when they are ``None``
            (or absent) they are expanded to one ``None`` per entry of
            ``"e2e_latency"``.

    Returns:
        A new instance of ``cls`` populated from ``data``.

    Raises:
        KeyError: If one of the required keys is missing from ``data``.
    """
    e2e_latency = data["e2e_latency"]

    # GPU metrics are saved as None when every entry was None. Individual
    # entries may still be None, so only non-None entries are deserialized.
    raw_gpu_metrics = data.get("gpu_metrics")
    if raw_gpu_metrics is None:
        gpu_metrics = [None] * len(e2e_latency)
    else:
        gpu_metrics = [
            None if gm is None else GPURawMetrics.from_dict(gm)
            for gm in raw_gpu_metrics
        ]

    # Timestamps can be saved as None to reduce file size.
    timestamps = data.get("timestamps")
    if timestamps is None:
        timestamps = [None] * len(e2e_latency)

    # Create a new instance and fill it with the loaded data.
    new_instance = cls()
    new_instance.e2e_latency = e2e_latency
    new_instance._timestamps = timestamps
    new_instance.time_to_first_token = data["time_to_first_token"]
    new_instance.inter_token_latency = data["inter_token_latency"]
    new_instance.shape_and_decoded_outputs = data["shape_and_decoded_outputs"]
    new_instance.gpu_metrics = gpu_metrics
    return new_instance
| 159 | |
def get_throughput(self, total_generated_tokens: int) -> list[float]:
    """Return ``total_generated_tokens`` divided by each recorded end-to-end latency.

    The result has one value per entry of ``self.e2e_latency``, in the same order.
    """
    throughputs = []
    for latency in self.e2e_latency:
        throughputs.append(total_generated_tokens / latency)
    return throughputs
no outgoing calls