Estimate the time it'll take to fine-tune the dataset
(df: pd.DataFrame)
| 590 | |
| 591 | |
def estimate_fine_tuning_time(df: pd.DataFrame) -> None:
    """
    Print a rough estimate of how long fine-tuning on `df` will take.

    The task type is obtained from `infer_task_type(df)`. For
    "classification" the base estimate is 1.44 seconds per row; for any
    other task type it is 0.0515 seconds per byte reported by
    `df.memory_usage(index=True)`. A fixed 140 seconds is added on top, and
    the total is written to stdout in the largest fitting unit (seconds,
    minutes, hours or days), rounded to two decimals.

    Nothing is returned; the only effect is the message on stdout.
    """
    if infer_task_type(df) == "classification":
        # Classification cost is modelled per example.
        base_seconds = len(df) * 1.44
    else:
        # Every other task type is modelled on the frame's memory footprint.
        base_seconds = df.memory_usage(index=True).sum() * 0.0515

    def humanize(seconds: float) -> str:
        # Sub-minute durations are reported as-is, without any division.
        if seconds < 60:
            return f"{round(seconds, 2)} seconds"
        # (exclusive upper bound in seconds, seconds per unit, unit label)
        for upper_bound, unit_seconds, unit_label in (
            (3600, 60, "minutes"),
            (86400, 3600, "hours"),
        ):
            if seconds < upper_bound:
                return f"{round(seconds / unit_seconds, 2)} {unit_label}"
        return f"{round(seconds / 86400, 2)} days"

    eta = humanize(base_seconds + 140)
    message = (
        f"Once your model starts training, it'll approximately take {eta} "
        "to train a `curie` model, and less for `ada` and `babbage`. "
        "Queue will approximately take half an hour per job ahead of you.\n"
    )
    sys.stdout.write(message)
| 619 | |
| 620 | |
| 621 | def get_outfnames(fname: str, split: bool) -> list[str]: |
no test coverage detected