This validator only reports the number of examples and recommends that the user add more examples if there are fewer than 100.
(df: pd.DataFrame)
| 23 | |
| 24 | |
def num_examples_validator(df: pd.DataFrame) -> Remediation:
    """
    Report how many examples the dataset contains.

    The returned remediation is named "num_examples" and carries only an
    immediate message stating the number of rows in `df`. When there are
    fewer than 100 rows, the message also advises providing more examples.
    """
    min_examples = 100
    example_count = len(df)

    # Base message is always emitted; the advice is appended only for small datasets.
    message = f"\n- Your file contains {example_count} prompt-completion pairs"
    if example_count < min_examples:
        message += ". In general, we recommend having at least a few hundred examples. We've found that performance tends to linearly increase for every doubling of the number of examples"

    return Remediation(name="num_examples", immediate_msg=message)
| 37 | |
| 38 | |
| 39 | def necessary_column_validator(df: pd.DataFrame, necessary_column: str) -> Remediation: |
nothing calls this directly
no test coverage detected