This validator ensures that the given field (`completion` by default) contains no empty strings or null values.
(df: pd.DataFrame, field: str = "completion")
| 103 | |
| 104 | |
def non_empty_field_validator(df: pd.DataFrame, field: str = "completion") -> Remediation:
    """
    This validator will ensure that no value in the `field` column is empty.

    A value counts as empty when it equals the empty string or is null
    (as reported by `isnull`).

    Args:
        df: The frame to inspect; it must contain a column named `field`.
        field: Name of the column to check. Defaults to "completion".

    Returns:
        A `Remediation` named `empty_{field}`. When no empty value is found,
        `immediate_msg`, `necessary_msg` and `necessary_fn` are all None.
        Otherwise `immediate_msg` lists the zero-based positions of the
        offending rows, `necessary_msg` states how many rows would be removed,
        and `necessary_fn` returns the frame it is given without the rows
        whose `field` is an empty string or null.
    """
    # Build the mask once with vectorized comparisons instead of scanning the
    # column twice (a row-wise `apply` followed by the same test again).
    empty_rows = (df[field] == "") | df[field].isnull()

    if not empty_rows.any():
        # Nothing to fix: every optional part of the remediation stays unset.
        return Remediation(
            name=f"empty_{field}",
            immediate_msg=None,
            necessary_msg=None,
            necessary_fn=None,
        )

    # Report positions rather than index labels. Enumerating the mask avoids
    # copying the whole frame through `reset_index()`, which also fails when
    # the frame already has columns named `index` and `level_0`.
    empty_indexes = [position for position, is_empty in enumerate(empty_rows) if is_empty]
    immediate_msg = f"\n- `{field}` column/key should not contain empty strings. These are rows: {empty_indexes}"

    def necessary_fn(x: Any) -> Any:
        # Drop empty strings first, then nulls, in the requested column only.
        return x[x[field] != ""].dropna(subset=[field])

    necessary_msg = f"Remove {len(empty_indexes)} rows with empty {field}s"

    return Remediation(
        name=f"empty_{field}",
        immediate_msg=immediate_msg,
        necessary_msg=necessary_msg,
        necessary_fn=necessary_fn,
    )
| 129 | |
| 130 | |
| 131 | def duplicated_rows_validator(df: pd.DataFrame, fields: list[str] = ["prompt", "completion"]) -> Remediation: |
nothing calls this directly
no test coverage detected