This validator suggests removing a common prefix from the completions if a long one exists.
(df: pd.DataFrame)
| 298 | |
| 299 | |
def common_completion_prefix_validator(df: pd.DataFrame) -> Remediation:
    """
    Suggest removing a common prefix from the completions if a long one exists.

    The prefix shared by every value of ``df.completion`` is computed with
    ``get_common_xfix``. A bare ``Remediation(name="common_prefix")`` (no
    messages, no fix) is returned when that prefix is shorter than
    ``MAX_PREFIX_LEN`` characters, or when every completion is exactly equal
    to the prefix. Otherwise the returned ``Remediation`` carries a message
    describing the prefix and an ``optional_fn`` that strips it.

    Note: ``optional_fn`` assigns to the ``completion`` column of the frame
    it is given (it modifies that frame in place) and returns the same frame.
    """
    MAX_PREFIX_LEN = 5

    common_prefix = get_common_xfix(df.completion, xfix="prefix")
    # Remember whether the shared prefix begins with a space so that the
    # leading space can be kept after the prefix is stripped.
    ws_prefix = len(common_prefix) > 0 and common_prefix[0] == " "
    if len(common_prefix) < MAX_PREFIX_LEN:
        return Remediation(name="common_prefix")

    def remove_common_prefix(x: Any, prefix: Any, ws_prefix: Any) -> Any:
        # Drop the first len(prefix) characters of every completion.
        x["completion"] = x["completion"].str[len(prefix) :]
        if ws_prefix:
            # keep the single whitespace as prefix
            # Concatenate element-wise: formatting the Series inside an
            # f-string would write the Series' repr into every row.
            x["completion"] = " " + x["completion"]
        return x

    if (df.completion == common_prefix).all():
        # already handled by common_suffix_validator
        return Remediation(name="common_prefix")

    immediate_msg = f"\n- All completions start with prefix `{common_prefix}`. Most of the time you should only add the output data into the completion, without any prefix"
    optional_msg = f"Remove prefix `{common_prefix}` from all completions"

    def optional_fn(x: Any) -> Any:
        return remove_common_prefix(x, common_prefix, ws_prefix)

    return Remediation(
        name="common_completion_prefix",
        immediate_msg=immediate_msg,
        optional_msg=optional_msg,
        optional_fn=optional_fn,
    )
| 334 | |
| 335 | |
| 336 | def common_completion_suffix_validator(df: pd.DataFrame) -> Remediation: |
nothing calls this directly
no test coverage detected