This validator suggests removing a common prefix from the prompts if a long one exists.
(df: pd.DataFrame)
| 259 | |
| 260 | |
def common_prompt_prefix_validator(df: pd.DataFrame) -> Remediation:
    """
    Report a prefix shared by every prompt and, when it is long, offer to strip it.

    The shared prefix is obtained from `get_common_xfix(df.prompt, xfix="prefix")`.

    - No shared prefix, or every prompt equal to the prefix: an empty
      `Remediation(name="common_prefix")` is returned.
    - Otherwise the remediation carries an immediate message naming the prefix.
    - If the prefix is longer than `MAX_PREFIX_LEN` characters, the message is
      extended with fine-tuning advice, and an optional message plus an
      optional function that drops the prefix from the `prompt` column are
      attached as well.
    """
    MAX_PREFIX_LEN = 12

    shared_prefix = get_common_xfix(df.prompt, xfix="prefix")

    # Nothing to report when there is no prefix. When every prompt *is* the
    # prefix, the original note says common_suffix_validator already handles
    # it (that validator is not visible from here).
    if shared_prefix == "" or (df.prompt == shared_prefix).all():
        return Remediation(name="common_prefix")

    # From here on shared_prefix is guaranteed to be non-empty.
    notice = f"\n- All prompts start with prefix `{shared_prefix}`"

    if len(shared_prefix) <= MAX_PREFIX_LEN:
        # Short prefix: inform only, no removal is offered.
        return Remediation(
            name="common_prompt_prefix",
            immediate_msg=notice,
            optional_msg=None,
            optional_fn=None,
        )

    notice += (
        ". Fine-tuning doesn't require the instruction specifying the task, or a few-shot example scenario. "
        "Most of the time you should only add the input data into the prompt, and the desired output into the completion"
    )

    def optional_fn(x: Any) -> Any:
        # Slice the shared prefix off every prompt; mutates and returns x.
        x["prompt"] = x["prompt"].str[len(shared_prefix) :]
        return x

    return Remediation(
        name="common_prompt_prefix",
        immediate_msg=notice,
        optional_msg=f"Remove prefix `{shared_prefix}` from all prompts",
        optional_fn=optional_fn,
    )
| 298 | |
| 299 | |
| 300 | def common_completion_prefix_validator(df: pd.DataFrame) -> Remediation: |
nothing calls this directly
no test coverage detected