MCPcopy
hub / github.com/openai/openai-python / common_prompt_suffix_validator

Function common_prompt_suffix_validator

src/openai/lib/_validators.py:193–258  ·  view source on GitHub ↗

This validator suggests adding a common suffix to every prompt, if one does not already exist, when the task is classification or conditional generation.

(df: pd.DataFrame)

Source from the content-addressed store, hash-verified

191
192
193def common_prompt_suffix_validator(df: pd.DataFrame) -> Remediation:
194 """
195 This validator will suggest to add a common suffix to the prompt if one doesn't already exist in case of classification or conditional generation.
196 """
197 error_msg = None
198 immediate_msg = None
199 optional_msg = None
200 optional_fn = None # type: ignore
201
202 # Find a suffix which is not contained within the prompt otherwise
203 suggested_suffix = "\n\n### =>\n\n"
204 suffix_options = [
205 " ->",
206 "\n\n###\n\n",
207 "\n\n===\n\n",
208 "\n\n---\n\n",
209 "\n\n===>\n\n",
210 "\n\n--->\n\n",
211 ]
212 for suffix_option in suffix_options:
213 if suffix_option == " ->":
214 if df.prompt.str.contains("\n").any():
215 continue
216 if df.prompt.str.contains(suffix_option, regex=False).any():
217 continue
218 suggested_suffix = suffix_option
219 break
220 display_suggested_suffix = suggested_suffix.replace("\n", "\\n")
221
222 ft_type = infer_task_type(df)
223 if ft_type == "open-ended generation":
224 return Remediation(name="common_suffix")
225
226 def add_suffix(x: Any, suffix: Any) -> Any:
227 x["prompt"] += suffix
228 return x
229
230 common_suffix = get_common_xfix(df.prompt, xfix="suffix")
231 if (df.prompt == common_suffix).all():
232 error_msg = f"All prompts are identical: `{common_suffix}`\nConsider leaving the prompts blank if you want to do open-ended generation, otherwise ensure prompts are different"
233 return Remediation(name="common_suffix", error_msg=error_msg)
234
235 if common_suffix != "":
236 common_suffix_new_line_handled = common_suffix.replace("\n", "\\n")
237 immediate_msg = f"\n- All prompts end with suffix `{common_suffix_new_line_handled}`"
238 if len(common_suffix) > 10:
239 immediate_msg += f". This suffix seems very long. Consider replacing with a shorter suffix, such as `{display_suggested_suffix}`"
240 if df.prompt.str[: -len(common_suffix)].str.contains(common_suffix, regex=False).any():
241 immediate_msg += f"\n WARNING: Some of your prompts contain the suffix `{common_suffix}` more than once. We strongly suggest that you review your prompts and add a unique suffix"
242
243 else:
244 immediate_msg = "\n- Your data does not contain a common separator at the end of your prompts. Having a separator string appended to the end of the prompt makes it clearer to the fine-tuned model where the completion should begin. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more detail and examples. If you intend to do open-ended generation, then you should leave the prompts empty"
245
246 if common_suffix == "":
247 optional_msg = f"Add a suffix separator `{display_suggested_suffix}` to all prompts"
248
249 def optional_fn(x: Any) -> Any:
250 return add_suffix(x, suggested_suffix)

Callers

nothing calls this directly

Calls 3

infer_task_type — Function · 0.85
Remediation — Class · 0.85
get_common_xfix — Function · 0.85

Tested by

no test coverage detected