MCPcopy
hub / github.com/openai/openai-python / common_completion_suffix_validator

Function common_completion_suffix_validator

src/openai/lib/_validators.py:336–401  ·  view source on GitHub ↗

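This validator suggests adding a common suffix to every completion when one does not already exist. Although the description mentions classification, the code returns early with no suggestion when the inferred task type is "open-ended generation" or "classification", so the suggestion is only produced for the remaining task types (conditional generation).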

(df: pd.DataFrame)

Source from the content-addressed store, hash-verified

334
335
336def common_completion_suffix_validator(df: pd.DataFrame) -> Remediation:
337 """
338 This validator will suggest to add a common suffix to the completion if one doesn't already exist in case of classification or conditional generation.
339 """
340 error_msg = None
341 immediate_msg = None
342 optional_msg = None
343 optional_fn = None # type: ignore
344
345 ft_type = infer_task_type(df)
346 if ft_type == "open-ended generation" or ft_type == "classification":
347 return Remediation(name="common_suffix")
348
349 common_suffix = get_common_xfix(df.completion, xfix="suffix")
350 if (df.completion == common_suffix).all():
351 error_msg = f"All completions are identical: `{common_suffix}`\nEnsure completions are different, otherwise the model will just repeat `{common_suffix}`"
352 return Remediation(name="common_suffix", error_msg=error_msg)
353
354 # Find a suffix which is not contained within the completion otherwise
355 suggested_suffix = " [END]"
356 suffix_options = [
357 "\n",
358 ".",
359 " END",
360 "***",
361 "+++",
362 "&&&",
363 "$$$",
364 "@@@",
365 "%%%",
366 ]
367 for suffix_option in suffix_options:
368 if df.completion.str.contains(suffix_option, regex=False).any():
369 continue
370 suggested_suffix = suffix_option
371 break
372 display_suggested_suffix = suggested_suffix.replace("\n", "\\n")
373
374 def add_suffix(x: Any, suffix: Any) -> Any:
375 x["completion"] += suffix
376 return x
377
378 if common_suffix != "":
379 common_suffix_new_line_handled = common_suffix.replace("\n", "\\n")
380 immediate_msg = f"\n- All completions end with suffix `{common_suffix_new_line_handled}`"
381 if len(common_suffix) > 10:
382 immediate_msg += f". This suffix seems very long. Consider replacing with a shorter suffix, such as `{display_suggested_suffix}`"
383 if df.completion.str[: -len(common_suffix)].str.contains(common_suffix, regex=False).any():
384 immediate_msg += f"\n WARNING: Some of your completions contain the suffix `{common_suffix}` more than once. We suggest that you review your completions and add a unique ending"
385
386 else:
387 immediate_msg = "\n- Your data does not contain a common ending at the end of your completions. Having a common ending string appended to the end of the completion makes it clearer to the fine-tuned model where the completion should end. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more detail and examples."
388
389 if common_suffix == "":
390 optional_msg = f"Add a suffix ending `{display_suggested_suffix}` to all completions"
391
392 def optional_fn(x: Any) -> Any:
393 return add_suffix(x, suggested_suffix)

Callers

nothing calls this directly

Calls 3

infer_task_type · Function · 0.85
Remediation · Class · 0.85
get_common_xfix · Function · 0.85

Tested by

no test coverage detected