MCPcopy
hub / github.com/openai/openai-python / lower_case_validator

Function lower_case_validator

src/openai/lib/_validators.py:429–448  ·  view source on GitHub ↗

This validator suggests lowercasing the column values if more than a third of the letters are uppercase.

(df: pd.DataFrame, column: Any)

Source from the content-addressed store, hash-verified

427
428
def lower_case_validator(df: pd.DataFrame, column: Any) -> Remediation | None:
    """
    Suggest lowercasing a column when more than a third of its letters are uppercase.

    Only alphabetic characters that are upper- or lowercase are counted; digits,
    punctuation and whitespace are ignored. Cells that are not strings (for
    example ``None`` or ``NaN``) are skipped instead of raising ``TypeError``.

    Args:
        df: The frame holding the data to inspect.
        column: Label of the column in ``df`` to inspect.

    Returns:
        A ``Remediation`` whose optional function lowercases ``column``, or
        ``None`` when at most a third of the counted letters are uppercase
        (including when the column holds no letters at all).
    """

    def lower_case(x: Any) -> Any:
        # Assigns the lowercased column back onto the frame it was given and
        # returns that same frame.
        x[column] = x[column].str.lower()
        return x

    count_upper = 0
    count_lower = 0
    # Single pass over the column, tallying both cases together.
    for value in df[column]:
        if not isinstance(value, str):
            # Non-text cells have no letters to count.
            continue
        for char in value:
            if not char.isalpha():
                continue
            if char.isupper():
                count_upper += 1
            elif char.islower():
                count_lower += 1

    # upper / (upper + lower) > 1/3  is equivalent to  2 * upper > lower;
    # the integer form avoids dividing by zero when there are no letters.
    if count_upper * 2 > count_lower:
        return Remediation(
            name="lower_case",
            immediate_msg=f"\n- More than a third of your `{column}` column/key is uppercase. Uppercase {column}s tends to perform worse than a mixture of case encountered in normal language. We recommend to lower case the data if that makes sense in your domain. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more details",
            optional_msg=f"Lowercase all your data in column/key `{column}`",
            optional_fn=lower_case,
        )
    return None
449
450
451def read_any_format(

Callers 1

get_validators — Function · 0.85

Calls 1

Remediation — Class · 0.85

Tested by

no test coverage detected