MCPcopy
hub / github.com/openai/openai-python / long_examples_validator

Function long_examples_validator

src/openai/lib/_validators.py:156–190  ·  view source on GitHub ↗

This validator will suggest to the user to remove examples that are too long.

(df: pd.DataFrame)

Source from the content-addressed store, hash-verified

154
155
def long_examples_validator(df: pd.DataFrame) -> Remediation:
    """
    This validator will suggest to the user to remove examples that are too long.

    An example counts as long when the combined character length of its
    `prompt` and `completion` values exceeds 10000. The check is skipped
    entirely when `infer_task_type(df)` reports "open-ended generation".

    Returns a `Remediation` named "long_examples". When long examples are
    found it carries an immediate message listing their row positions and an
    optional remediation that removes those rows; otherwise the message and
    function fields are all None.
    """
    immediate_msg = None
    optional_msg = None
    optional_fn = None  # type: ignore

    # Combined prompt + completion length (in characters) above which an
    # example is reported as long.
    max_example_chars = 10000

    ft_type = infer_task_type(df)
    if ft_type != "open-ended generation":

        def get_long_indexes(d: pd.DataFrame) -> Any:
            # Returns 0-based row positions, NOT index labels. Walking the two
            # columns directly (instead of a row-wise `apply`) also yields an
            # empty list for a frame with no rows.
            return [
                position
                for position, (prompt, completion) in enumerate(zip(d["prompt"], d["completion"]))
                if len(prompt) + len(completion) > max_example_chars
            ]

        long_indexes = get_long_indexes(df)

        if long_indexes:
            immediate_msg = f"\n- There are {len(long_indexes)} examples that are very long. These are rows: {long_indexes}\nFor conditional generation, and for classification the examples shouldn't be longer than 2048 tokens."
            optional_msg = f"Remove {len(long_indexes)} long examples"

            def optional_fn(x: Any) -> Any:
                # Recompute on the frame actually being remediated: it may
                # differ from `df` if other remediations ran first.
                long_indexes_to_drop = get_long_indexes(x)
                if long_indexes != long_indexes_to_drop:
                    sys.stdout.write(
                        f"The indices of the long examples has changed as a result of a previously applied recommendation.\nThe {len(long_indexes_to_drop)} long examples to be dropped are now at the following indices: {long_indexes_to_drop}\n"
                    )
                # `DataFrame.drop` removes rows by index *label*, while
                # `long_indexes_to_drop` holds row *positions*. Translate
                # positions to labels first so the right rows are removed even
                # when the index is no longer a contiguous 0..n-1 range.
                return x.drop(x.index[long_indexes_to_drop])

    return Remediation(
        name="long_examples",
        immediate_msg=immediate_msg,
        optional_msg=optional_msg,
        optional_fn=optional_fn,
    )
191
192
193def common_prompt_suffix_validator(df: pd.DataFrame) -> Remediation:

Callers

nothing calls this directly

Calls 3

infer_task_typeFunction · 0.85
get_long_indexesFunction · 0.85
RemediationClass · 0.85

Tested by

no test coverage detected