MCPcopy
hub / github.com/openai/openai-python / read_any_format

Function read_any_format

src/openai/lib/_validators.py:451–537  ·  view source on GitHub ↗

This function will read a file saved in .csv, .json, .txt, .xlsx or .tsv format using pandas.

- for .xlsx it will read the first sheet
- for .txt it will assume completions and split on newline

(
    fname: str, fields: list[str] = ["prompt", "completion"]
)

Source from the content-addressed store, hash-verified

449
450
451def read_any_format(
452 fname: str, fields: list[str] = ["prompt", "completion"]
453) -> tuple[pd.DataFrame | None, Remediation]:
454 """
455 This function will read a file saved in .csv, .json, .txt, .xlsx or .tsv format using pandas.
456 - for .xlsx it will read the first sheet
457 - for .txt it will assume completions and split on newline
458 """
459 remediation = None
460 necessary_msg = None
461 immediate_msg = None
462 error_msg = None
463 df = None
464
465 if os.path.isfile(fname):
466 try:
467 if fname.lower().endswith(".csv") or fname.lower().endswith(".tsv"):
468 file_extension_str, separator = ("CSV", ",") if fname.lower().endswith(".csv") else ("TSV", "\t")
469 immediate_msg = (
470 f"\n- Based on your file extension, your file is formatted as a {file_extension_str} file"
471 )
472 necessary_msg = f"Your format `{file_extension_str}` will be converted to `JSONL`"
473 df = pd.read_csv(fname, sep=separator, dtype=str).fillna("")
474 elif fname.lower().endswith(".xlsx"):
475 immediate_msg = "\n- Based on your file extension, your file is formatted as an Excel file"
476 necessary_msg = "Your format `XLSX` will be converted to `JSONL`"
477 xls = pd.ExcelFile(fname)
478 sheets = xls.sheet_names
479 if len(sheets) > 1:
480 immediate_msg += "\n- Your Excel file contains more than one sheet. Please either save as csv or ensure all data is present in the first sheet. WARNING: Reading only the first sheet..."
481 df = pd.read_excel(fname, dtype=str).fillna("")
482 elif fname.lower().endswith(".txt"):
483 immediate_msg = "\n- Based on your file extension, you provided a text file"
484 necessary_msg = "Your format `TXT` will be converted to `JSONL`"
485 with open(fname, "r") as f:
486 content = f.read()
487 df = pd.DataFrame(
488 [["", line] for line in content.split("\n")],
489 columns=fields,
490 dtype=str,
491 ).fillna("")
492 elif fname.lower().endswith(".jsonl"):
493 df = pd.read_json(fname, lines=True, dtype=str).fillna("") # type: ignore
494 if len(df) == 1: # type: ignore
495 # this is NOT what we expect for a .jsonl file
496 immediate_msg = "\n- Your JSONL file appears to be in a JSON format. Your file will be converted to JSONL format"
497 necessary_msg = "Your format `JSON` will be converted to `JSONL`"
498 df = pd.read_json(fname, dtype=str).fillna("") # type: ignore
499 else:
500 pass # this is what we expect for a .jsonl file
501 elif fname.lower().endswith(".json"):
502 try:
503 # to handle case where .json file is actually a .jsonl file
504 df = pd.read_json(fname, lines=True, dtype=str).fillna("") # type: ignore
505 if len(df) == 1: # type: ignore
506 # this code path corresponds to a .json file that has one line
507 df = pd.read_json(fname, dtype=str).fillna("") # type: ignore
508 else:

Callers

nothing calls this directly

Calls 2

Remediation · Class · 0.85
read · Method · 0.45

Tested by

no test coverage detected