(
self, options: dict[str, Any], engine: CSVEngine
)
| 1717 | ) |
| 1718 | |
| 1719 | def _clean_options( |
| 1720 | self, options: dict[str, Any], engine: CSVEngine |
| 1721 | ) -> tuple[dict[str, Any], CSVEngine]: |
| 1722 | result = options.copy() |
| 1723 | |
| 1724 | fallback_reason = None |
| 1725 | |
| 1726 | # C engine not supported yet |
| 1727 | if engine == "c": |
| 1728 | if options["skipfooter"] > 0: |
| 1729 | fallback_reason = "the 'c' engine does not support skipfooter" |
| 1730 | engine = "python" |
| 1731 | |
| 1732 | sep = options["delimiter"] |
| 1733 | |
| 1734 | if sep is not None and len(sep) > 1: |
| 1735 | if engine == "c" and sep == r"\s+": |
| 1736 | # delim_whitespace passed on to pandas._libs.parsers.TextReader |
| 1737 | result["delim_whitespace"] = True |
| 1738 | del result["delimiter"] |
| 1739 | elif engine not in ("python", "python-fwf"): |
| 1740 | # wait until regex engine integrated |
| 1741 | fallback_reason = ( |
| 1742 | f"the '{engine}' engine does not support " |
| 1743 | "regex separators (separators > 1 char and " |
| 1744 | r"different from '\s+' are interpreted as regex)" |
| 1745 | ) |
| 1746 | engine = "python" |
| 1747 | elif sep is not None: |
| 1748 | encodeable = True |
| 1749 | encoding = sys.getfilesystemencoding() or "utf-8" |
| 1750 | try: |
| 1751 | if len(sep.encode(encoding)) > 1: |
| 1752 | encodeable = False |
| 1753 | except UnicodeDecodeError: |
| 1754 | encodeable = False |
| 1755 | if not encodeable and engine not in ("python", "python-fwf"): |
| 1756 | fallback_reason = ( |
| 1757 | f"the separator encoded in {encoding} " |
| 1758 | f"is > 1 char long, and the '{engine}' engine " |
| 1759 | "does not support such separators" |
| 1760 | ) |
| 1761 | engine = "python" |
| 1762 | |
| 1763 | quotechar = options["quotechar"] |
| 1764 | if quotechar is not None and isinstance(quotechar, (str, bytes)): |
| 1765 | if ( |
| 1766 | len(quotechar) == 1 |
| 1767 | and ord(quotechar) > 127 |
| 1768 | and engine not in ("python", "python-fwf") |
| 1769 | ): |
| 1770 | fallback_reason = ( |
| 1771 | "ord(quotechar) > 127, meaning the " |
| 1772 | "quotechar is larger than one byte, " |
| 1773 | f"and the '{engine}' engine does not support such quotechars" |
| 1774 | ) |
| 1775 | engine = "python" |
| 1776 |
no test coverage detected