Reads the contents of a CSV file into a DataFrame and processes it according to the kwargs passed in the constructor. Returns: DataFrame — the DataFrame created from the CSV file.
(self)
| 266 | ) |
| 267 | |
| 268 | def read(self) -> DataFrame: |
| 269 | """ |
| 270 | Reads the contents of a CSV file into a DataFrame and |
| 271 | processes it according to the kwargs passed in the |
| 272 | constructor. |
| 273 | |
| 274 | Returns |
| 275 | ------- |
| 276 | DataFrame |
| 277 | The DataFrame created from the CSV file. |
| 278 | """ |
| 279 | pa = import_optional_dependency("pyarrow") |
| 280 | pyarrow_csv = import_optional_dependency("pyarrow.csv") |
| 281 | self._get_pyarrow_options() |
| 282 | convert_options = self._get_convert_options() |
| 283 | |
| 284 | try: |
| 285 | table = pyarrow_csv.read_csv( |
| 286 | self.src, |
| 287 | read_options=pyarrow_csv.ReadOptions(**self.read_options), |
| 288 | parse_options=pyarrow_csv.ParseOptions(**self.parse_options), |
| 289 | convert_options=convert_options, |
| 290 | ) |
| 291 | except pa.ArrowInvalid as e: |
| 292 | raise ParserError(e) from e |
| 293 | |
| 294 | dtype_backend = self.kwds["dtype_backend"] |
| 295 | |
| 296 | # Convert all pa.null() cols -> float64 (non nullable) |
| 297 | # else Int64 (nullable case, see below) |
| 298 | if dtype_backend is lib.no_default: |
| 299 | new_schema = table.schema |
| 300 | new_type = pa.float64() |
| 301 | for i, arrow_type in enumerate(table.schema.types): |
| 302 | if pa.types.is_null(arrow_type): |
| 303 | new_schema = new_schema.set( |
| 304 | i, new_schema.field(i).with_type(new_type) |
| 305 | ) |
| 306 | |
| 307 | table = table.cast(new_schema) |
| 308 | |
| 309 | multi_index_named = self._adjust_column_names(table) |
| 310 | |
| 311 | with warnings.catch_warnings(): |
| 312 | warnings.filterwarnings( |
| 313 | "ignore", |
| 314 | "make_block is deprecated", |
| 315 | Pandas4Warning, |
| 316 | ) |
| 317 | frame = arrow_table_to_pandas( |
| 318 | table, |
| 319 | dtype_backend=dtype_backend, |
| 320 | null_to_int64=True, |
| 321 | dtype=self.dtype, |
| 322 | names=self.names, |
| 323 | ) |
| 324 | |
| 325 | if self.header is None: |
nothing calls this directly
no test coverage detected