Return the buffer containing the data and the buffer's associated dtype.
(
self,
)
| 307 | return buffers |
| 308 | |
| 309 | def _get_data_buffer( |
| 310 | self, |
| 311 | ) -> tuple[Buffer, tuple[DtypeKind, int, str, str]]: |
| 312 | """ |
| 313 | Return the buffer containing the data and the buffer's associated dtype. |
| 314 | """ |
| 315 | buffer: Buffer |
| 316 | if self.dtype[0] == DtypeKind.DATETIME: |
| 317 | # self.dtype[2] is an ArrowCTypes.TIMESTAMP format string; a timezone, |
| 318 | # when present, makes it longer than 4 characters |
| 319 | if len(self.dtype[2]) > 4: |
| 320 | np_arr = self._col.dt.tz_convert(None).to_numpy() |
| 321 | else: |
| 322 | np_arr = self._col.to_numpy() |
| 323 | buffer = PandasBuffer(np_arr, allow_copy=self._allow_copy) |
| 324 | dtype = ( |
| 325 | DtypeKind.INT, |
| 326 | 64, |
| 327 | ArrowCTypes.INT64, |
| 328 | Endianness.NATIVE, |
| 329 | ) |
| 330 | elif self.dtype[0] in ( |
| 331 | DtypeKind.INT, |
| 332 | DtypeKind.UINT, |
| 333 | DtypeKind.FLOAT, |
| 334 | DtypeKind.BOOL, |
| 335 | ): |
| 336 | dtype = self.dtype |
| 337 | arr = self._col.array |
| 338 | if isinstance(self._col.dtype, ArrowDtype): |
| 339 | # We already rechunk (if necessary / allowed) upon initialization, so |
| 340 | # this is already single-chunk by the time we get here. |
| 341 | arr = arr._pa_array.chunks[0] # type: ignore[attr-defined] |
| 342 | buffer = PandasBufferPyarrow( |
| 343 | arr.buffers()[1], |
| 344 | length=len(arr), |
| 345 | ) |
| 346 | return buffer, dtype |
| 347 | if isinstance(self._col.dtype, BaseMaskedDtype): |
| 348 | np_arr = arr._data # type: ignore[attr-defined] |
| 349 | else: |
| 350 | np_arr = arr._ndarray # type: ignore[attr-defined] |
| 351 | buffer = PandasBuffer(np_arr, allow_copy=self._allow_copy) |
| 352 | elif self.dtype[0] == DtypeKind.CATEGORICAL: |
| 353 | codes = self._col.values._codes |
| 354 | buffer = PandasBuffer(codes, allow_copy=self._allow_copy) |
| 355 | dtype = self._dtype_from_pandasdtype(codes.dtype) |
| 356 | elif self.dtype[0] == DtypeKind.STRING: |
| 357 | # Marshal the strings from a NumPy object array into a byte array |
| 358 | buf = self._col.to_numpy() |
| 359 | b = bytearray() |
| 360 | |
| 361 | # TODO: this for-loop is slow; can be implemented in Cython/C/C++ later |
| 362 | for obj in buf: |
| 363 | if isinstance(obj, str): |
| 364 | b.extend(obj.encode(encoding="utf-8")) |
| 365 | |
| 366 | # Convert the byte array to a Pandas "buffer" using |
no test coverage detected