MCPcopy
hub / github.com/pandas-dev/pandas / _get_data_buffer

Method _get_data_buffer

pandas/core/interchange/column.py:309–382  ·  view source on GitHub ↗

Return the buffer containing the data and the buffer's associated dtype.

(self)

Source from the content-addressed store, hash-verified

307 return buffers
308
309 def _get_data_buffer(
310 self,
311 ) -> tuple[Buffer, tuple[DtypeKind, int, str, str]]:
312 """
313 Return the buffer containing the data and the buffer's associated dtype.
314 """
315 buffer: Buffer
316 if self.dtype[0] == DtypeKind.DATETIME:
317 # self.dtype[2] is an ArrowCTypes.TIMESTAMP where the tz will make
318 # it longer than 4 characters
319 if len(self.dtype[2]) > 4:
320 np_arr = self._col.dt.tz_convert(None).to_numpy()
321 else:
322 np_arr = self._col.to_numpy()
323 buffer = PandasBuffer(np_arr, allow_copy=self._allow_copy)
324 dtype = (
325 DtypeKind.INT,
326 64,
327 ArrowCTypes.INT64,
328 Endianness.NATIVE,
329 )
330 elif self.dtype[0] in (
331 DtypeKind.INT,
332 DtypeKind.UINT,
333 DtypeKind.FLOAT,
334 DtypeKind.BOOL,
335 ):
336 dtype = self.dtype
337 arr = self._col.array
338 if isinstance(self._col.dtype, ArrowDtype):
339 # We already rechunk (if necessary / allowed) upon initialization, so
340 # this is already single-chunk by the time we get here.
341 arr = arr._pa_array.chunks[0] # type: ignore[attr-defined]
342 buffer = PandasBufferPyarrow(
343 arr.buffers()[1],
344 length=len(arr),
345 )
346 return buffer, dtype
347 if isinstance(self._col.dtype, BaseMaskedDtype):
348 np_arr = arr._data # type: ignore[attr-defined]
349 else:
350 np_arr = arr._ndarray # type: ignore[attr-defined]
351 buffer = PandasBuffer(np_arr, allow_copy=self._allow_copy)
352 elif self.dtype[0] == DtypeKind.CATEGORICAL:
353 codes = self._col.values._codes
354 buffer = PandasBuffer(codes, allow_copy=self._allow_copy)
355 dtype = self._dtype_from_pandasdtype(codes.dtype)
356 elif self.dtype[0] == DtypeKind.STRING:
357 # Marshal the strings from a NumPy object array into a byte array
358 buf = self._col.to_numpy()
359 b = bytearray()
360
361 # TODO: this for-loop is slow; can be implemented in Cython/C/C++ later
362 for obj in buf:
363 if isinstance(obj, str):
364 b.extend(obj.encode(encoding="utf-8"))
365
366 # Convert the byte array to a Pandas "buffer" using

Callers 1

get_buffers · Method · 0.95

Calls 6

PandasBuffer · Class · 0.90
PandasBufferPyarrow · Class · 0.90
encode · Method · 0.80
to_numpy · Method · 0.45
tz_convert · Method · 0.45

Tested by

no test coverage detected