hub / github.com/pandas-dev/pandas / _get_data_buffer

Method _get_data_buffer

pandas/core/interchange/column.py:309–382 · view source on GitHub ↗

Return the buffer containing the data and the buffer's associated dtype.

(self) -> tuple[Buffer, tuple[DtypeKind, int, str, str]]

Source from the content-addressed store, hash-verified

307	return buffers
308
309	def _get_data_buffer(
310	self,
311	) -> tuple[Buffer, tuple[DtypeKind, int, str, str]]:
312	"""
313	Return the buffer containing the data and the buffer's associated dtype.
314	"""
315	buffer: Buffer
316	if self.dtype[0] == DtypeKind.DATETIME:
317	# self.dtype[2] is an ArrowCTypes.TIMESTAMP where the tz will make
318	# it longer than 4 characters
319	if len(self.dtype[2]) > 4:
320	np_arr = self._col.dt.tz_convert(None).to_numpy()
321	else:
322	np_arr = self._col.to_numpy()
323	buffer = PandasBuffer(np_arr, allow_copy=self._allow_copy)
324	dtype = (
325	DtypeKind.INT,
326	64,
327	ArrowCTypes.INT64,
328	Endianness.NATIVE,
329	)
330	elif self.dtype[0] in (
331	DtypeKind.INT,
332	DtypeKind.UINT,
333	DtypeKind.FLOAT,
334	DtypeKind.BOOL,
335	):
336	dtype = self.dtype
337	arr = self._col.array
338	if isinstance(self._col.dtype, ArrowDtype):
339	# We already rechunk (if necessary / allowed) upon initialization, so
340	# this is already single-chunk by the time we get here.
341	arr = arr._pa_array.chunks[0] # type: ignore[attr-defined]
342	buffer = PandasBufferPyarrow(
343	arr.buffers()[1],
344	length=len(arr),
345	)
346	return buffer, dtype
347	if isinstance(self._col.dtype, BaseMaskedDtype):
348	np_arr = arr._data # type: ignore[attr-defined]
349	else:
350	np_arr = arr._ndarray # type: ignore[attr-defined]
351	buffer = PandasBuffer(np_arr, allow_copy=self._allow_copy)
352	elif self.dtype[0] == DtypeKind.CATEGORICAL:
353	codes = self._col.values._codes
354	buffer = PandasBuffer(codes, allow_copy=self._allow_copy)
355	dtype = self._dtype_from_pandasdtype(codes.dtype)
356	elif self.dtype[0] == DtypeKind.STRING:
357	# Marshal the strings from a NumPy object array into a byte array
358	buf = self._col.to_numpy()
359	b = bytearray()
360
361	# TODO: this for-loop is slow; can be implemented in Cython/C/C++ later
362	for obj in buf:
363	if isinstance(obj, str):
364	b.extend(obj.encode(encoding="utf-8"))
365
366	# Convert the byte array to a Pandas "buffer" using

Callers 1

get_buffers (Method) · 0.95

Calls 6

_dtype_from_pandasdtype (Method) · 0.95

PandasBuffer (Class) · 0.90

PandasBufferPyarrow (Class) · 0.90

encode (Method) · 0.80

to_numpy (Method) · 0.45

tz_convert (Method) · 0.45

Tested by

no test coverage detected