Function _get_join_keys

pandas/core/reshape/merge.py:3008–3042 · view source on GitHub ↗

(
    llab: list[npt.NDArray[np.int64 | np.intp]],
    rlab: list[npt.NDArray[np.int64 | np.intp]],
    shape: Shape,
    sort: bool,
)

Source from the content-addressed store, hash-verified

3006
3007
def _get_join_keys(
    llab: list[npt.NDArray[np.int64 | np.intp]],
    rlab: list[npt.NDArray[np.int64 | np.intp]],
    shape: Shape,
    sort: bool,
) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]:
    """
    Collapse per-level label arrays into one int64 key array per side.

    The labels of each level are weighted by the product of the sizes of
    the levels after it and summed, so every combination of labels maps to
    a single integer.  When combining every level at once could overflow
    int64 (as reported by ``is_int64_overflow_possible``), only the longest
    safe prefix of levels is combined; the partial keys are then passed
    through ``_factorize_keys`` and treated as a single new leading level,
    and the process repeats on the shortened list of levels.

    Parameters
    ----------
    llab, rlab : list of integer ndarrays
        Left and right label arrays, one array per level.
    shape : Shape
        Size of each level, in the same order as the label lists.
    sort : bool
        Forwarded unchanged to ``_factorize_keys``.

    Returns
    -------
    tuple of two int64 ndarrays
        The combined keys for the left and the right side.
    """
    while True:
        # Longest leading run of levels that can be combined without
        # risking int64 overflow.
        nlev = next(
            depth
            for depth in reversed(range(1, len(shape) + 1))
            if not is_int64_overflow_possible(shape[:depth])
        )

        # Level 0 is weighted by the product of the sizes of levels
        # 1 .. nlev - 1.
        weight = np.prod(shape[1:nlev], dtype="i8")
        left_key = llab[0].astype("i8", subok=False, copy=False) * weight
        right_key = rlab[0].astype("i8", subok=False, copy=False) * weight

        for level in range(1, nlev):
            # Divide-by-zero warnings (a level of size 0) are deliberately
            # silenced while stepping the weight down to the next level.
            with np.errstate(divide="ignore"):
                weight //= shape[level]
            left_key += llab[level] * weight
            right_key += rlab[level] * weight

        if nlev == len(shape):
            # Every level has been folded into the keys.
            return left_key, right_key

        # Densify the partial keys to avoid overflow, then continue with
        # them standing in for the levels already consumed.
        left_key, right_key, n_keys = _factorize_keys(left_key, right_key, sort=sort)

        llab = [left_key, *llab[nlev:]]
        rlab = [right_key, *rlab[nlev:]]
        shape = (n_keys, *shape[nlev:])
3043
3044
3045	def _should_fill(lname, rname) -> bool:

get_join_indexersFunction · 0.85

_get_multiindex_indexerFunction · 0.85

is_int64_overflow_possibleFunction · 0.90

_factorize_keysFunction · 0.85

prodMethod · 0.45

astypeMethod · 0.45

no test coverage detected