MCPcopy
hub / github.com/pandas-dev/pandas / _get_join_keys

Function _get_join_keys

pandas/core/reshape/merge.py:3008–3042  ·  view source on GitHub ↗
(
    llab: list[npt.NDArray[np.int64 | np.intp]],
    rlab: list[npt.NDArray[np.int64 | np.intp]],
    shape: Shape,
    sort: bool,
)

Source from the content-addressed store, hash-verified

3006
3007
def _get_join_keys(
    llab: list[npt.NDArray[np.int64 | np.intp]],
    rlab: list[npt.NDArray[np.int64 | np.intp]],
    shape: Shape,
    sort: bool,
) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]:
    """
    Collapse per-level label arrays into one int64 key array per side.

    The leading levels are combined positionally: each level's labels are
    multiplied by the product of the sizes of the levels after it and the
    results are summed. Only as many leading levels as pass the
    ``is_int64_overflow_possible`` check are combined in one go; if levels
    remain, the partial keys are re-labelled through ``_factorize_keys`` and
    the function recurses on the shortened problem.

    Parameters
    ----------
    llab : list of ndarray
        Integer label arrays for the left side, one per level.
    rlab : list of ndarray
        Integer label arrays for the right side, one per level.
    shape : Shape
        One size per level, used as the multipliers described above.
        NOTE(review): presumably ``shape[i]`` bounds the labels of level
        ``i`` -- confirm against the callers.
    sort : bool
        Forwarded unchanged to ``_factorize_keys`` when re-labelling.

    Returns
    -------
    tuple of (ndarray, ndarray)
        The combined left and right key arrays.
    """
    total_levels = len(shape)

    # Longest leading run of levels that the overflow check accepts, searched
    # from the full length downwards.
    safe_levels = next(
        depth
        for depth in range(total_levels, 0, -1)
        if not is_int64_overflow_possible(shape[:depth])
    )

    # Level 0 is weighted by the product of the sizes of levels 1..safe_levels-1.
    weight = np.prod(shape[1:safe_levels], dtype="i8")
    left_key = weight * llab[0].astype("i8", subok=False, copy=False)
    right_key = weight * rlab[0].astype("i8", subok=False, copy=False)

    # Each following level gets the previous weight divided by its own size.
    for level in range(1, safe_levels):
        # Divide warnings are silenced here; presumably a level size can be
        # zero -- TODO confirm.
        with np.errstate(divide="ignore"):
            weight //= shape[level]
        left_key += llab[level] * weight
        right_key += rlab[level] * weight

    if safe_levels != total_levels:
        # Levels remain: densify the partial keys to avoid overflow, then treat
        # them as a single leading level whose size is the returned count.
        left_key, right_key, n_distinct = _factorize_keys(
            left_key, right_key, sort=sort
        )
        return _get_join_keys(
            [left_key, *llab[safe_levels:]],
            [right_key, *rlab[safe_levels:]],
            (n_distinct, *shape[safe_levels:]),
            sort,
        )

    # Every level was folded in.
    return left_key, right_key
3043
3044
3045def _should_fill(lname, rname) -> bool:

Callers 2

get_join_indexersFunction · 0.85
_get_multiindex_indexerFunction · 0.85

Calls 4

_factorize_keysFunction · 0.85
prodMethod · 0.45
astypeMethod · 0.45

Tested by

no test coverage detected