MCPcopy
hub / github.com/pandas-dev/pandas / setup

Method setup

asv_bench/benchmarks/groupby.py:490–570  ·  view source on GitHub ↗
(self, dtype, method, application, ncols, engine)

Source from the content-addressed store, hash-verified

488 ]
489
490 def setup(self, dtype, method, application, ncols, engine):
491 if method in method_blocklist.get(dtype, {}):
492 raise NotImplementedError # skip benchmark
493
494 if ncols != 1 and method in ["value_counts", "unique"]:
495 # DataFrameGroupBy doesn't have these methods
496 raise NotImplementedError
497
498 if application == "transformation" and method in [
499 "describe",
500 "head",
501 "tail",
502 "unique",
503 "value_counts",
504 "size",
505 ]:
506 # DataFrameGroupBy doesn't have these methods
507 raise NotImplementedError
508
509 # Numba currently doesn't support
510 # multiple transform functions or strs for transform,
511 # grouping on multiple columns
512 # and we lack kernels for a bunch of methods
513 if (
514 (engine == "numba" and method in _numba_unsupported_methods)
515 or ncols > 1
516 or application == "transformation"
517 or dtype == "datetime"
518 ):
519 raise NotImplementedError
520
521 if method == "describe":
522 ngroups = 20
523 elif method == "skew":
524 ngroups = 100
525 else:
526 ngroups = 1000
527 size = ngroups * 2
528 rng = np.arange(ngroups).reshape(-1, 1)
529 rng = np.broadcast_to(rng, (len(rng), ncols))
530 taker = np.random.randint(0, ngroups, size=size)
531 values = rng.take(taker, axis=0)
532 if dtype == "int":
533 key = np.random.randint(0, size, size=size)
534 elif dtype in ("int16", "uint"):
535 key = np.random.randint(0, size, size=size, dtype=dtype)
536 elif dtype == "float":
537 key = np.concatenate(
538 [np.random.random(ngroups) * 0.1, np.random.random(ngroups) * 10.0]
539 )
540 elif dtype == "object":
541 key = ["foo"] * size
542 elif dtype == "datetime":
543 key = date_range("1/1/2011", periods=size, freq="s")
544
545 cols = [f"values{n}" for n in range(ncols)]
546 df = DataFrame(values, columns=cols)
547 df["key"] = key

Callers

nothing calls this directly

Calls 7

groupby · Method · 0.95
date_range · Function · 0.90
DataFrame · Class · 0.90
get · Method · 0.45
reshape · Method · 0.45
take · Method · 0.45
transform · Method · 0.45

Tested by

no test coverage detected