hub / github.com/pandas-dev/pandas / setup

Method setup

asv_bench/benchmarks/groupby.py:490–570 · view source on GitHub ↗

(self, dtype, method, application, ncols, engine)

Source from the content-addressed store, hash-verified

488	]
489
490	def setup(self, dtype, method, application, ncols, engine):
491	if method in method_blocklist.get(dtype, {}):
492	raise NotImplementedError # skip benchmark
493
494	if ncols != 1 and method in ["value_counts", "unique"]:
495	# DataFrameGroupBy doesn't have these methods
496	raise NotImplementedError
497
498	if application == "transformation" and method in [
499	"describe",
500	"head",
501	"tail",
502	"unique",
503	"value_counts",
504	"size",
505	]:
506	# These methods are not valid transformations
507	raise NotImplementedError
508
509	# Numba currently doesn't support
510	# multiple transform functions or strs for transform,
511	# grouping on multiple columns
512	# and we lack kernels for a bunch of methods
513	if (
514	(engine == "numba" and method in _numba_unsupported_methods)
515	or ncols > 1
516	or application == "transformation"
517	or dtype == "datetime"
518	):
519	raise NotImplementedError
520
521	if method == "describe":
522	ngroups = 20
523	elif method == "skew":
524	ngroups = 100
525	else:
526	ngroups = 1000
527	size = ngroups * 2
528	rng = np.arange(ngroups).reshape(-1, 1)
529	rng = np.broadcast_to(rng, (len(rng), ncols))
530	taker = np.random.randint(0, ngroups, size=size)
531	values = rng.take(taker, axis=0)
532	if dtype == "int":
533	key = np.random.randint(0, size, size=size)
534	elif dtype in ("int16", "uint"):
535	key = np.random.randint(0, size, size=size, dtype=dtype)
536	elif dtype == "float":
537	key = np.concatenate(
538	[np.random.random(ngroups) * 0.1, np.random.random(ngroups) * 10.0]
539	)
540	elif dtype == "object":
541	key = ["foo"] * size
542	elif dtype == "datetime":
543	key = date_range("1/1/2011", periods=size, freq="s")
544
545	cols = [f"values{n}" for n in range(ncols)]
546	df = DataFrame(values, columns=cols)
547	df["key"] = key

Callers

nothing calls this directly

Calls 7

groupbyMethod · 0.95

date_rangeFunction · 0.90

DataFrameClass · 0.90

getMethod · 0.45

reshapeMethod · 0.45

takeMethod · 0.45

transformMethod · 0.45

Tested by

no test coverage detected