| 488 | ] |
| 489 | |
| 490 | def setup(self, dtype, method, application, ncols, engine): |
| 491 | if method in method_blocklist.get(dtype, {}): |
| 492 | raise NotImplementedError # skip benchmark |
| 493 | |
| 494 | if ncols != 1 and method in ["value_counts", "unique"]: |
| 495 | # DataFrameGroupBy doesn't have these methods |
| 496 | raise NotImplementedError |
| 497 | |
| 498 | if application == "transformation" and method in [ |
| 499 | "describe", |
| 500 | "head", |
| 501 | "tail", |
| 502 | "unique", |
| 503 | "value_counts", |
| 504 | "size", |
| 505 | ]: |
| 506 | # DataFrameGroupBy doesn't have these methods |
| 507 | raise NotImplementedError |
| 508 | |
| 509 | # Numba currently doesn't support |
| 510 | # multiple transform functions or strs for transform, |
| 511 | # grouping on multiple columns |
| 512 | # and we lack kernels for a bunch of methods |
| 513 | if ( |
| 514 | (engine == "numba" and method in _numba_unsupported_methods) |
| 515 | or ncols > 1 |
| 516 | or application == "transformation" |
| 517 | or dtype == "datetime" |
| 518 | ): |
| 519 | raise NotImplementedError |
| 520 | |
| 521 | if method == "describe": |
| 522 | ngroups = 20 |
| 523 | elif method == "skew": |
| 524 | ngroups = 100 |
| 525 | else: |
| 526 | ngroups = 1000 |
| 527 | size = ngroups * 2 |
| 528 | rng = np.arange(ngroups).reshape(-1, 1) |
| 529 | rng = np.broadcast_to(rng, (len(rng), ncols)) |
| 530 | taker = np.random.randint(0, ngroups, size=size) |
| 531 | values = rng.take(taker, axis=0) |
| 532 | if dtype == "int": |
| 533 | key = np.random.randint(0, size, size=size) |
| 534 | elif dtype in ("int16", "uint"): |
| 535 | key = np.random.randint(0, size, size=size, dtype=dtype) |
| 536 | elif dtype == "float": |
| 537 | key = np.concatenate( |
| 538 | [np.random.random(ngroups) * 0.1, np.random.random(ngroups) * 10.0] |
| 539 | ) |
| 540 | elif dtype == "object": |
| 541 | key = ["foo"] * size |
| 542 | elif dtype == "datetime": |
| 543 | key = date_range("1/1/2011", periods=size, freq="s") |
| 544 | |
| 545 | cols = [f"values{n}" for n in range(ncols)] |
| 546 | df = DataFrame(values, columns=cols) |
| 547 | df["key"] = key |