# Parameter labels, listed in the same order as the (unique, sort, dtype)
# arguments accepted by setup() and time_factorize().
param_names = ["unique", "sort", "dtype"]
| 33 | |
| 34 | def setup(self, unique, sort, dtype): |
| 35 | N = 10**5 |
| 36 | |
| 37 | if dtype in ["int64", "uint64", "Int64", "object"]: |
| 38 | data = pd.Index(np.arange(N), dtype=dtype) |
| 39 | elif dtype == "float64": |
| 40 | data = pd.Index(np.random.randn(N), dtype=dtype) |
| 41 | elif dtype == "boolean": |
| 42 | data = pd.array(np.random.randint(0, 2, N), dtype=dtype) |
| 43 | elif dtype == "datetime64[ns]": |
| 44 | data = pd.date_range("2011-01-01", freq="h", periods=N) |
| 45 | elif dtype == "datetime64[ns, tz]": |
| 46 | data = pd.date_range("2011-01-01", freq="h", periods=N, tz="Asia/Tokyo") |
| 47 | elif dtype == "object_str": |
| 48 | data = pd.Index([f"i-{i}" for i in range(N)], dtype=object) |
| 49 | elif dtype == "string[pyarrow]": |
| 50 | data = pd.array( |
| 51 | pd.Index([f"i-{i}" for i in range(N)], dtype=object), |
| 52 | dtype="string[pyarrow]", |
| 53 | ) |
| 54 | else: |
| 55 | raise NotImplementedError |
| 56 | |
| 57 | if not unique: |
| 58 | data = data.repeat(5) |
| 59 | self.data = data |
| 60 | |
def time_factorize(self, unique, sort, dtype):
    """Run ``pd.factorize`` on the data prepared by ``setup``.

    Only ``sort`` is forwarded to ``pd.factorize``; ``unique`` and
    ``dtype`` are not used in this method. The result is discarded.
    """
    values = self.data
    pd.factorize(values, sort=sort)