| 132 | |
class Hashing:
    """Benchmark ``hashing.hash_pandas_object`` on a mixed-dtype DataFrame.

    ``setup_cache`` builds the frame and ``time_frame`` hashes it.
    NOTE(review): the method names follow the asv benchmark convention, where
    the return value of ``setup_cache`` is passed to each benchmark method --
    confirm against the benchmark runner in use.
    """

    def setup_cache(self):
        """Build the DataFrame that the benchmark hashes.

        The frame has ``10**5`` rows and six columns: ``strings`` (object
        labels drawn at random from 10000 distinct values), ``floats``,
        ``ints``, ``dates``, ``timedeltas`` and ``categories`` (the string
        column as a categorical). Rows 10-19 are set to missing in every
        column.

        Returns
        -------
        pandas.DataFrame
        """
        N = 10**5

        df = pd.DataFrame(
            {
                # N labels sampled (with replacement) from 10000 unique
                # object-dtype strings, so the column contains repeats.
                "strings": pd.Series(
                    pd.Index([f"i-{i}" for i in range(10000)], dtype=object).take(
                        np.random.randint(0, 10000, size=N)
                    )
                ),
                "floats": np.random.randn(N),
                "ints": np.arange(N),
                "dates": pd.date_range("20110101", freq="s", periods=N),
                "timedeltas": pd.timedelta_range("1 day", freq="s", periods=N),
            }
        )
        df["categories"] = df["strings"].astype("category")
        # An integer column cannot hold NaN. Relying on the assignment below
        # to upcast it silently is deprecated in recent pandas, so do the
        # upcast explicitly; the resulting float64 column is the same.
        df["ints"] = df["ints"].astype("float64")
        # Inject a block of missing values into every column.
        df.iloc[10:20] = np.nan
        return df

    def time_frame(self, df):
        """Time hashing of the whole frame built by ``setup_cache``."""
        # `hashing` is a module-level name bound elsewhere in this file.
        hashing.hash_pandas_object(df)