| 139 | |
| 140 | |
def test_value_counts_bins(index_or_series, using_infer_string):
    """Exercise ``value_counts(bins=...)`` and NA handling of ``value_counts``/``unique``.

    Parameters
    ----------
    index_or_series : callable
        Constructor under test; it is called with a list of values.
        Presumably the ``Index``/``Series`` fixture from the suite's
        conftest -- not visible from here, confirm.
    using_infer_string : bool
        When truthy, the expected ``unique()`` result for the string data is
        built with ``array(..., dtype="str")`` instead of an object ndarray.
    """
    klass = index_or_series
    s_values = ["a", "b", "b", "b", "b", "c", "d", "d", "a", "a"]
    s = klass(s_values)

    # bins
    msg = "bins argument only works with numeric data"
    with pytest.raises(TypeError, match=msg):
        s.value_counts(bins=1)

    # The numeric checks always run on a Series, independent of ``klass``,
    # so no Index-specific branch is needed for ``s1``.
    s1 = Series([1, 1, 2, 3])
    res1 = s1.value_counts(bins=1)
    exp1 = Series({Interval(0.997, 3.0): 4}, name="count")
    tm.assert_series_equal(res1, exp1)
    res1n = s1.value_counts(bins=1, normalize=True)
    exp1n = Series({Interval(0.997, 3.0): 1.0}, name="proportion")
    tm.assert_series_equal(res1n, exp1n)

    exp = np.array([1, 2, 3], dtype=np.int64)
    tm.assert_numpy_array_equal(s1.unique(), exp)

    assert s1.nunique() == 3

    # these return the same
    intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0])
    # Expected order puts the empty bin (position 2 of ``intervals``) last.
    ordered_bins = intervals.take([0, 1, 3, 2])
    exp4 = Series([2, 1, 1, 0], index=ordered_bins, name="count")
    for dropna in (True, False):
        res4 = s1.value_counts(bins=4, dropna=dropna)
        tm.assert_series_equal(res4, exp4)

    res4n = s1.value_counts(bins=4, normalize=True)
    exp4n = Series([0.5, 0.25, 0.25, 0], index=ordered_bins, name="proportion")
    tm.assert_series_equal(res4n, exp4n)

    # handle NA's properly
    s_values = ["a", "b", "b", "b", np.nan, np.nan, "d", "d", "a", "a", "b"]
    s = klass(s_values)
    # NaN entries are absent from the expected counts.
    expected = Series([4, 3, 2], index=["b", "a", "d"], name="count")
    tm.assert_series_equal(s.value_counts(), expected)

    if isinstance(s, Index):
        exp = Index(["a", "b", np.nan, "d"])
        tm.assert_index_equal(s.unique(), exp)
    else:
        exp = np.array(["a", "b", np.nan, "d"], dtype=object)
        if using_infer_string:
            exp = array(exp, dtype="str")
        tm.assert_equal(s.unique(), exp)
    # nunique() is expected to count only "a", "b" and "d".
    assert s.nunique() == 3