MCPcopy
hub / github.com/pandas-dev/pandas / test_hashtable_unique

Method test_hashtable_unique

pandas/tests/test_algos.py:1724–1751  ·  view source on GitHub ↗
(self, htable, data, writable)

Source from the content-addressed store, hash-verified

1722 ],
1723 )
1724 def test_hashtable_unique(self, htable, data, writable):
1725 # output of maker has guaranteed unique elements
1726 s = Series(data, dtype=data.dtype)
1727 if htable == ht.Float64HashTable:
1728 # add NaN for float column
1729 s.loc[500] = np.nan
1730 elif htable == ht.PyObjectHashTable:
1731 # use different NaN types for object column
1732 s.loc[500:502] = [np.nan, None, NaT]
1733
1734 # create duplicated selection
1735 s_duplicated = s.sample(frac=3, replace=True).reset_index(drop=True)
1736 s_duplicated.values.setflags(write=writable)
1737
1738 # drop_duplicates has own cython code (hash_table_func_helper.pxi)
1739 # and is tested separately; keeps first occurrence like ht.unique()
1740 expected_unique = s_duplicated.drop_duplicates(keep="first").values
1741 result_unique = htable().unique(s_duplicated.values)
1742 tm.assert_numpy_array_equal(result_unique, expected_unique)
1743
1744 # test return_inverse=True
1745 # reconstruction can only succeed if the inverse is correct
1746 result_unique, result_inverse = htable().unique(
1747 s_duplicated.values, return_inverse=True
1748 )
1749 tm.assert_numpy_array_equal(result_unique, expected_unique)
1750 reconstr = result_unique[result_inverse]
1751 tm.assert_numpy_array_equal(reconstr, s_duplicated.values)
1752
1753 @pytest.mark.parametrize(
1754 "htable, data",

Callers

nothing calls this directly

Calls 5

SeriesClass · 0.90
reset_indexMethod · 0.45
sampleMethod · 0.45
drop_duplicatesMethod · 0.45
uniqueMethod · 0.45

Tested by

no test coverage detected