MCPcopy
hub / github.com/pandas-dev/pandas / _do_convert_categoricals

Method _do_convert_categoricals

pandas/io/stata.py:1884–1960  ·  view source on GitHub ↗

Converts categorical columns to Categorical type.

(
        self,
        data: DataFrame,
        value_label_dict: dict[str, dict[int, str]],
        lbllist: Sequence[str],
        order_categoricals: bool,
    )

Source from the content-addressed store, hash-verified

1882 return data[columns]
1883
1884 def _do_convert_categoricals(
1885 self,
1886 data: DataFrame,
1887 value_label_dict: dict[str, dict[int, str]],
1888 lbllist: Sequence[str],
1889 order_categoricals: bool,
1890 ) -> DataFrame:
1891 """
1892 Converts categorical columns to Categorical type.
1893 """
1894 if not value_label_dict:
1895 return data
1896 cat_converted_data = []
1897 for col, label in zip(data, lbllist, strict=True):
1898 if label in value_label_dict:
1899 # Explicit call with ordered=True
1900 vl = value_label_dict[label]
1901 keys = np.array(list(vl.keys()))
1902 column = data[col]
1903 key_matches = column.isin(keys)
1904 if self._using_iterator and key_matches.all():
1905 initial_categories: np.ndarray | None = keys
1906 # If all categories are in the keys and we are iterating,
1907 # use the same keys for all chunks. If some are missing
1908 # value labels, then we will fall back to the categories
1909 # varying across chunks.
1910 else:
1911 if self._using_iterator:
1912 # warn is using an iterator
1913 warnings.warn(
1914 categorical_conversion_warning,
1915 CategoricalConversionWarning,
1916 stacklevel=find_stack_level(),
1917 )
1918 initial_categories = None
1919 cat_data = Categorical(
1920 column, categories=initial_categories, ordered=order_categoricals
1921 )
1922 if initial_categories is None:
1923 # If None here, then we need to match the cats in the Categorical
1924 categories = []
1925 for category in cat_data.categories:
1926 if category in vl:
1927 categories.append(vl[category])
1928 else:
1929 categories.append(category)
1930 else:
1931 # If all cats are matched, we can use the values
1932 categories = list(vl.values())
1933 try:
1934 # Try to catch duplicate categories
1935 # TODO: if we get a non-copying rename_categories, use that
1936 cat_data = cat_data.rename_categories(categories)
1937 except ValueError as err:
1938 vc = Series(categories, copy=False).value_counts()
1939 repeated_cats = list(vc.index[vc > 1])
1940 repeats = "-" * 80 + "\n" + "\n".join(repeated_cats)
1941 # GH 25772

Callers 1

read · Method · 0.95

Calls 13

rename_categories · Method · 0.95
find_stack_level · Function · 0.90
Categorical · Class · 0.90
Series · Class · 0.90
DataFrame · Class · 0.90
array · Method · 0.45
keys · Method · 0.45
isin · Method · 0.45
all · Method · 0.45
append · Method · 0.45
values · Method · 0.45
value_counts · Method · 0.45

Tested by

no test coverage detected