MCPcopy
hub / github.com/pandas-dev/pandas / _check_column_names

Method _check_column_names

pandas/io/stata.py:2651–2722  ·  view source on GitHub ↗

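Checks column names to ensure that they are valid Stata column names. This includes checks for: non-string names; Stata keywords; variables that start with numbers; and variables with names that are too long. When an illegal variable name is detected, it is converted, and if dates are exported, the variable name is propagated to the date conversion dictionary.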

(self, data: DataFrame)

Source from the content-addressed store, hash-verified

2649 return name
2650
2651 def _check_column_names(self, data: DataFrame) -> DataFrame:
2652 """
2653 Checks column names to ensure that they are valid Stata column names.
2654 This includes checks for:
2655 * Non-string names
2656 * Stata keywords
2657 * Variables that start with numbers
2658 * Variables with names that are too long
2659
2660 When an illegal variable name is detected, it is converted, and if
2661 dates are exported, the variable name is propagated to the date
2662 conversion dictionary
2663 """
2664 converted_names: dict[Hashable, str] = {}
2665 columns = list(data.columns)
2666 original_columns = columns[:]
2667
2668 duplicate_var_id = 0
2669 for j, name in enumerate(columns):
2670 orig_name = name
2671 if not isinstance(name, str):
2672 name = str(name)
2673
2674 name = self._validate_variable_name(name)
2675
2676 # Variable name must not be a reserved word
2677 if name in self.RESERVED_WORDS:
2678 name = "_" + name
2679
2680 # Variable name may not start with a number
2681 if "0" <= name[0] <= "9":
2682 name = "_" + name
2683
2684 name = name[: min(len(name), 32)]
2685
2686 if not name == orig_name:
2687 # check for duplicates
2688 while columns.count(name) > 0:
2689 # prepend ascending number to avoid duplicates
2690 name = "_" + str(duplicate_var_id) + name
2691 name = name[: min(len(name), 32)]
2692 duplicate_var_id += 1
2693 converted_names[orig_name] = name
2694
2695 columns[j] = name
2696
2697 data.columns = Index(columns)
2698
2699 # Check date conversion, and fix key if needed
2700 if self._convert_dates:
2701 for c, o in zip(columns, original_columns, strict=True):
2702 if c != o:
2703 self._convert_dates[c] = self._convert_dates[o]
2704 del self._convert_dates[o]
2705
2706 if converted_names:
2707 conversion_warning = []
2708 for orig_name, name in converted_names.items():

Callers 1

_prepare_pandasMethod · 0.95

Calls 10

_update_strl_namesMethod · 0.95
IndexClass · 0.90
find_stack_levelFunction · 0.90
minFunction · 0.85
countMethod · 0.45
itemsMethod · 0.45
appendMethod · 0.45
formatMethod · 0.45
joinMethod · 0.45

Tested by

no test coverage detected