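Checks column names to ensure that they are valid Stata column names. This includes checks for: * Non-string names * Stata keywords * Variables that start with numbers * Variables with names that are too long. When an illegal variable name is detected, it is converted, and if dates are exported, the variable name is propagated to the date conversion dictionary.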
(self, data: DataFrame)
| 2649 | return name |
| 2650 | |
| 2651 | def _check_column_names(self, data: DataFrame) -> DataFrame: |
| 2652 | """ |
| 2653 | Checks column names to ensure that they are valid Stata column names. |
| 2654 | This includes checks for: |
| 2655 | * Non-string names |
| 2656 | * Stata keywords |
| 2657 | * Variables that start with numbers |
| 2658 | * Variables with names that are too long |
| 2659 | |
| 2660 | When an illegal variable name is detected, it is converted, and if |
| 2661 | dates are exported, the variable name is propagated to the date |
| 2662 | conversion dictionary |
| 2663 | """ |
| 2664 | converted_names: dict[Hashable, str] = {} |
| 2665 | columns = list(data.columns) |
| 2666 | original_columns = columns[:] |
| 2667 | |
| 2668 | duplicate_var_id = 0 |
| 2669 | for j, name in enumerate(columns): |
| 2670 | orig_name = name |
| 2671 | if not isinstance(name, str): |
| 2672 | name = str(name) |
| 2673 | |
| 2674 | name = self._validate_variable_name(name) |
| 2675 | |
| 2676 | # Variable name must not be a reserved word |
| 2677 | if name in self.RESERVED_WORDS: |
| 2678 | name = "_" + name |
| 2679 | |
| 2680 | # Variable name may not start with a number |
| 2681 | if "0" <= name[0] <= "9": |
| 2682 | name = "_" + name |
| 2683 | |
| 2684 | name = name[: min(len(name), 32)] |
| 2685 | |
| 2686 | if not name == orig_name: |
| 2687 | # check for duplicates |
| 2688 | while columns.count(name) > 0: |
| 2689 | # prepend ascending number to avoid duplicates |
| 2690 | name = "_" + str(duplicate_var_id) + name |
| 2691 | name = name[: min(len(name), 32)] |
| 2692 | duplicate_var_id += 1 |
| 2693 | converted_names[orig_name] = name |
| 2694 | |
| 2695 | columns[j] = name |
| 2696 | |
| 2697 | data.columns = Index(columns) |
| 2698 | |
| 2699 | # Check date conversion, and fix key if needed |
| 2700 | if self._convert_dates: |
| 2701 | for c, o in zip(columns, original_columns, strict=True): |
| 2702 | if c != o: |
| 2703 | self._convert_dates[c] = self._convert_dates[o] |
| 2704 | del self._convert_dates[o] |
| 2705 | |
| 2706 | if converted_names: |
| 2707 | conversion_warning = [] |
| 2708 | for orig_name, name in converted_names.items(): |
no test coverage detected