Finds the longest common suffix or prefix of all the values in a series
(series: Any, xfix: str = "suffix")
| 727 | |
| 728 | |
def get_common_xfix(series: Any, xfix: str = "suffix") -> str:
    """
    Return the longest string shared by every value in `series` at one end.

    With `xfix == "suffix"` the end of each value is inspected; any other
    `xfix` value inspects the start of each value (i.e. the common prefix).
    `series` must expose the `.str` accessor (slicing), `.nunique()` and
    `.values`, as used below.

    The shared string is grown one character per pass: a window one character
    wider than the current result is cut from each value, and the result is
    extended while that window is the same for all values. An empty string is
    returned when the values have nothing in common at that end.
    """
    from_end = xfix == "suffix"
    shared = ""
    while True:
        # Look one character further than what is already known to be shared.
        width = len(shared) + 1
        if from_end:
            window = series.str[-width:]
        else:
            window = series.str[:width]

        # The values disagree on the wider window: `shared` is the answer.
        if window.nunique() != 1:
            return shared

        first = window.values[0]
        # Widening the window produced nothing new, so there are no further
        # characters to add; stop here instead of looping forever.
        if shared == first:
            return shared

        # The wider window is still common to all values; keep it and retry.
        shared = first
| 745 | |
| 746 | |
# Type alias for a validator: a callable that takes a `pd.DataFrame` and
# returns either a `Remediation` or `None`. The alias is written as a string,
# so the names inside it are not evaluated when this line runs.
Validator: TypeAlias = "Callable[[pd.DataFrame], Remediation | None]"
no outgoing calls
no test coverage detected