(arr, pat, flags: int = 0)
| 4845 | |
| 4846 | |
def str_extractall(arr, pat, flags: int = 0) -> DataFrame:
    """
    Gather every match of ``pat`` in each string element of ``arr``.

    Each match becomes one row of the result and each capture group of the
    pattern becomes one column. Rows are labelled by the key of the element
    the match came from, extended by the zero-based position of the match
    within that element (index level ``"match"``).

    Parameters
    ----------
    arr : Series or Index
        Values to search. An Index is first converted to a Series with a
        fresh default index, cast back to the Index's dtype.
    pat : str
        Regular expression; it must contain at least one capture group.
    flags : int, default 0
        Flags forwarded to ``re.compile``.

    Returns
    -------
    DataFrame
        Built through ``arr._constructor_expanddim``. Elements that are not
        ``str`` instances contribute no rows, and a group whose text is the
        empty string is stored as ``np.nan``.

    Raises
    ------
    ValueError
        If the compiled pattern has no capture groups.
    """
    compiled = re.compile(pat, flags=flags)
    if not compiled.groups:
        # Without capture groups there is nothing to turn into columns.
        raise ValueError("pattern contains no capture groups")

    if isinstance(arr, ABCIndex):
        arr = arr.to_series().reset_index(drop=True).astype(arr.dtype)

    columns = _get_group_names(compiled)
    has_multilevel_keys = arr.index.nlevels > 1
    rows = []
    row_keys = []

    for key, value in arr.items():
        if not isinstance(value, str):
            # Only real strings are searched; everything else is skipped.
            continue

        # Keys of a multi-level index are already tuples; wrap scalar keys so
        # both cases can be extended with the match position the same way.
        key_parts = key if has_multilevel_keys else (key,)

        for position, found in enumerate(compiled.findall(value)):
            # findall yields a bare string when the pattern has one group.
            groups = (found,) if isinstance(found, str) else found
            rows.append([np.nan if text == "" else text for text in groups])
            row_keys.append((*key_parts, position))

    # Imported locally, as in the original implementation.
    from pandas import MultiIndex

    return arr._constructor_expanddim(
        rows,
        index=MultiIndex.from_tuples(row_keys, names=[*arr.index.names, "match"]),
        columns=columns,
        dtype=_result_dtype(arr),
    )
no test coverage detected