MCPcopy
hub / github.com/pandas-dev/pandas / str_extractall

Function str_extractall

pandas/core/strings/accessor.py:4847–4882  ·  view source on GitHub ↗
(arr, pat, flags: int = 0)

Source from the content-addressed store, hash-verified

4845
4846
def str_extractall(arr, pat, flags: int = 0) -> DataFrame:
    """
    Gather every regex match found in the string entries of ``arr``.

    Parameters
    ----------
    arr : Series-like or Index
        Values to search. An Index is first converted to a Series with a
        fresh default index, cast back to the Index's dtype. Entries that
        are not ``str`` instances are skipped.
    pat : str or compiled pattern
        Regular expression handed to ``re.compile``; it must define at
        least one capture group.
    flags : int, default 0
        Flags forwarded to ``re.compile``.

    Returns
    -------
    DataFrame
        One row per match, holding that match's capture-group values.
        Column labels come from ``_get_group_names`` and the dtype from
        ``_result_dtype``. The row index carries the levels of
        ``arr.index`` followed by a ``"match"`` level that numbers the
        matches within each subject starting at 0. Group values equal to
        the empty string are stored as NaN.

    Raises
    ------
    ValueError
        If the compiled pattern has no capture groups.
    """
    regex = re.compile(pat, flags=flags)
    # The pattern is required to define at least one capture group.
    if regex.groups == 0:
        raise ValueError("pattern contains no capture groups")

    if isinstance(arr, ABCIndex):
        arr = arr.to_series().reset_index(drop=True).astype(arr.dtype)

    columns = _get_group_names(regex)
    has_multiindex = arr.index.nlevels > 1
    rows = []
    row_keys = []

    for label, value in arr.items():
        if not isinstance(value, str):
            continue

        # Keys are handled as tuples so the match counter can be appended
        # uniformly; a flat index label is wrapped in a 1-tuple.
        key_prefix = label if has_multiindex else (label,)

        for match_number, groups in enumerate(regex.findall(value)):
            # findall yields a bare string, not a tuple, when the result
            # for a match is a single value; normalise to a tuple.
            if isinstance(groups, str):
                groups = (groups,)
            rows.append([np.nan if text == "" else text for text in groups])
            row_keys.append((*key_prefix, match_number))

    from pandas import MultiIndex

    level_names = [*arr.index.names, "match"]
    index = MultiIndex.from_tuples(row_keys, names=level_names)

    return arr._constructor_expanddim(
        rows, index=index, columns=columns, dtype=_result_dtype(arr)
    )

Callers 1

extractall · Method · 0.85

Calls 10

_get_group_names · Function · 0.85
_result_dtype · Function · 0.85
findall · Method · 0.80
astype · Method · 0.45
reset_index · Method · 0.45
to_series · Method · 0.45
items · Method · 0.45
append · Method · 0.45
from_tuples · Method · 0.45

Tested by

no test coverage detected