r""" Extract capture groups in the regex `pat` as columns in a DataFrame. For each subject string in the Series, extract groups from the first match of regular expression `pat`. Parameters ---------- pat : str Regular expression pattern with capturing groups.
(
self, pat: str, flags: int = 0, expand: bool = True
)
| 3330 | |
| 3331 | @forbid_nonstring_types(["bytes"]) |
| 3332 | def extract( |
| 3333 | self, pat: str, flags: int = 0, expand: bool = True |
| 3334 | ) -> DataFrame | Series | Index: |
| 3335 | r""" |
| 3336 | Extract capture groups in the regex `pat` as columns in a DataFrame. |
| 3337 | |
| 3338 | For each subject string in the Series, extract groups from the |
| 3339 | first match of regular expression `pat`. |
| 3340 | |
| 3341 | Parameters |
| 3342 | ---------- |
| 3343 | pat : str |
| 3344 | Regular expression pattern with capturing groups. |
| 3345 | flags : int, default 0 (no flags) |
| 3346 | Flags from the ``re`` module, e.g. ``re.IGNORECASE``, that |
| 3347 | modify regular expression matching for things like case, |
| 3348 | spaces, etc. For more details, see :mod:`re`. |
| 3349 | expand : bool, default True |
| 3350 | If True, return DataFrame with one column per capture group. |
| 3351 | If False, return a Series/Index if there is one capture group |
| 3352 | or DataFrame if there are multiple capture groups. |
| 3353 | |
| 3354 | Returns |
| 3355 | ------- |
| 3356 | DataFrame or Series or Index |
| 3357 | A DataFrame with one row for each subject string, and one |
| 3358 | column for each group. Any capture group names in regular |
| 3359 | expression `pat` will be used for column names; otherwise |
| 3360 | capture group numbers will be used. The dtype of each result |
| 3361 | column is always object, even when no match is found. If |
| 3362 | ``expand=False`` and `pat` has only one capture group, then |
| 3363 | return a Series (if subject is a Series) or Index (if subject |
| 3364 | is an Index). |
| 3365 | |
| 3366 | See Also |
| 3367 | -------- |
| 3368 | extractall : Returns all matches (not just the first match). |
| 3369 | |
| 3370 | Examples |
| 3371 | -------- |
| 3372 | A pattern with two groups will return a DataFrame with two columns. |
| 3373 | Non-matches will be NaN. |
| 3374 | |
| 3375 | >>> s = pd.Series(["a1", "b2", "c3"]) |
| 3376 | >>> s.str.extract(r"([ab])(\d)") |
| 3377 | 0 1 |
| 3378 | 0 a 1 |
| 3379 | 1 b 2 |
| 3380 | 2 NaN NaN |
| 3381 | |
| 3382 | A pattern may contain optional groups. |
| 3383 | |
| 3384 | >>> s.str.extract(r"([ab])?(\d)") |
| 3385 | 0 1 |
| 3386 | 0 a 1 |
| 3387 | 1 b 2 |
| 3388 | 2 NaN 3 |
| 3389 |
nothing calls this directly
no test coverage detected