(any_string_dtype)
| 395 | |
| 396 | |
| 397 | def test_extractall(any_string_dtype): |
| 398 | data = [ |
| 399 | "dave@google.com", |
| 400 | "tdhock5@gmail.com", |
| 401 | "maudelaperriere@gmail.com", |
| 402 | "rob@gmail.com some text steve@gmail.com", |
| 403 | "a@b.com some text c@d.com and e@f.com", |
| 404 | np.nan, |
| 405 | "", |
| 406 | ] |
| 407 | expected_tuples = [ |
| 408 | ("dave", "google", "com"), |
| 409 | ("tdhock5", "gmail", "com"), |
| 410 | ("maudelaperriere", "gmail", "com"), |
| 411 | ("rob", "gmail", "com"), |
| 412 | ("steve", "gmail", "com"), |
| 413 | ("a", "b", "com"), |
| 414 | ("c", "d", "com"), |
| 415 | ("e", "f", "com"), |
| 416 | ] |
| 417 | pat = r""" |
| 418 | (?P<user>[a-z0-9]+) |
| 419 | @ |
| 420 | (?P<domain>[a-z]+) |
| 421 | \. |
| 422 | (?P<tld>[a-z]{2,4}) |
| 423 | """ |
| 424 | expected_columns = ["user", "domain", "tld"] |
| 425 | s = Series(data, dtype=any_string_dtype) |
| 426 | # extractall should return a DataFrame with one row for each match, indexed by the |
| 427 | # subject from which the match came. |
| 428 | expected_index = MultiIndex.from_tuples( |
| 429 | [(0, 0), (1, 0), (2, 0), (3, 0), (3, 1), (4, 0), (4, 1), (4, 2)], |
| 430 | names=(None, "match"), |
| 431 | ) |
| 432 | expected = DataFrame( |
| 433 | expected_tuples, expected_index, expected_columns, dtype=any_string_dtype |
| 434 | ) |
| 435 | result = s.str.extractall(pat, flags=re.VERBOSE) |
| 436 | tm.assert_frame_equal(result, expected) |
| 437 | |
| 438 | # The index of the input Series should be used to construct the index of the output |
| 439 | # DataFrame: |
| 440 | mi = MultiIndex.from_tuples( |
| 441 | [ |
| 442 | ("single", "Dave"), |
| 443 | ("single", "Toby"), |
| 444 | ("single", "Maude"), |
| 445 | ("multiple", "robAndSteve"), |
| 446 | ("multiple", "abcdef"), |
| 447 | ("none", "missing"), |
| 448 | ("none", "empty"), |
| 449 | ] |
| 450 | ) |
| 451 | s = Series(data, index=mi, dtype=any_string_dtype) |
| 452 | expected_index = MultiIndex.from_tuples( |
| 453 | [ |
| 454 | ("single", "Dave", 0), |
nothing calls this directly
no test coverage detected