MCPcopy
hub / github.com/pandas-dev/pandas / test_extractall

Function test_extractall

pandas/tests/strings/test_extract.py:397–479  ·  view source on GitHub ↗
(any_string_dtype)

Source from the content-addressed store, hash-verified

395
396
397def test_extractall(any_string_dtype):
398 data = [
399 "dave@google.com",
400 "tdhock5@gmail.com",
401 "maudelaperriere@gmail.com",
402 "rob@gmail.com some text steve@gmail.com",
403 "a@b.com some text c@d.com and e@f.com",
404 np.nan,
405 "",
406 ]
407 expected_tuples = [
408 ("dave", "google", "com"),
409 ("tdhock5", "gmail", "com"),
410 ("maudelaperriere", "gmail", "com"),
411 ("rob", "gmail", "com"),
412 ("steve", "gmail", "com"),
413 ("a", "b", "com"),
414 ("c", "d", "com"),
415 ("e", "f", "com"),
416 ]
417 pat = r"""
418 (?P<user>[a-z0-9]+)
419 @
420 (?P<domain>[a-z]+)
421 \.
422 (?P<tld>[a-z]{2,4})
423 """
424 expected_columns = ["user", "domain", "tld"]
425 s = Series(data, dtype=any_string_dtype)
426 # extractall should return a DataFrame with one row for each match, indexed by the
427 # subject from which the match came.
428 expected_index = MultiIndex.from_tuples(
429 [(0, 0), (1, 0), (2, 0), (3, 0), (3, 1), (4, 0), (4, 1), (4, 2)],
430 names=(None, "match"),
431 )
432 expected = DataFrame(
433 expected_tuples, expected_index, expected_columns, dtype=any_string_dtype
434 )
435 result = s.str.extractall(pat, flags=re.VERBOSE)
436 tm.assert_frame_equal(result, expected)
437
438 # The index of the input Series should be used to construct the index of the output
439 # DataFrame:
440 mi = MultiIndex.from_tuples(
441 [
442 ("single", "Dave"),
443 ("single", "Toby"),
444 ("single", "Maude"),
445 ("multiple", "robAndSteve"),
446 ("multiple", "abcdef"),
447 ("none", "missing"),
448 ("none", "empty"),
449 ]
450 )
451 s = Series(data, index=mi, dtype=any_string_dtype)
452 expected_index = MultiIndex.from_tuples(
453 [
454 ("single", "Dave", 0),

Callers

nothing calls this directly

Calls 4

Series (Class) · 0.90
DataFrame (Class) · 0.90
extractall (Method) · 0.80
from_tuples (Method) · 0.45

Tested by

no test coverage detected