This class implements shuffling with a special consistency property. Consistency means that, for a given seed and key function, if two sets of items are shuffled, the resulting order will agree on the intersection of the two sets. For example, if items are removed from an original s
| 637 | |
| 638 | |
| 639 | class Shuffler: |
| 640 | """ |
| 641 | This class implements shuffling with a special consistency property. |
| 642 | Consistency means that, for a given seed and key function, if two sets of |
| 643 | items are shuffled, the resulting order will agree on the intersection of |
| 644 | the two sets. For example, if items are removed from an original set, the |
| 645 | shuffled order for the new set will be the shuffled order of the original |
| 646 | set restricted to the smaller set. |
| 647 | """ |
| 648 | |
| 649 | # This doesn't need to be cryptographically strong, so use what's fastest. |
| 650 | hash_algorithm = "md5" |
| 651 | |
| 652 | @classmethod |
| 653 | def _hash_text(cls, text): |
| 654 | h = hashlib.new(cls.hash_algorithm, usedforsecurity=False) |
| 655 | h.update(text.encode("utf-8")) |
| 656 | return h.hexdigest() |
| 657 | |
| 658 | def __init__(self, seed=None): |
| 659 | if seed is None: |
| 660 | # Limit seeds to 10 digits for simpler output. |
| 661 | seed = random.randint(0, 10**10 - 1) |
| 662 | seed_source = "generated" |
| 663 | else: |
| 664 | seed_source = "given" |
| 665 | self.seed = seed |
| 666 | self.seed_source = seed_source |
| 667 | |
| 668 | @property |
| 669 | def seed_display(self): |
| 670 | return f"{self.seed!r} ({self.seed_source})" |
| 671 | |
| 672 | def _hash_item(self, item, key): |
| 673 | text = "{}{}".format(self.seed, key(item)) |
| 674 | return self._hash_text(text) |
| 675 | |
| 676 | def shuffle(self, items, key): |
| 677 | """ |
| 678 | Return a new list of the items in a shuffled order. |
| 679 | |
| 680 | The `key` is a function that accepts an item in `items` and returns |
| 681 | a string unique for that item that can be viewed as a string id. The |
| 682 | order of the return value is deterministic. It depends on the seed |
| 683 | and key function but not on the original order. |
| 684 | """ |
| 685 | hashes = {} |
| 686 | for item in items: |
| 687 | hashed = self._hash_item(item, key) |
| 688 | if hashed in hashes: |
| 689 | msg = "item {!r} has same hash {!r} as item {!r}".format( |
| 690 | item, |
| 691 | hashed, |
| 692 | hashes[hashed], |
| 693 | ) |
| 694 | raise RuntimeError(msg) |
| 695 | hashes[hashed] = item |
| 696 | return [hashes[hashed] for hashed in sorted(hashes)] |
no outgoing calls