A Bloom filter for instances of {@code T}. A Bloom filter offers an approximate containment test with one-sided error: if it claims that an element is contained in it, this might be in error, but if it claims that an element is <i>not</i> contained in it, then this is definitely true. <p>If you are
| 69 | * @since 11.0 (thread-safe since 23.0) |
| 70 | */ |
| 71 | @Beta |
| 72 | public final class BloomFilter<T extends @Nullable Object> implements Predicate<T>, Serializable { |
| 73 | /** |
| 74 | * A strategy to translate T instances, to {@code numHashFunctions} bit indexes. |
| 75 | * |
| 76 | * <p>Implementations should be collections of pure functions (i.e. stateless). |
| 77 | */ |
| 78 | interface Strategy extends Serializable { |
| 79 | |
| 80 | /** |
| 81 | * Sets {@code numHashFunctions} bits of the given bit array, by hashing a user element. |
| 82 | * |
| 83 | * <p>Returns whether any bits changed as a result of this operation. |
| 84 | */ |
| 85 | <T extends @Nullable Object> boolean put( |
| 86 | @ParametricNullness T object, |
| 87 | Funnel<? super T> funnel, |
| 88 | int numHashFunctions, |
| 89 | LockFreeBitArray bits); |
| 90 | |
| 91 | /** |
| 92 | * Queries {@code numHashFunctions} bits of the given bit array, by hashing a user element; |
| 93 | * returns {@code true} if and only if all selected bits are set. |
| 94 | */ |
| 95 | <T extends @Nullable Object> boolean mightContain( |
| 96 | @ParametricNullness T object, |
| 97 | Funnel<? super T> funnel, |
| 98 | int numHashFunctions, |
| 99 | LockFreeBitArray bits); |
| 100 | |
| 101 | /** |
| 102 | * Identifier used to encode this strategy, when marshalled as part of a BloomFilter. Only |
| 103 | * values in the [-128, 127] range are valid for the compact serial form. Non-negative values |
| 104 | * are reserved for enums defined in BloomFilterStrategies; negative values are reserved for any |
| 105 | * custom, stateful strategy we may define (e.g. any kind of strategy that would depend on user |
| 106 | * input). |
| 107 | */ |
| 108 | int ordinal(); |
| 109 | } |
| 110 | |
| 111 | /** The bit set of the BloomFilter (not necessarily power of 2!) */ |
| 112 | private final LockFreeBitArray bits; |
| 113 | |
| 114 | /** Number of hashes per element */ |
| 115 | private final int numHashFunctions; |
| 116 | |
| 117 | /** The funnel to translate Ts to bytes */ |
| 118 | private final Funnel<? super T> funnel; |
| 119 | |
| 120 | /** The strategy we employ to map an element T to {@code numHashFunctions} bit indexes. */ |
| 121 | private final Strategy strategy; |
| 122 | |
| 123 | /** Natural logarithm of 2, used to optimize calculations in Bloom filter sizing. */ |
| 124 | private static final double LOG_TWO = Math.log(2); |
| 125 | |
| 126 | /** Square of the natural logarithm of 2, reused to optimize the bit size calculation. */ |
| 127 | private static final double SQUARED_LOG_TWO = LOG_TWO * LOG_TWO; |
| 128 |