Skip to content
This repository was archived by the owner on Jul 7, 2020. It is now read-only.

Commit 0e2adba

Browse files
committed
Add cardinality support to BloomFilter.
1 parent dd912cc commit 0e2adba

File tree

2 files changed

+29
-2
lines changed

2 files changed

+29
-2
lines changed

src/main/java/com/clearspring/analytics/stream/membership/BloomFilter.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,12 @@ public boolean isPresent(byte[] key) {
8383
return true;
8484
}
8585

86+
public long cardinality() {
87+
int m = buckets();
88+
double fractionOfBits = ((double) filter_.cardinality()) / m;
89+
return Math.round(-m * Math.log1p(-fractionOfBits) / hashCount);
90+
}
91+
8692
/*
8793
@param key -- value whose hash is used to fill
8894
the filter_.

src/test/java/com/clearspring/analytics/stream/membership/BloomFilterTest.java

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
* KIND, either express or implied. See the License for the
1616
* specific language governing permissions and limitations
1717
* under the License.
18-
*
18+
*
1919
*/
2020
package com.clearspring.analytics.stream.membership;
2121

@@ -111,6 +111,27 @@ public void testSerialize() throws IOException {
111111
FilterTest.testSerialize(bf);
112112
}
113113

114+
@Test
115+
public void testCardinalityOne() {
116+
bf.add("a");
117+
assertEquals(1, bf.cardinality());
118+
}
119+
120+
@Test
121+
public void testCardinalityTwo() {
122+
bf.add("a");
123+
bf.add("b");
124+
assertEquals(2, bf.cardinality());
125+
}
126+
127+
@Test
128+
public void testCardinalityHundred() {
129+
for (int i = 0; i < 100; ++i) {
130+
bf.add(Integer.toString(i));
131+
}
132+
assertEquals(100, bf.cardinality());
133+
}
134+
114135
@Test
115136
public void testGetFalsePositiveProbability() {
116137
// These probabilities are taken from the bloom filter probability table at
@@ -204,7 +225,7 @@ public void testHashCount() {
204225

205226
instance = new BloomFilter(1, 7);
206227
assertEquals(5, instance.getHashCount());
207-
228+
208229
/*
209230
* Although technically 8*ln(2) = 5.545...
210231
* we round down here for speed

0 commit comments

Comments
 (0)