-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAssignment2Question4
85 lines (71 loc) · 2.55 KB
/
Assignment2Question4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from itertools import product
from string import ascii_lowercase
import random
import string
import hashlib
#We are creating all possible three letter strings
#We add the first thousand to our bloom filter
#We use the rest (16,756) to test for false positives
#The graph below adds one item at a time and compares
#the other 16,756 possible strings and counts the total
#number of false positives. Then this is compared to the
#theoretical number
m = 1000 #size of the bloom filter
n = 17576 #total number of three letter strings
# k = 4
words = [''.join(i) for i in product(ascii_lowercase, repeat = 3)]
bloomfilter = [0] * m
lst = words[0:999] #this array will contain the first 1000 three letter words
test = words[1000:]
falsepositives = []
def add(word):
a = hash(word)%m
b = int(hashlib.md5(word.encode('utf-8')).hexdigest(),16)%m
c = int(hashlib.sha1(word.encode('utf-8')).hexdigest(),16)%m
d = int(hashlib.sha224(word.encode('utf-8')).hexdigest(),16)%m
bloomfilter[a-1] += 1
bloomfilter[b-1] += 1
bloomfilter[c-1] += 1
bloomfilter[d-1] += 1
def contain(word):
a = hash(word)%m
b = int(hashlib.md5(word.encode('utf-8')).hexdigest(),16)%m
c = int(hashlib.sha1(word.encode('utf-8')).hexdigest(),16)%m
d = int(hashlib.sha224(word.encode('utf-8')).hexdigest(),16)%m
if bloomfilter[a-1]>0 and bloomfilter[b-1]>0 and bloomfilter[c-1]>0 and bloomfilter[d-1]>0:
return True
else:
return False
def delete(word):
if contain(word) == True:
a = hash(word)%m
b = int(hashlib.md5(word.encode('utf-8')).hexdigest(),16)%m
c = int(hashlib.sha1(word.encode('utf-8')).hexdigest(),16)%m
d = int(hashlib.sha224(word.encode('utf-8')).hexdigest(),16)%m
bloomfilter[a-1] += -1
bloomfilter[b-1] += -1
bloomfilter[c-1] += -1
bloomfilter[d-1] += -1
for i in range(len(lst)): #for every word we add to Bloom Filter
add(lst[i]) #we calculate the total number of false positives
var = 0
for j in range(len(test)):
if contain(test[j]) == True:
var += 1
falsepositives.append(var)
##Graph##
import matplotlib.pyplot as plt
import math
x = []
expectedrate = [0]
for i in range(len(lst)):
x.append(i)
falsepositives[i] = falsepositives[i]/16576 #false positive rate = total number of false positives / total number of observations
for i in range(1,len(lst)):
expectedrate.append(pow(1 - math.exp(-4 / (1000 / i)), 4))
plt.plot(x, falsepositives)
plt.plot(x, expectedrate)
plt.xlabel('Number of items')
plt.ylabel('False positive rate')
plt.title('False positive rate as function of n')
plt.show()