Skip to content

Commit 96430e1

Browse files
committed
refresh
1 parent 669b791 commit 96430e1

File tree

5 files changed

+1588
-3
lines changed

5 files changed

+1588
-3
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"cells": [],
3+
"metadata": {},
4+
"nbformat": 4,
5+
"nbformat_minor": 2
6+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"cells": [],
3+
"metadata": {},
4+
"nbformat": 4,
5+
"nbformat_minor": 2
6+
}

15. NaiveBayes/NaiveBayesMine.ipynb

Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 23,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import numpy as np"
10+
]
11+
},
12+
{
13+
"cell_type": "code",
14+
"execution_count": 27,
15+
"metadata": {},
16+
"outputs": [],
17+
"source": [
18+
"def fit(X_train,Y_train):\n",
19+
" result = {}\n",
20+
" class_values = set(Y_train)\n",
21+
" for curr_value in class_values:\n",
22+
" result[curr_value] = {}\n",
23+
" result[\"total_data\"] = len(Y_train)\n",
24+
" curr_class_rows = (Y_train == curr_value)\n",
25+
" X_train_curr = X_train[curr_class_rows]\n",
26+
" Y_train_curr = Y_train[curr_class_rows]\n",
27+
" num_features = X_train.shape[1]\n",
28+
" result[curr_value][\"total_count\"] = len(Y_train_curr)\n",
29+
" for j in range(1,num_features+1):\n",
30+
" result[curr_value][j] = {}\n",
31+
" all_possible_values = set(X_train[:,j-1])\n",
32+
" for this_value in all_possible_values:\n",
33+
" result[curr_value][j][this_value] = (X_train_curr[:,j-1]==this_value).sum()\n",
34+
" return result"
35+
]
36+
},
37+
{
38+
"cell_type": "code",
39+
"execution_count": 28,
40+
"metadata": {},
41+
"outputs": [],
42+
"source": [
43+
"def probablity(dictionary,x,current_class):\n",
44+
" output= np.log(dictionary[current_class][\"total_count\"])-np.log(dictionary[\"total_data\"])\n",
45+
" num_features = len(dictionary[current_class].keys())-1;\n",
46+
" for j in range(1,num_features+1):\n",
47+
" xj = x[j-1]\n",
48+
" count_current_class_with_value_xj = dictionary[current_class][j][xj] + 1 \n",
49+
" count_current_class = dictionary[current_class][\"total_count\"] + len(dictionary[current_class][j].keys())\n",
50+
" current_xj_prob = np.log(count_current_class_with_value_xj) -np.log(count_current_class)\n",
51+
" output = output + current_xj_prob\n",
52+
" return output "
53+
]
54+
},
55+
{
56+
"cell_type": "code",
57+
"execution_count": 29,
58+
"metadata": {},
59+
"outputs": [],
60+
"source": [
61+
"def predictSinglePoint(dictionary,x):\n",
62+
" classes = dictionary.keys()\n",
63+
" best_p = -1000\n",
64+
" best_class = -1\n",
65+
" first_run = True\n",
66+
" for current_class in classes:\n",
67+
" if(current_class == \"total_data\"):\n",
68+
" continue\n",
69+
" p_curr_class = probablity(dictionary,x,current_class)\n",
70+
" if(first_run or p_curr_class > best_p):\n",
71+
" best_p = p_curr_class\n",
72+
" best_class = current_class\n",
73+
" first_run = False\n",
74+
" return best_class"
75+
]
76+
},
77+
{
78+
"cell_type": "code",
79+
"execution_count": 30,
80+
"metadata": {},
81+
"outputs": [],
82+
"source": [
83+
"def predict(dictionary,X_test):\n",
84+
" Y_pred = []\n",
85+
" for x in X_test:\n",
86+
" x_class = predictSinglePoint(dictionary,x)\n",
87+
" Y_pred.append(x_class)\n",
88+
" return Y_pred"
89+
]
90+
},
91+
{
92+
"cell_type": "code",
93+
"execution_count": 31,
94+
"metadata": {},
95+
"outputs": [],
96+
"source": [
97+
"def makelabelled(column):\n",
98+
" second_limit = column.mean()\n",
99+
" first_limit = 0.5 * second_limit\n",
100+
" third_limit = 1.5 * second_limit\n",
101+
" for i in range(0,len(column)):\n",
102+
" if(column[i]<first_limit):\n",
103+
" column[i] = 0\n",
104+
" elif(column[i] < second_limit):\n",
105+
" column[i] = 1\n",
106+
" elif(column[i]<third_limit):\n",
107+
" column[i] = 2\n",
108+
" else:\n",
109+
" column[i] = 3\n",
110+
" return column"
111+
]
112+
},
113+
{
114+
"cell_type": "code",
115+
"execution_count": 32,
116+
"metadata": {},
117+
"outputs": [],
118+
"source": [
119+
"from sklearn import datasets\n",
120+
"iris = datasets.load_iris()\n",
121+
"x = iris.data\n",
122+
"y = iris.target"
123+
]
124+
},
125+
{
126+
"cell_type": "code",
127+
"execution_count": 33,
128+
"metadata": {},
129+
"outputs": [],
130+
"source": [
131+
"for i in range(0,x.shape[-1]):\n",
132+
" x[:,i] = makelabelled(x[:,i])"
133+
]
134+
},
135+
{
136+
"cell_type": "code",
137+
"execution_count": 34,
138+
"metadata": {},
139+
"outputs": [],
140+
"source": [
141+
"from sklearn import model_selection\n",
142+
"X_train,X_test,Y_train,Y_test = model_selection.train_test_split(x,y,test_size=0.25,random_state=0)"
143+
]
144+
},
145+
{
146+
"cell_type": "code",
147+
"execution_count": 35,
148+
"metadata": {},
149+
"outputs": [],
150+
"source": [
151+
"dictionary = fit(X_train,Y_train)"
152+
]
153+
},
154+
{
155+
"cell_type": "code",
156+
"execution_count": 36,
157+
"metadata": {},
158+
"outputs": [],
159+
"source": [
160+
"Y_pred = predict(dictionary,X_test)"
161+
]
162+
},
163+
{
164+
"cell_type": "code",
165+
"execution_count": 37,
166+
"metadata": {},
167+
"outputs": [
168+
{
169+
"name": "stdout",
170+
"output_type": "stream",
171+
"text": [
172+
" precision recall f1-score support\n",
173+
"\n",
174+
" 0 1.00 1.00 1.00 13\n",
175+
" 1 0.94 1.00 0.97 16\n",
176+
" 2 1.00 0.89 0.94 9\n",
177+
"\n",
178+
"avg / total 0.98 0.97 0.97 38\n",
179+
"\n",
180+
"[[13 0 0]\n",
181+
" [ 0 16 0]\n",
182+
" [ 0 1 8]]\n"
183+
]
184+
}
185+
],
186+
"source": [
187+
"from sklearn.metrics import classification_report,confusion_matrix\n",
188+
"print(classification_report(Y_test,Y_pred))\n",
189+
"print(confusion_matrix(Y_test,Y_pred))"
190+
]
191+
},
192+
{
193+
"cell_type": "markdown",
194+
"metadata": {},
195+
"source": [
196+
 "### Implementation of Multinomial Naive Bayes from Scratch"
197+
]
198+
},
199+
{
200+
"cell_type": "code",
201+
"execution_count": null,
202+
"metadata": {},
203+
"outputs": [],
204+
"source": []
205+
}
206+
],
207+
"metadata": {
208+
"kernelspec": {
209+
"display_name": "Python 3",
210+
"language": "python",
211+
"name": "python3"
212+
},
213+
"language_info": {
214+
"codemirror_mode": {
215+
"name": "ipython",
216+
"version": 3
217+
},
218+
"file_extension": ".py",
219+
"mimetype": "text/x-python",
220+
"name": "python",
221+
"nbconvert_exporter": "python",
222+
"pygments_lexer": "ipython3",
223+
"version": "3.6.4"
224+
}
225+
},
226+
"nbformat": 4,
227+
"nbformat_minor": 2
228+
}

15. NaiveBayes/Untitled.ipynb

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"cells": [],
3+
"metadata": {},
4+
"nbformat": 4,
5+
"nbformat_minor": 2
6+
}

0 commit comments

Comments
 (0)