-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.html
210 lines (201 loc) · 8.6 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
<!DOCTYPE html>
<!-- lang is declared so assistive technology and translation tools know the page is English -->
<html lang="en">
<head>
  <meta charset="utf-8">
  <!-- initial-scale=1 keeps the layout at 1:1 zoom on first load on mobile browsers -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Feature selection with Boruta.js online | StatSim Select</title>

  <!-- Stylesheets: Materialize first, then the Port UI styles, then shared StatSim styles -->
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/materialize/1.0.0/css/materialize.min.css">
  <link rel="stylesheet" href="https://statsim.com/port/css/port.css" media="screen">
  <link rel="icon" type="image/png" href="https://statsim.com/app/images/favicon-32x32.png" sizes="32x32">
  <link rel="icon" type="image/png" href="https://statsim.com/app/images/favicon-16x16.png" sizes="16x16">
  <link rel="stylesheet" href="https://statsim.com/assets/common.css" media="screen">

  <!-- Page-specific overrides -->
  <style>
    /* Link colour used across the page */
    a { color: #19BCB0 }

    /* Status colours. These classes are not referenced elsewhere in this file;
       presumably they are applied by render.js to its output (confirm). */
    .rejected { color: #CE2A62 }
    .confirmed { color: #19BCB0 }

    /* Neutral grey file-picker button (the highlight below overrides it) */
    .file-field .btn { background: #BBB }
    .file-field .btn:hover { background: #AAA }

    /* Light grey background for the status bar and the description section */
    .grey-bar { background: #f5f5f5 }

    /* Highlight for the button the user should press next; the inline script at
       the end of the page adds it to the file button and later to #run */
    .custom-active {
      background: #22BEB3 !important;
      background-image: linear-gradient(141deg, #27c9ba 0%, #1fc8db 51%, #2cb5e8 75%) !important;
    }
    .custom-active:hover {
      opacity: 0.85;
    }

    /* Added by the inline script to the non-file inputs and #run until a file is chosen */
    .custom-hidden { display: none !important }

    .input-field { margin-bottom: 0 }

    /* Class of the greeting paragraph created by the inline script */
    .greeting { margin-top: 40px;}

    #port-container { margin: 70px 0 }
    #description { margin-top: 0 }
  </style>

  <!-- Global site tag (gtag.js) - Google Analytics -->
  <script async src="https://www.googletagmanager.com/gtag/js?id=UA-7770107-2"></script>
  <script>
    window.dataLayer = window.dataLayer || [];
    function gtag(){dataLayer.push(arguments);}
    gtag('js', new Date());
    gtag('config', 'UA-7770107-2');
  </script>
</head>
<body>
<!-- Top status bar: menu placeholder, breadcrumb and application version -->
<div class="status-bar grey-bar">
  <div class="container">
    <div class="row">
      <div class="col s12" style="font-size: 14px;">
        <!-- Empty in this file; presumably populated by common.js (confirm) -->
        <div id="menu"></div>
        <!-- The breadcrumb root previously had an empty href, which resolves to the
             current page. It now points at the site root.
             NOTE(review): the URL is inferred from the asset URLs used on this page; confirm. -->
        <a href="https://statsim.com/">StatSim</a> → <b>Select</b>
        <span class="version">Version: 0.2.0</span>
      </div>
    </div>
  </div>
</div>
<!-- Mount point handed to `new Port({ portContainer: ... })` by the inline script below -->
<div class="container">
  <div class="row">
    <div id="port-container"></div>
  </div>
</div>
<div id="description" class="grey-bar">
<div class="container">
<div class="row">
<div class="col m12">
<h1>Feature selection with Boruta.js online</h1>
<h2>Find all relevant variables in a dataset using a robust feature importance method</h2>
<p>Feature selection methods try to identify important variables in a dataset. Many of those methods search for a minimal feature subset that can predict a target variable well. When you want to understand relationships between variables, it's more important to select all relevant variables, not just the minimal set. Boruta (<a href="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.232.7660&amp;rep=rep1&amp;type=pdf">Kursa, Rudnicki 2010</a>) is one of the most advanced methods for such all-relevant feature selection. We <a href="https://github.com/zemlyansky/boruta.js">ported</a> the original Boruta algorithm to JavaScript, so you can use it online without installing R or Python, and without sending data to a web server. All processing happens in the browser, on your local machine.</p>
</div>
</div>
<div class="row features">
<div class="col m3 feature">
<h3>
Select all relevant variables
</h3>
<p>
Boruta makes it possible to select all critical features, not a minimal subset of them. That works even when some of them are correlated.
</p>
</div>
<div class="col m3 feature">
<h3>
Compare with shadow features
</h3>
<p>
The script adds "shadow" variables during data processing. By comparing against them, you can check whether the variables of a dataset perform better than noise.
</p>
</div>
<div class="col m3 feature">
<h3>
Choose from multiple models
</h3>
<p>
The original Boruta lib uses the Random Forest method under the hood. We added more models that Boruta can use to calculate feature importance.
</p>
</div>
<div class="col m3 feature">
<h3>
Make statistically grounded conclusions
</h3>
<p>
Instead of having a hard threshold for feature selection, the script uses a probabilistic approach to reject or accept variables in a dataset.
</p>
</div>
</div>
<div class="row">
<div class="col m12">
<small>
All processing and visualization happen in your browser. We don't see, collect, or sell the data you explore.<br>
</small>
<p>
<a class="github-button" href="https://github.com/statsim/select" data-icon="octicon-star" data-show-count="true" aria-label="Star statsim/select on GitHub">Star</a>
<a class="github-button" href="https://github.com/statsim/select/issues" data-icon="octicon-issue-opened" data-show-count="true" aria-label="Issue statsim/select on GitHub">Issue</a>
</p>
</div>
</div>
</div>
</div>
<script async defer src="https://buttons.github.io/buttons.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/materialize/1.0.0/js/materialize.min.js"></script>
<script src="https://statsim.com/port/dist/port.js"></script>
<script src="https://statsim.com/assets/common.js"></script>
<script>
// Create the Port application inside #port-container.
// The schema names the model class (`Process.run` from process.js, with
// `worker: true`), the renderer class (`Render.render` from render.js),
// the layout/colour design, and the list of user inputs.
var port = new Port({
  portContainer: document.getElementById('port-container'),
  schema: {
    model: {
      name: 'Process',
      method: 'run',
      type: 'class',
      url: 'process.js',
      worker: true
    },
    render: {
      name: 'Render',
      method: 'render',
      type: 'class',
      url: 'render.js'
    },
    design: {
      layout: 'sidebar',
      colors: 'light'
    },
    inputs: [
      { type: 'file', name: 'File', reactive: true },
      { type: 'select', name: 'Target variable' },
      {
        type: 'select',
        name: 'Base model',
        options: [
          'Linear Regression',
          'Logistic Regression',
          'KNN Classifier',
          'RF Auto',
          'RF Classifier',
          'RF Regression'
          // 'XGB Classifier' and 'XGB Regression' are currently not offered
        ],
        default: 'RF Auto',
        // Returns a className override for the 'N estimators' and 'Max depth'
        // inputs: 'hidden' for the three non-RF models listed below, '' otherwise.
        // NOTE(review): how Port applies the returned object is defined in port.js.
        onchange: (value) => {
          const hidesEstimatorInputs = [
            'Linear Regression',
            'Logistic Regression',
            'KNN Classifier'
          ]
          const className = hidesEstimatorInputs.indexOf(value) !== -1 ? 'hidden' : ''
          return {
            'N estimators': { className: className },
            'Max depth': { className: className }
          }
        }
      },
      { type: 'int', name: 'Max runs', default: 60 },
      { type: 'float', name: 'P value', default: 0.01 },
      { type: 'int', name: 'N repeats', default: 5 },
      { type: 'int', name: 'N estimators', default: 100 },
      { type: 'int', name: 'Max depth', default: 6 }
    ]
  }
})
</script>
<script>
// First-run onboarding. Until a file is chosen, every input except the file
// picker (and the #run button) is hidden, the file button is highlighted, and a
// greeting is shown. Once a file is selected the greeting is hidden, the other
// inputs are revealed and the highlight moves to #run.
//
// The variables were previously assigned without any declaration (implicit
// globals, a ReferenceError in strict mode). They are declared with top-level
// `var` so they remain reachable as globals by any other script that may use them.
var fileElement = document.querySelector('input[type="file"]')
var fileElementBtn = document.querySelector('.file-field .btn')
var otherElements = document.querySelectorAll('.input-field:not(.file-field), #run')

var greeting = document.createElement('p')
greeting.innerHTML = 'Choose a <b>CSV</b> file for feature selection'
greeting.classList.add('greeting')
// NOTE(review): `outputs` is not declared in this file; presumably it is the
// element with id="outputs" created by Port, or a global from port.js (confirm).
outputs.appendChild(greeting)

fileElementBtn.classList.add('custom-active')
// The callback parameter keeps `el` local instead of leaking a global loop variable.
otherElements.forEach((el) => {
  el.classList.add('custom-hidden')
})

fileElement.addEventListener('change', (e) => {
  // A cancelled file dialog leaves `files` empty; keep the onboarding state then.
  if (!e.target.files.length) {
    return
  }
  // NOTE(review): the 'hidden' class is not defined in this page's <style>;
  // presumably it comes from port.css or materialize (confirm).
  greeting.classList.add('hidden')
  fileElementBtn.classList.remove('custom-active')
  document.getElementById('run').classList.add('custom-active')
  otherElements.forEach((el) => {
    el.classList.remove('custom-hidden')
  })
})
</script>
</body>
</html>