-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathstepwise-regression.py
39 lines (35 loc) · 1.41 KB
/
stepwise-regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import statsmodels.formula.api as smf
def forward_selected(data, response):
"""Linear model designed by forward selection.
Parameters:
-----------
data : pandas DataFrame with all possible predictors and response
response: string, name of response column in data
Returns:
--------
model: an "optimal" fitted statsmodels linear model
with an intercept
selected by forward selection
evaluated by adjusted R-squared
"""
remaining = set(data.columns)
remaining.remove(response)
selected = []
current_score, best_new_score = 0.0, 0.0
while remaining and current_score == best_new_score:
scores_with_candidates = []
for candidate in remaining:
formula = "{} ~ {} + 1".format(response,
' + '.join(selected + [candidate]))
score = smf.ols(formula, data).fit().rsquared_adj
scores_with_candidates.append((score, candidate))
scores_with_candidates.sort()
best_new_score, best_candidate = scores_with_candidates.pop()
if current_score < best_new_score:
remaining.remove(best_candidate)
selected.append(best_candidate)
current_score = best_new_score
formula = "{} ~ {} + 1".format(response,
' + '.join(selected))
model = smf.ols(formula, data).fit()
return model