1010
1111import numpy as np
1212import xgboost as xgb
13- import pandas as pd
13+ import pandas as pd
1414
1515from sklearn .model_selection import train_test_split
1616from sklearn .preprocessing import RobustScaler
2424
2525
2626class HarvesterMaintenance ():
27-
27+
2828 def __init__ (self , model_name : str ):
2929 self .model_name = model_name
3030 self .file = ''
@@ -39,21 +39,21 @@ def __init__(self, model_name: str):
3939 self .run_id = ''
4040 self .active_experiment = ''
4141 self .xgb_model = ''
42-
42+
def mlflow_tracking(self, tracking_uri: str = './mlruns', experiment: str = None, new_experiment: str = None):
    """configures the MLflow tracking URI and activates an experiment

    Parameters
    ----------
    tracking_uri : str, optional
        URI handed to ``mlflow.set_tracking_uri``, by default './mlruns'
    experiment : str, optional
        name of the experiment to activate; when given it takes
        precedence over ``new_experiment``, by default None
    new_experiment : str, optional
        name of the experiment to activate when ``experiment`` is None,
        by default None

    Raises
    ------
    ValueError
        if both ``experiment`` and ``new_experiment`` are None
    """
    # Same precedence as before: an explicit `experiment` wins, otherwise
    # fall back to `new_experiment`.
    experiment_name = new_experiment if experiment is None else experiment

    # Fail early with a clear message instead of handing None to MLflow
    # as an experiment name.
    if experiment_name is None:
        raise ValueError(
            "either 'experiment' or 'new_experiment' must be provided")

    # sets tracking URI
    mlflow.set_tracking_uri(tracking_uri)

    # set_experiment creates the experiment when it does not exist yet, so
    # a separate create_experiment call is unnecessary -- and it would fail
    # on a second run that reuses the same experiment name.
    mlflow.set_experiment(experiment_name)
    self.active_experiment = experiment_name
56-
56+
5757 def process_data (self , file : str , test_size : int = .25 ):
5858 """processes raw data for training
5959
@@ -72,11 +72,11 @@ def process_data(self, file: str, test_size: int = .25):
7272 except FileNotFoundError :
7373 sys .exit ('Dataset file not found' )
7474
75-
7675 X = data .drop ('Asset_Label' , axis = 1 )
7776 y = data .Asset_Label
7877
79- X_train , X_test , self .y_train , self .y_test = train_test_split (X , y , test_size = test_size )
78+ X_train , X_test , self .y_train , self .y_test = train_test_split (
79+ X , y , test_size = test_size )
8080
8181 df_num_train = X_train .select_dtypes (['float' , 'int' , 'int32' ])
8282 df_num_test = X_test .select_dtypes (['float' , 'int' , 'int32' ])
@@ -97,21 +97,24 @@ def process_data(self, file: str, test_size: int = .25):
9797 del X_test_scaled_transformed ['Number_Repairs' ]
9898
9999 # Dropping the unscaled numerical columns
100- X_train = X_train .drop (['Age' , 'Temperature' , 'Last_Maintenance' , 'Motor_Current' ], axis = 1 )
101- X_test = X_test .drop (['Age' , 'Temperature' , 'Last_Maintenance' , 'Motor_Current' ], axis = 1 )
102-
100+ X_train = X_train .drop (
101+ ['Age' , 'Temperature' , 'Last_Maintenance' , 'Motor_Current' ], axis = 1 )
102+ X_test = X_test .drop (
103+ ['Age' , 'Temperature' , 'Last_Maintenance' , 'Motor_Current' ], axis = 1 )
104+
103105 X_train = X_train .astype (int )
104106 X_test = X_test .astype (int )
105107
106108 # Creating train and test data with scaled numerical columns
107- X_train_scaled_transformed = pd .concat ([X_train_scaled_transformed , X_train ], axis = 1 )
108- X_test_scaled_transformed = pd .concat ([X_test_scaled_transformed , X_test ], axis = 1 )
109+ X_train_scaled_transformed = pd .concat (
110+ [X_train_scaled_transformed , X_train ], axis = 1 )
111+ X_test_scaled_transformed = pd .concat (
112+ [X_test_scaled_transformed , X_test ], axis = 1 )
109113
110114 self .X_train_scaled_transformed = X_train_scaled_transformed .astype (
111- {'Motor_Current' : 'float64' })
115+ {'Motor_Current' : 'float64' })
112116 self .X_test_scaled_transformed = X_test_scaled_transformed .astype (
113- {'Motor_Current' : 'float64' })
114-
117+ {'Motor_Current' : 'float64' })
115118
116119 def train (self , ncpu : int = 4 ):
117120 """trains an XGBoost Classifier and Tracks Models with MLFlow
@@ -121,28 +124,29 @@ def train(self, ncpu: int = 4):
121124 ncpu : int, optional
122125 number of CPU threads used for training, by default 4
123126 """
124-
127+
125128 # Set xgboost parameters
126129 self .parameters = {
127- 'max_bin' : 256 ,
128- 'scale_pos_weight' : 2 ,
129- 'lambda_l2' : 1 ,
130- 'alpha' : 0.9 ,
131- 'max_depth' : 8 ,
132- 'num_leaves' : 2 ** 8 ,
133- 'verbosity' : 0 ,
134- 'objective' : 'multi:softmax' ,
135- 'learning_rate' : 0.3 ,
136- 'num_class' : 3 ,
137- 'nthread' : ncpu
130+ 'max_bin' : 256 ,
131+ 'scale_pos_weight' : 2 ,
132+ 'lambda_l2' : 1 ,
133+ 'alpha' : 0.9 ,
134+ 'max_depth' : 8 ,
135+ 'num_leaves' : 2 ** 8 ,
136+ 'verbosity' : 0 ,
137+ 'objective' : 'multi:softmax' ,
138+ 'learning_rate' : 0.3 ,
139+ 'num_class' : 3 ,
140+ 'nthread' : ncpu
138141 }
139-
142+
140143 with mlflow .start_run () as run :
141- mlflow .xgboost .autolog ()
142- xgb_train = xgb .DMatrix (self .X_train_scaled_transformed , label = np .array (self .y_train ))
143-
144- self .xgb_model = xgb .train (self .parameters , xgb_train , num_boost_round = 100 )
145-
144+ mlflow .xgboost .autolog ()
145+ xgb_train = xgb .DMatrix (
146+ self .X_train_scaled_transformed , label = np .array (self .y_train ))
147+
148+ self .xgb_model = xgb .train (
149+ self .parameters , xgb_train , num_boost_round = 100 )
146150
147151 def validate (self ):
148152 """performs model validation with testing data
@@ -154,17 +158,20 @@ def validate(self):
154158 """
155159 dtest = xgb .DMatrix (self .X_test_scaled_transformed , self .y_test )
156160 xgb_prediction = self .xgb_model .predict (dtest )
157- xgb_errors_count = np .count_nonzero (xgb_prediction - np .ravel (self .y_test ))
161+ xgb_errors_count = np .count_nonzero (
162+ xgb_prediction - np .ravel (self .y_test ))
158163 self .accuracy_scr = 1 - xgb_errors_count / xgb_prediction .shape [0 ]
159-
160- xp = mlflow .get_experiment_by_name (self .active_experiment )._experiment_id
161- self .run_id = mlflow .search_runs (xp , output_format = "list" )[0 ].info .run_id
162-
163- with mlflow .start_run (self .run_id ):
164- mlflow .log_metric ("accuracy" ,self .accuracy_scr )
165-
164+
165+ xp = mlflow .get_experiment_by_name (
166+ self .active_experiment )._experiment_id
167+ self .run_id = mlflow .search_runs (
168+ xp , output_format = "list" )[0 ].info .run_id
169+
170+ with mlflow .start_run (self .run_id ):
171+ mlflow .log_metric ("accuracy" , self .accuracy_scr )
172+
166173 return self .accuracy_scr
167-
174+
168175 def save (self , model_path ):
169176 """saves trained model to path
170177
@@ -174,13 +181,13 @@ def save(self, model_path):
174181 path where trained model should be saved
175182 """
176183
177- self .model_path = model_path + self .model_name + '.joblib'
178- self .scaler_path = model_path + self .model_name + '_scaler.joblib'
179-
184+ self .model_path = model_path + self .model_name + '.joblib'
185+ self .scaler_path = model_path + self .model_name + '_scaler.joblib'
186+
180187 logger .info ("Saving model" )
181- with open ( self .model_path , "wb" ) as fh :
188+ with open (self .model_path , "wb" ) as fh :
182189 joblib .dump (self .xgb_model , fh .name )
183-
190+
184191 logger .info ("Saving Scaler" )
185- with open ( self .scaler_path , "wb" ) as fh :
192+ with open (self .scaler_path , "wb" ) as fh :
186193 joblib .dump (self .robust_scaler , fh .name )
0 commit comments