Skip to content

Commit d3ff298

Browse files
authored
Merge pull request #37 from intel/bbela/resolve_formatting_issues
Correct formatting issues in the code
2 parents 26e865b + 61e1980 commit d3ff298

File tree

30 files changed

+660
-558
lines changed

30 files changed

+660
-558
lines changed
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from pydantic import BaseModel
22

3+
34
class MaintenancePayload(BaseModel):
4-
temperature: int
5+
temperature: int

MLOps_Professional/lab1/sample/maintenance.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# maintenance test business logic
22

3-
def test_maintenance(temperature:int):
3+
def test_maintenance(temperature: int):
44
"""_summary_
55
66
Parameters
@@ -14,5 +14,5 @@ def test_maintenance(temperature:int):
1414
'Needs Maintenance' or 'No Maintenance Required' based on temperature readings
1515
"""
1616
maintenance_status = 'Needs Maintenance' if temperature > 50 else 'No Maintenance Required'
17-
18-
return maintenance_status
17+
18+
return maintenance_status

MLOps_Professional/lab1/sample/serve.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,14 @@ async def ping():
2323
API response
2424
response from server on health status
2525
"""
26-
return {"message":"Server is Running"}
26+
return {"message": "Server is Running"}
27+
2728

2829
@app.post("/maintenance")
29-
async def predict(payload:MaintenancePayload):
30-
30+
async def predict(payload: MaintenancePayload):
31+
3132
maintenance_result = test_maintenance(payload.temperature)
3233
return {"msg": "Completed Analysis", "Maintenance Status": maintenance_result}
3334

3435
if __name__ == "__main__":
35-
uvicorn.run("serve:app", host="0.0.0.0", port=5000, log_level="info")
36+
uvicorn.run("serve:app", host="0.0.0.0", port=5000, log_level="info")
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
from pydantic import BaseModel
2-
2+
3+
34
class TrainPayload(BaseModel):
45
file: str
56
model_name: str
67
model_path: str
7-
test_size: int = 25
8-
ncpu: int = 4
8+
test_size: int = 25
9+
ncpu: int = 4
910
mlflow_tracking_uri: str
1011
mlflow_new_experiment: str = None
1112
mlflow_experiment: str = None
12-

MLOps_Professional/lab3/sample/generate_data.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -96,12 +96,12 @@
9696
data.drop('Temp_Var', axis=1, inplace=True)
9797

9898
Categorical_Variables = pd.get_dummies(
99-
data[[
100-
'Manufacturer',
101-
'Generation',
102-
'Lubrication',
103-
'Product_Assignment']],
104-
drop_first=False)
99+
data[[
100+
'Manufacturer',
101+
'Generation',
102+
'Lubrication',
103+
'Product_Assignment']],
104+
drop_first=False)
105105
data = pd.concat([data, Categorical_Variables], axis=1)
106106
data.drop(['Manufacturer', 'Generation', 'Lubrication', 'Product_Assignment'], axis=1, inplace=True)
107107

MLOps_Professional/lab3/sample/serve.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,11 @@ async def ping():
2222
API response
2323
response from server on health status
2424
"""
25-
return {"message":"Server is Running"}
25+
return {"message": "Server is Running"}
26+
2627

2728
@app.post("/train")
28-
async def train(payload:TrainPayload):
29+
async def train(payload: TrainPayload):
2930
"""Training Endpoint
3031
This endpoint processes raw data and trains an XGBoost Classifier
3132
@@ -40,9 +41,9 @@ async def train(payload:TrainPayload):
4041
Accuracy metrics and other logger feedback on training progress.
4142
"""
4243
model = HarvesterMaintenance(payload.model_name)
43-
model.mlflow_tracking(tracking_uri=payload.mlflow_tracking_uri,
44+
model.mlflow_tracking(tracking_uri=payload.mlflow_tracking_uri,
4445
new_experiment=payload.mlflow_new_experiment,
45-
experiment= payload.mlflow_experiment)
46+
experiment=payload.mlflow_experiment)
4647
logger.info("Configured Experiment and Tracking URI for MLFlow")
4748
model.process_data(payload.file, payload.test_size)
4849
logger.info("Data has been successfully processed")
@@ -54,4 +55,4 @@ async def train(payload:TrainPayload):
5455
return {"msg": "Model trained succesfully", "validation scores": accuracy_score}
5556

5657
if __name__ == "__main__":
57-
uvicorn.run("serve:app", host="0.0.0.0", port=5000, log_level="info")
58+
uvicorn.run("serve:app", host="0.0.0.0", port=5000, log_level="info")

MLOps_Professional/lab3/sample/train.py

Lines changed: 57 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
import numpy as np
1212
import xgboost as xgb
13-
import pandas as pd
13+
import pandas as pd
1414

1515
from sklearn.model_selection import train_test_split
1616
from sklearn.preprocessing import RobustScaler
@@ -24,7 +24,7 @@
2424

2525

2626
class HarvesterMaintenance():
27-
27+
2828
def __init__(self, model_name: str):
2929
self.model_name = model_name
3030
self.file = ''
@@ -39,21 +39,21 @@ def __init__(self, model_name: str):
3939
self.run_id = ''
4040
self.active_experiment = ''
4141
self.xgb_model = ''
42-
42+
4343
def mlflow_tracking(self, tracking_uri: str = './mlruns', experiment: str = None, new_experiment: str = None):
44-
44+
4545
# sets tracking URI
4646
mlflow.set_tracking_uri(tracking_uri)
47-
47+
4848
# creates new experiment if no experiment is specified
49-
if experiment == None:
49+
if experiment is None:
5050
mlflow.create_experiment(new_experiment)
5151
self.active_experiment = new_experiment
5252
mlflow.set_experiment(new_experiment)
5353
else:
5454
mlflow.set_experiment(experiment)
5555
self.active_experiment = experiment
56-
56+
5757
def process_data(self, file: str, test_size: int = .25):
5858
"""processes raw data for training
5959
@@ -72,11 +72,11 @@ def process_data(self, file: str, test_size: int = .25):
7272
except FileNotFoundError:
7373
sys.exit('Dataset file not found')
7474

75-
7675
X = data.drop('Asset_Label', axis=1)
7776
y = data.Asset_Label
7877

79-
X_train, X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=test_size)
78+
X_train, X_test, self.y_train, self.y_test = train_test_split(
79+
X, y, test_size=test_size)
8080

8181
df_num_train = X_train.select_dtypes(['float', 'int', 'int32'])
8282
df_num_test = X_test.select_dtypes(['float', 'int', 'int32'])
@@ -97,21 +97,24 @@ def process_data(self, file: str, test_size: int = .25):
9797
del X_test_scaled_transformed['Number_Repairs']
9898

9999
# Dropping the unscaled numerical columns
100-
X_train = X_train.drop(['Age', 'Temperature', 'Last_Maintenance', 'Motor_Current'], axis=1)
101-
X_test = X_test.drop(['Age', 'Temperature', 'Last_Maintenance', 'Motor_Current'], axis=1)
102-
100+
X_train = X_train.drop(
101+
['Age', 'Temperature', 'Last_Maintenance', 'Motor_Current'], axis=1)
102+
X_test = X_test.drop(
103+
['Age', 'Temperature', 'Last_Maintenance', 'Motor_Current'], axis=1)
104+
103105
X_train = X_train.astype(int)
104106
X_test = X_test.astype(int)
105107

106108
# Creating train and test data with scaled numerical columns
107-
X_train_scaled_transformed = pd.concat([X_train_scaled_transformed, X_train], axis=1)
108-
X_test_scaled_transformed = pd.concat([X_test_scaled_transformed, X_test], axis=1)
109+
X_train_scaled_transformed = pd.concat(
110+
[X_train_scaled_transformed, X_train], axis=1)
111+
X_test_scaled_transformed = pd.concat(
112+
[X_test_scaled_transformed, X_test], axis=1)
109113

110114
self.X_train_scaled_transformed = X_train_scaled_transformed.astype(
111-
{'Motor_Current': 'float64'})
115+
{'Motor_Current': 'float64'})
112116
self.X_test_scaled_transformed = X_test_scaled_transformed.astype(
113-
{'Motor_Current': 'float64'})
114-
117+
{'Motor_Current': 'float64'})
115118

116119
def train(self, ncpu: int = 4):
117120
"""trains an XGBoost Classifier and Tracks Models with MLFlow
@@ -121,28 +124,29 @@ def train(self, ncpu: int = 4):
121124
ncpu : int, optional
122125
number of CPU threads used for training, by default 4
123126
"""
124-
127+
125128
# Set xgboost parameters
126129
self.parameters = {
127-
'max_bin': 256,
128-
'scale_pos_weight': 2,
129-
'lambda_l2': 1,
130-
'alpha': 0.9,
131-
'max_depth': 8,
132-
'num_leaves': 2**8,
133-
'verbosity': 0,
134-
'objective': 'multi:softmax',
135-
'learning_rate': 0.3,
136-
'num_class': 3,
137-
'nthread': ncpu
130+
'max_bin': 256,
131+
'scale_pos_weight': 2,
132+
'lambda_l2': 1,
133+
'alpha': 0.9,
134+
'max_depth': 8,
135+
'num_leaves': 2**8,
136+
'verbosity': 0,
137+
'objective': 'multi:softmax',
138+
'learning_rate': 0.3,
139+
'num_class': 3,
140+
'nthread': ncpu
138141
}
139-
142+
140143
with mlflow.start_run() as run:
141-
mlflow.xgboost.autolog()
142-
xgb_train = xgb.DMatrix(self.X_train_scaled_transformed, label=np.array(self.y_train))
143-
144-
self.xgb_model = xgb.train(self.parameters, xgb_train, num_boost_round=100)
145-
144+
mlflow.xgboost.autolog()
145+
xgb_train = xgb.DMatrix(
146+
self.X_train_scaled_transformed, label=np.array(self.y_train))
147+
148+
self.xgb_model = xgb.train(
149+
self.parameters, xgb_train, num_boost_round=100)
146150

147151
def validate(self):
148152
"""performs model validation with testing data
@@ -154,17 +158,20 @@ def validate(self):
154158
"""
155159
dtest = xgb.DMatrix(self.X_test_scaled_transformed, self.y_test)
156160
xgb_prediction = self.xgb_model.predict(dtest)
157-
xgb_errors_count = np.count_nonzero(xgb_prediction - np.ravel(self.y_test))
161+
xgb_errors_count = np.count_nonzero(
162+
xgb_prediction - np.ravel(self.y_test))
158163
self.accuracy_scr = 1 - xgb_errors_count / xgb_prediction.shape[0]
159-
160-
xp = mlflow.get_experiment_by_name(self.active_experiment)._experiment_id
161-
self.run_id = mlflow.search_runs(xp, output_format="list")[0].info.run_id
162-
163-
with mlflow.start_run(self.run_id ):
164-
mlflow.log_metric("accuracy",self.accuracy_scr)
165-
164+
165+
xp = mlflow.get_experiment_by_name(
166+
self.active_experiment)._experiment_id
167+
self.run_id = mlflow.search_runs(
168+
xp, output_format="list")[0].info.run_id
169+
170+
with mlflow.start_run(self.run_id):
171+
mlflow.log_metric("accuracy", self.accuracy_scr)
172+
166173
return self.accuracy_scr
167-
174+
168175
def save(self, model_path):
169176
"""saves trained model to path
170177
@@ -174,13 +181,13 @@ def save(self, model_path):
174181
path where trained model should be saved
175182
"""
176183

177-
self.model_path = model_path + self.model_name + '.joblib'
178-
self.scaler_path = model_path + self.model_name + '_scaler.joblib'
179-
184+
self.model_path = model_path + self.model_name + '.joblib'
185+
self.scaler_path = model_path + self.model_name + '_scaler.joblib'
186+
180187
logger.info("Saving model")
181-
with open( self.model_path, "wb") as fh:
188+
with open(self.model_path, "wb") as fh:
182189
joblib.dump(self.xgb_model, fh.name)
183-
190+
184191
logger.info("Saving Scaler")
185-
with open( self.scaler_path, "wb") as fh:
192+
with open(self.scaler_path, "wb") as fh:
186193
joblib.dump(self.robust_scaler, fh.name)
Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,21 @@
11
from pydantic import BaseModel
2-
2+
3+
34
class TrainPayload(BaseModel):
45
file: str
56
model_name: str
67
model_path: str
7-
test_size: int = 25
8-
ncpu: int = 4
8+
test_size: int = 25
9+
ncpu: int = 4
910
mlflow_tracking_uri: str
1011
mlflow_new_experiment: str = None
1112
mlflow_experiment: str = None
1213

14+
1315
class PredictionPayload(BaseModel):
1416
model_name: str
1517
stage: str
1618
sample: list
1719
model_run_id: str
1820
scaler_file_name: str
19-
scaler_destination: str = './'
21+
scaler_destination: str = './'

MLOps_Professional/lab4/sample/generate_data.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -96,12 +96,12 @@
9696
data.drop('Temp_Var', axis=1, inplace=True)
9797

9898
Categorical_Variables = pd.get_dummies(
99-
data[[
100-
'Manufacturer',
101-
'Generation',
102-
'Lubrication',
103-
'Product_Assignment']],
104-
drop_first=False)
99+
data[[
100+
'Manufacturer',
101+
'Generation',
102+
'Lubrication',
103+
'Product_Assignment']],
104+
drop_first=False)
105105
data = pd.concat([data, Categorical_Variables], axis=1)
106106
data.drop(['Manufacturer', 'Generation', 'Lubrication', 'Product_Assignment'], axis=1, inplace=True)
107107

0 commit comments

Comments
 (0)