Skip to content

Commit

Permalink
Merge pull request #37 from intel/bbela/resolve_formatting_issues
Browse files Browse the repository at this point in the history
Correct formatting issues in the code
  • Loading branch information
kekaczma authored Dec 6, 2024
2 parents 26e865b + 61e1980 commit d3ff298
Show file tree
Hide file tree
Showing 30 changed files with 660 additions and 558 deletions.
3 changes: 2 additions & 1 deletion MLOps_Professional/lab1/sample/data_model.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from pydantic import BaseModel


class MaintenancePayload(BaseModel):
temperature: int
temperature: int
6 changes: 3 additions & 3 deletions MLOps_Professional/lab1/sample/maintenance.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# maintenance test business logic

def test_maintenance(temperature:int):
def test_maintenance(temperature: int):
"""_summary_
Parameters
Expand All @@ -14,5 +14,5 @@ def test_maintenance(temperature:int):
'Needs Maintenance' or 'No Maintenance Required' based on temperature readings
"""
maintenance_status = 'Needs Maintenance' if temperature > 50 else 'No Maintenance Required'
return maintenance_status

return maintenance_status
9 changes: 5 additions & 4 deletions MLOps_Professional/lab1/sample/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,14 @@ async def ping():
API response
response from server on health status
"""
return {"message":"Server is Running"}
return {"message": "Server is Running"}


@app.post("/maintenance")
async def predict(payload:MaintenancePayload):
async def predict(payload: MaintenancePayload):

maintenance_result = test_maintenance(payload.temperature)
return {"msg": "Completed Analysis", "Maintenance Status": maintenance_result}

if __name__ == "__main__":
uvicorn.run("serve:app", host="0.0.0.0", port=5000, log_level="info")
uvicorn.run("serve:app", host="0.0.0.0", port=5000, log_level="info")
8 changes: 4 additions & 4 deletions MLOps_Professional/lab3/sample/data_model.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from pydantic import BaseModel



class TrainPayload(BaseModel):
file: str
model_name: str
model_path: str
test_size: int = 25
ncpu: int = 4
test_size: int = 25
ncpu: int = 4
mlflow_tracking_uri: str
mlflow_new_experiment: str = None
mlflow_experiment: str = None

12 changes: 6 additions & 6 deletions MLOps_Professional/lab3/sample/generate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,12 @@
data.drop('Temp_Var', axis=1, inplace=True)

Categorical_Variables = pd.get_dummies(
data[[
'Manufacturer',
'Generation',
'Lubrication',
'Product_Assignment']],
drop_first=False)
data[[
'Manufacturer',
'Generation',
'Lubrication',
'Product_Assignment']],
drop_first=False)
data = pd.concat([data, Categorical_Variables], axis=1)
data.drop(['Manufacturer', 'Generation', 'Lubrication', 'Product_Assignment'], axis=1, inplace=True)

Expand Down
11 changes: 6 additions & 5 deletions MLOps_Professional/lab3/sample/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@ async def ping():
API response
response from server on health status
"""
return {"message":"Server is Running"}
return {"message": "Server is Running"}


@app.post("/train")
async def train(payload:TrainPayload):
async def train(payload: TrainPayload):
"""Training Endpoint
This endpoint processes raw data and trains an XGBoost Classifier
Expand All @@ -40,9 +41,9 @@ async def train(payload:TrainPayload):
Accuracy metrics and other logger feedback on training progress.
"""
model = HarvesterMaintenance(payload.model_name)
model.mlflow_tracking(tracking_uri=payload.mlflow_tracking_uri,
model.mlflow_tracking(tracking_uri=payload.mlflow_tracking_uri,
new_experiment=payload.mlflow_new_experiment,
experiment= payload.mlflow_experiment)
experiment=payload.mlflow_experiment)
logger.info("Configured Experiment and Tracking URI for MLFlow")
model.process_data(payload.file, payload.test_size)
logger.info("Data has been successfully processed")
Expand All @@ -54,4 +55,4 @@ async def train(payload:TrainPayload):
return {"msg": "Model trained succesfully", "validation scores": accuracy_score}

if __name__ == "__main__":
uvicorn.run("serve:app", host="0.0.0.0", port=5000, log_level="info")
uvicorn.run("serve:app", host="0.0.0.0", port=5000, log_level="info")
107 changes: 57 additions & 50 deletions MLOps_Professional/lab3/sample/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import numpy as np
import xgboost as xgb
import pandas as pd
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
Expand All @@ -24,7 +24,7 @@


class HarvesterMaintenance():

def __init__(self, model_name: str):
self.model_name = model_name
self.file = ''
Expand All @@ -39,21 +39,21 @@ def __init__(self, model_name: str):
self.run_id = ''
self.active_experiment = ''
self.xgb_model = ''

def mlflow_tracking(self, tracking_uri: str = './mlruns', experiment: str = None, new_experiment: str = None):

# sets tracking URI
mlflow.set_tracking_uri(tracking_uri)

# creates new experiment if no experiment is specified
if experiment == None:
if experiment is None:
mlflow.create_experiment(new_experiment)
self.active_experiment = new_experiment
mlflow.set_experiment(new_experiment)
else:
mlflow.set_experiment(experiment)
self.active_experiment = experiment

def process_data(self, file: str, test_size: int = .25):
"""processes raw data for training
Expand All @@ -72,11 +72,11 @@ def process_data(self, file: str, test_size: int = .25):
except FileNotFoundError:
sys.exit('Dataset file not found')


X = data.drop('Asset_Label', axis=1)
y = data.Asset_Label

X_train, X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=test_size)
X_train, X_test, self.y_train, self.y_test = train_test_split(
X, y, test_size=test_size)

df_num_train = X_train.select_dtypes(['float', 'int', 'int32'])
df_num_test = X_test.select_dtypes(['float', 'int', 'int32'])
Expand All @@ -97,21 +97,24 @@ def process_data(self, file: str, test_size: int = .25):
del X_test_scaled_transformed['Number_Repairs']

# Dropping the unscaled numerical columns
X_train = X_train.drop(['Age', 'Temperature', 'Last_Maintenance', 'Motor_Current'], axis=1)
X_test = X_test.drop(['Age', 'Temperature', 'Last_Maintenance', 'Motor_Current'], axis=1)

X_train = X_train.drop(
['Age', 'Temperature', 'Last_Maintenance', 'Motor_Current'], axis=1)
X_test = X_test.drop(
['Age', 'Temperature', 'Last_Maintenance', 'Motor_Current'], axis=1)

X_train = X_train.astype(int)
X_test = X_test.astype(int)

# Creating train and test data with scaled numerical columns
X_train_scaled_transformed = pd.concat([X_train_scaled_transformed, X_train], axis=1)
X_test_scaled_transformed = pd.concat([X_test_scaled_transformed, X_test], axis=1)
X_train_scaled_transformed = pd.concat(
[X_train_scaled_transformed, X_train], axis=1)
X_test_scaled_transformed = pd.concat(
[X_test_scaled_transformed, X_test], axis=1)

self.X_train_scaled_transformed = X_train_scaled_transformed.astype(
{'Motor_Current': 'float64'})
{'Motor_Current': 'float64'})
self.X_test_scaled_transformed = X_test_scaled_transformed.astype(
{'Motor_Current': 'float64'})

{'Motor_Current': 'float64'})

def train(self, ncpu: int = 4):
"""trains an XGBoost Classifier and Tracks Models with MLFlow
Expand All @@ -121,28 +124,29 @@ def train(self, ncpu: int = 4):
ncpu : int, optional
number of CPU threads used for training, by default 4
"""

# Set xgboost parameters
self.parameters = {
'max_bin': 256,
'scale_pos_weight': 2,
'lambda_l2': 1,
'alpha': 0.9,
'max_depth': 8,
'num_leaves': 2**8,
'verbosity': 0,
'objective': 'multi:softmax',
'learning_rate': 0.3,
'num_class': 3,
'nthread': ncpu
'max_bin': 256,
'scale_pos_weight': 2,
'lambda_l2': 1,
'alpha': 0.9,
'max_depth': 8,
'num_leaves': 2**8,
'verbosity': 0,
'objective': 'multi:softmax',
'learning_rate': 0.3,
'num_class': 3,
'nthread': ncpu
}

with mlflow.start_run() as run:
mlflow.xgboost.autolog()
xgb_train = xgb.DMatrix(self.X_train_scaled_transformed, label=np.array(self.y_train))

self.xgb_model = xgb.train(self.parameters, xgb_train, num_boost_round=100)

mlflow.xgboost.autolog()
xgb_train = xgb.DMatrix(
self.X_train_scaled_transformed, label=np.array(self.y_train))

self.xgb_model = xgb.train(
self.parameters, xgb_train, num_boost_round=100)

def validate(self):
"""performs model validation with testing data
Expand All @@ -154,17 +158,20 @@ def validate(self):
"""
dtest = xgb.DMatrix(self.X_test_scaled_transformed, self.y_test)
xgb_prediction = self.xgb_model.predict(dtest)
xgb_errors_count = np.count_nonzero(xgb_prediction - np.ravel(self.y_test))
xgb_errors_count = np.count_nonzero(
xgb_prediction - np.ravel(self.y_test))
self.accuracy_scr = 1 - xgb_errors_count / xgb_prediction.shape[0]

xp = mlflow.get_experiment_by_name(self.active_experiment)._experiment_id
self.run_id = mlflow.search_runs(xp, output_format="list")[0].info.run_id

with mlflow.start_run(self.run_id ):
mlflow.log_metric("accuracy",self.accuracy_scr)


xp = mlflow.get_experiment_by_name(
self.active_experiment)._experiment_id
self.run_id = mlflow.search_runs(
xp, output_format="list")[0].info.run_id

with mlflow.start_run(self.run_id):
mlflow.log_metric("accuracy", self.accuracy_scr)

return self.accuracy_scr

def save(self, model_path):
"""saves trained model to path
Expand All @@ -174,13 +181,13 @@ def save(self, model_path):
path where trained model should be saved
"""

self.model_path = model_path + self.model_name + '.joblib'
self.scaler_path = model_path + self.model_name + '_scaler.joblib'
self.model_path = model_path + self.model_name + '.joblib'
self.scaler_path = model_path + self.model_name + '_scaler.joblib'

logger.info("Saving model")
with open( self.model_path, "wb") as fh:
with open(self.model_path, "wb") as fh:
joblib.dump(self.xgb_model, fh.name)

logger.info("Saving Scaler")
with open( self.scaler_path, "wb") as fh:
with open(self.scaler_path, "wb") as fh:
joblib.dump(self.robust_scaler, fh.name)
10 changes: 6 additions & 4 deletions MLOps_Professional/lab4/sample/data_model.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
from pydantic import BaseModel



class TrainPayload(BaseModel):
file: str
model_name: str
model_path: str
test_size: int = 25
ncpu: int = 4
test_size: int = 25
ncpu: int = 4
mlflow_tracking_uri: str
mlflow_new_experiment: str = None
mlflow_experiment: str = None


class PredictionPayload(BaseModel):
model_name: str
stage: str
sample: list
model_run_id: str
scaler_file_name: str
scaler_destination: str = './'
scaler_destination: str = './'
12 changes: 6 additions & 6 deletions MLOps_Professional/lab4/sample/generate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,12 @@
data.drop('Temp_Var', axis=1, inplace=True)

Categorical_Variables = pd.get_dummies(
data[[
'Manufacturer',
'Generation',
'Lubrication',
'Product_Assignment']],
drop_first=False)
data[[
'Manufacturer',
'Generation',
'Lubrication',
'Product_Assignment']],
drop_first=False)
data = pd.concat([data, Categorical_Variables], axis=1)
data.drop(['Manufacturer', 'Generation', 'Lubrication', 'Product_Assignment'], axis=1, inplace=True)

Expand Down
Loading

0 comments on commit d3ff298

Please sign in to comment.