Skip to content

Commit

Permalink
unit 3
Browse files Browse the repository at this point in the history
  • Loading branch information
tommydangerous committed May 14, 2024
1 parent 9142c84 commit e90b3fc
Show file tree
Hide file tree
Showing 17 changed files with 167 additions and 32 deletions.
6 changes: 4 additions & 2 deletions .env.dev
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
ENV=development
PROJECT_NAME=mlops
MAGE_CODE_PATH=/home/mage_code
MAGE_DATABASE_CONNECTION_URL=postgresql+psycopg2://postgres:password@magic-database:5432/magic
POSTGRES_DB=magic
POSTGRES_PASSWORD=password
POSTGRES_USER=postgres
PROJECT_NAME=mlops
EXPERIMENTS_DB=experiments
EXPERIMENTS_TRACKING_URI=postgresql+psycopg2://$POSTGRES_USER:$POSTGRES_PASSWORD@magic-database:5432/$EXPERIMENTS_DB
MAGE_DATABASE_CONNECTION_URL=postgresql+psycopg2://$POSTGRES_USER:$POSTGRES_PASSWORD@magic-database:5432/$POSTGRES_DB
SMTP_EMAIL=
SMTP_PASSWORD=
3 changes: 1 addition & 2 deletions mlops/settings.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
projects:
unit_0:
active: true
unit_0: {}
unit_1: {}
unit_2: {}
unit_3: {}
Expand Down
2 changes: 1 addition & 1 deletion mlops/unit_1/global_data_products.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ training_set:
object_type: pipeline
object_uuid: data_preparation
outdated_after:
seconds: 3599
seconds: 3600
settings:
build:
partitions: 1
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import mlflow
import os

import mlflow

if 'custom' not in globals():
from mage_ai.data_preparation.decorators import custom
Expand All @@ -8,11 +9,12 @@
@custom
def setup(*args, **kwargs):
# Get tracking URI from the environment with a default value
tracking_uri = kwargs.get('tracking_uri', 'sqlite:///mlflow.db')
tracking_uri = os.getenv('EXPERIMENTS_TRACKING_URI', 'sqlite:///mlflow.db')

# Get experiment name from the pipeline’s variable with a default value
experiment_name = kwargs.get('experiment_name', 'nyc-taxi-experiment')

# Set the MLflow tracking URI
mlflow.set_tracking_uri(tracking_uri)
# Set the experiment name in MLflow
mlflow.set_experiment(experiment_name)
mlflow.set_experiment(experiment_name)
28 changes: 28 additions & 0 deletions mlops/unit_2/custom/load_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from typing import Dict, List, Tuple

if 'custom' not in globals():
from mage_ai.data_preparation.decorators import custom


@custom
def models(*args, **kwargs) -> Tuple[List[str], List[Dict[str, str]]]:
    """
    Produce dynamic-block children, one per model class path.

    models: comma separated strings
        linear_model.Lasso
        linear_model.LinearRegression
        svm.LinearSVR
        ensemble.ExtraTreesRegressor
        ensemble.GradientBoostingRegressor
        ensemble.RandomForestRegressor

    Returns a pair: the list of model class paths (child data) and, for each,
    metadata whose block_uuid is the bare class name (text after the last dot).
    """
    # Comma separated list of "<module>.<ClassName>" entries; falls back to two
    # linear models when the pipeline variable is absent.
    raw_names: str = kwargs.get(
        'models', 'linear_model.LinearRegression,linear_model.Lasso'
    )

    child_data: List[str] = [entry.strip() for entry in raw_names.split(',')]
    child_metadata: List[Dict] = [
        {'block_uuid': entry.rsplit('.', 1)[-1]} for entry in child_data
    ]

    return child_data, child_metadata
File renamed without changes.
29 changes: 29 additions & 0 deletions mlops/unit_2/data_exporters/training.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from typing import Callable, Dict, Tuple, Union

from pandas import Series
from scipy.sparse._csr import csr_matrix
from sklearn.base import BaseEstimator

from mlops.utils.models.sklearn import load_class, train_model

if 'data_exporter' not in globals():
from mage_ai.data_preparation.decorators import data_exporter


@data_exporter
def train(
    settings: Tuple[
        Dict[str, Union[bool, float, int, str]],
        csr_matrix,
        Series,
        Dict[str, Union[Callable[..., BaseEstimator], str]],
    ],
    **kwargs,
) -> Tuple[BaseEstimator, Dict[str, Union[Callable[..., BaseEstimator], str]]]:
    """
    Fit one model on the full feature matrix using tuned hyperparameters.

    Args:
        settings: Output of the upstream hyperparameter-tuning block:
            (hyperparameters, X, y, model_info), where model_info carries the
            model class under 'cls' and its dotted path under 'name'.

    Returns:
        The fitted estimator together with the unchanged model_info dict.
        (The original annotation said ``-> BaseEstimator`` but the function
        has always returned this 2-tuple; the annotation is corrected here.)
    """
    hyperparameters, X, y, model_info = settings

    # 'cls' is the estimator class itself (set by hyperparameter_tuning),
    # so it can be instantiated directly with the tuned hyperparameters.
    model_class = model_info['cls']
    model = model_class(**hyperparameters)
    model.fit(X, y)

    return model, model_info
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -3,78 +3,97 @@ blocks:
color: null
configuration:
global_data_product:
project: unit_2
uuid: training_set
downstream_blocks: []
downstream_blocks:
- hyperparameter_tuning
executor_config: null
executor_type: local_python
has_callback: false
language: python
name: Training set
retry_config: null
status: executed
status: failed
timeout: null
type: global_data_product
upstream_blocks: []
uuid: training_set
- all_upstream_blocks_executed: true
color: pink
configuration:
file_source:
path: unit_2/custom/experiment_setup.py
downstream_blocks:
- hyperparameter_tuning
executor_config: null
executor_type: local_python
has_callback: false
language: python
name: experiment_setup
retry_config: null
status: executed
timeout: null
type: custom
upstream_blocks: []
uuid: experiment_setup
- all_upstream_blocks_executed: true
color: teal
configuration:
dynamic: true
file_path: custom/load_models.py
file_source:
path: custom/load_models.py
downstream_blocks: []
path: unit_2/custom/load_models.py
downstream_blocks:
- hyperparameter_tuning
executor_config: null
executor_type: local_python
has_callback: false
language: python
name: Load models
retry_config: null
status: updated
status: executed
timeout: null
type: custom
upstream_blocks: []
uuid: load_models
- all_upstream_blocks_executed: false
color: null
configuration:
file_path: transformers/hyperparameter_tuning/sklearn_hyperparameter_tuning.py
file_source:
path: transformers/hyperparameter_tuning/sklearn_hyperparameter_tuning.py
path: unit_2/transformers/hyperparameter_tuning.py
downstream_blocks:
- sklearn_training
- training
executor_config: null
executor_type: local_python
has_callback: false
language: python
name: sklearn hyperparameter tuning
name: Hyperparameter tuning
retry_config: null
status: executed
status: failed
timeout: null
type: transformer
upstream_blocks:
- training_set
- load_models
uuid: sklearn_hyperparameter_tuning
- experiment_setup
uuid: hyperparameter_tuning
- all_upstream_blocks_executed: false
color: null
configuration:
file_path: data_exporters/sklearn_training.py
file_source:
path: data_exporters/sklearn_training.py
path: unit_2/data_exporters/training.py
downstream_blocks: []
executor_config: null
executor_type: local_python
has_callback: false
language: python
name: sklearn training
name: Training
retry_config: null
status: executed
timeout: null
type: data_exporter
upstream_blocks:
- sklearn_hyperparameter_tuning
uuid: sklearn_training
- hyperparameter_tuning
uuid: training
cache_block_output_in_memory: false
callbacks: []
concurrency_config:
Expand Down Expand Up @@ -104,10 +123,7 @@ type: python
uuid: sklearn_training
variables:
developer: dangerous
experiment_name: zoomcamp
max_evaluations: 1
models: linear_model.LinearRegression,linear_model.Lasso
random_state: 3
tracking_uri: postgresql+psycopg2://postgres:[email protected]:5432/mlops_runs
variables_dir: /root/.mage_data/mlops
variables_dir: /home/src/mage_data/unit_2
widgets: []
Empty file.
Empty file.
38 changes: 38 additions & 0 deletions mlops/unit_2/transformers/hyperparameter_tuning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from typing import Callable, Dict, Tuple, Union

from pandas import Series
from scipy.sparse._csr import csr_matrix
from sklearn.base import BaseEstimator

from mlops.utils.models.sklearn import load_class, tune_hyperparameters

if 'transformer' not in globals():
from mage_ai.data_preparation.decorators import transformer


@transformer
def hyperparameter_tuning(
    training_set: Dict[str, Union[Series, csr_matrix]],
    model_class_name: str,
    **kwargs,
) -> Tuple[
    Dict[str, Union[bool, float, int, str]],
    csr_matrix,
    Series,
    Dict[str, Union[Callable[..., BaseEstimator], str]],
]:
    """
    Tune hyperparameters for one model class against the shared training set.

    Args:
        training_set: Global data product output; its 'build' entry unpacks to
            (X, X_train, X_val, y, y_train, y_val, _).
        model_class_name: Dotted sklearn path, e.g. 'linear_model.Lasso'
            (supplied per dynamic child by the load_models block).

    Returns:
        (best hyperparameters, full feature matrix X, full target y,
        model info dict with 'cls' = the loaded class and 'name' = its path).
        The last element was previously annotated as ``Callable[..., BaseEstimator]``
        but is actually this dict — the annotation now matches the value and the
        downstream training block's expectation.
    """
    X, X_train, X_val, y, y_train, y_val, _ = training_set['build']

    model_class = load_class(model_class_name)

    hyperparameters = tune_hyperparameters(
        model_class,
        X_train=X_train,
        y_train=y_train,
        X_val=X_val,
        y_val=y_val,
        # Pipeline variables override the defaults (50 trials, seed 3).
        max_evaluations=kwargs.get('max_evaluations', 50),
        random_state=kwargs.get('random_state', 3),
    )

    return hyperparameters, X, y, dict(cls=model_class, name=model_class_name)
27 changes: 24 additions & 3 deletions scripts/init-db.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,28 @@
#!/bin/bash
set -e
# set -e is removed to allow the script to continue executing after an error.

psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" <<-EOSQL
CREATE DATABASE "$POSTGRES_DB";
# Function to check if a database exists and create it if it does not.
# It takes one argument: the name of the database to check and potentially create.
# Create a database if it does not already exist.
# $1: the name of the database to check and potentially create.
create_db_if_not_exists() {
    local dbname="$1"
    # Test the pipeline's exit status directly instead of inspecting $? on the
    # next line (brittle), and quote $dbname so empty/whitespace values cannot
    # word-split or act as grep patterns. `grep -qw` matches the name as a
    # whole word in psql's database listing.
    if PGPASSWORD="$POSTGRES_PASSWORD" psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "postgres" --list | grep -qw -- "$dbname"; then
        echo "Database $dbname already exists, skipping creation."
    else
        echo "Database $dbname not found, creating..."
        PGPASSWORD="$POSTGRES_PASSWORD" psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "postgres" -c "CREATE DATABASE $dbname;"
    fi
}

# Attempt to create the main application database.
create_db_if_not_exists "$POSTGRES_DB"

# Attempt to create the 'experiments' database.
create_db_if_not_exists "$EXPERIMENTS_DB"

# Assuming the role exists; otherwise, you'd include CREATE ROLE commands as needed.
# Grant all privileges on the databases to the specified user.
PGPASSWORD=$POSTGRES_PASSWORD psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "postgres" <<-EOSQL
GRANT ALL PRIVILEGES ON DATABASE "$POSTGRES_DB" TO "$POSTGRES_USER";
GRANT ALL PRIVILEGES ON DATABASE "$EXPERIMENTS_DB" TO "$POSTGRES_USER";
EOSQL

0 comments on commit e90b3fc

Please sign in to comment.