From 98ddbaa894a582520fb2816128bbb8ba270cdf4d Mon Sep 17 00:00:00 2001 From: EdenWuyifan Date: Thu, 2 May 2024 14:00:55 -0400 Subject: [PATCH] fix bugs and add scoring type --- alpha_automl/pipeline_search/agent_environment.py | 13 +++++++++---- alpha_automl/pipeline_search/agent_lab.py | 2 +- alpha_automl/pipeline_synthesis/pipeline_builder.py | 9 ++++----- alpha_automl/pipeline_synthesis/setup_search.py | 7 +++++-- alpha_automl/resource/primitives_hierarchy.json | 11 ++++++++--- 5 files changed, 27 insertions(+), 15 deletions(-) diff --git a/alpha_automl/pipeline_search/agent_environment.py b/alpha_automl/pipeline_search/agent_environment.py index afeede47..96b0315a 100644 --- a/alpha_automl/pipeline_search/agent_environment.py +++ b/alpha_automl/pipeline_search/agent_environment.py @@ -27,6 +27,12 @@ def __init__(self, config: EnvContext): self.board = self.game.getInitBoard() # initial board self.step_stack = ["S"] # stack for steps self.metadata = self.board[: self.game.m] + + if self.metadata[0] == 2: # regression.error + self.scoring_type = "error" + else: # classification.precision | clustering | regression.r2 + self.scoring_type = "precision" + self.observation_space = Dict( { "board": Box( @@ -93,11 +99,10 @@ def step(self, action): game_end = self.game.getGameEnded(self.board) if game_end == 1: # pipeline score over threshold try: - if self.game.problem == "REGRESSION": - # reward = 10 + (100 / self.game.getEvaluation(self.board)) - reward = 10 + (self.game.getEvaluation(self.board)) ** 3 * 100 + if self.scoring_type == "error": + reward = 10 + (100 / self.game.getEvaluation(self.board)) else: - reward = 10 + (self.game.getEvaluation(self.board)) ** 2 * 100 + reward = 10 + (self.game.getEvaluation(self.board)) ** 3 * 100 except Exception as e: logger.critical(f"[PIPELINE FOUND] Error happened: {str(e)}") elif game_end == 2: # finished but invalid diff --git a/alpha_automl/pipeline_search/agent_lab.py b/alpha_automl/pipeline_search/agent_lab.py index 386ffd4f..1efd9297 100644 --- a/alpha_automl/pipeline_search/agent_lab.py +++ b/alpha_automl/pipeline_search/agent_lab.py @@ -82,7 +82,7 @@ def train_rllib_model(algo, time_bound, checkpoint_load_folder, checkpoint_save_ while True: if ( time.time() > timeout - or (best_unchanged_iter >= 600 and result["episode_reward_mean"] >= 0) + or (best_unchanged_iter >= 10 and result["episode_reward_mean"] >= 0) # or result["episode_reward_mean"] >= 70 ): logger.debug(f"Training timeout reached") diff --git a/alpha_automl/pipeline_synthesis/pipeline_builder.py b/alpha_automl/pipeline_synthesis/pipeline_builder.py index 858fe7bd..342afad6 100644 --- a/alpha_automl/pipeline_synthesis/pipeline_builder.py +++ b/alpha_automl/pipeline_synthesis/pipeline_builder.py @@ -16,14 +16,13 @@ "lightgbm.LGBMClassifier": {'verbose': -1}, "lightgbm.LGBMRegressor": {'verbose': -1}, "catboost.CatBoostRegressor": { - 'depth': 8, 'grow_policy': 'Depthwise', - 'l2_leaf_reg': 2.7997999596449104, - 'learning_rate': 0.031375015734637225, - 'max_ctr_complexity': 2, - 'one_hot_max_size': 3, 'logging_level': 'Silent' }, + "catboost.CatBoostClassifier": { + 'grow_policy': 'Depthwise', + 'logging_level': 'Silent' + } } diff --git a/alpha_automl/pipeline_synthesis/setup_search.py b/alpha_automl/pipeline_synthesis/setup_search.py index e54dbd64..d4911105 100644 --- a/alpha_automl/pipeline_synthesis/setup_search.py +++ b/alpha_automl/pipeline_synthesis/setup_search.py @@ -170,16 +170,19 @@ def compute_metafeatures(metric, metadata): "mean_squared_error", "mean_squared_log_error", "median_absolute_error", - "r2_score", ]: scoring_type = 2 + elif metric in [ + "r2_score", + ]: + scoring_type = 3 elif metric in [ "adjusted_mutual_info_score", "rand_score", "mutual_info_score", "normalized_mutual_info_score", ]: - scoring_type = 3 + scoring_type = 4 metafeatures.append(scoring_type) # IMPUTE diff --git a/alpha_automl/resource/primitives_hierarchy.json b/alpha_automl/resource/primitives_hierarchy.json index 58a29ff8..92121a4c 100644 --- a/alpha_automl/resource/primitives_hierarchy.json +++ b/alpha_automl/resource/primitives_hierarchy.json @@ -19,7 +19,8 @@ "sklearn.svm.SVC", "sklearn.tree.DecisionTreeClassifier", "xgboost.XGBClassifier", - "lightgbm.LGBMClassifier" + "lightgbm.LGBMClassifier", + "catboost.CatBoostClassifier" ], "CLUSTERER": [ "sklearn.cluster.KMeans", @@ -32,10 +33,14 @@ "alpha_automl.builtin_primitives.datetime_encoder.DummyEncoder" ], "FEATURE_SCALER": [ - "sklearn.preprocessing.RobustScaler" + "sklearn.preprocessing.MaxAbsScaler", + "sklearn.preprocessing.RobustScaler", + "sklearn.preprocessing.StandardScaler" ], "FEATURE_SELECTOR": [ - "sklearn.feature_selection.SelectPercentile" + "sklearn.feature_selection.GenericUnivariateSelect", + "sklearn.feature_selection.SelectPercentile", + "sklearn.feature_selection.SelectKBest" ], "IMPUTER": [ "sklearn.impute.SimpleImputer"