diff --git a/asreview2-optuna/classifiers.py b/asreview2-optuna/classifiers.py index 121b71e..cae57ad 100644 --- a/asreview2-optuna/classifiers.py +++ b/asreview2-optuna/classifiers.py @@ -6,8 +6,6 @@ SVM, ) -from sklearn.ensemble import RandomForestClassifier - def naive_bayes_params(trial: optuna.trial.FrozenTrial): # Use logarithmic normal distribution for alpha (alpha effect is non-linear) @@ -29,12 +27,8 @@ def svm_params(trial: optuna.trial.FrozenTrial): def random_forest_params(trial: optuna.trial.FrozenTrial): # Use normal distribution for n_estimators (n_estimators effect is linear) - n_estimators = trial.suggest_int("rf__n_estimators", 50, 200) - - # Use normal distribution for max_features (max_features effect is linear) - max_features = trial.suggest_categorical("rf__max_features", ["sqrt", "log2"]) - - return {"n_estimators": n_estimators, "max_features": max_features} + n_estimators = trial.suggest_int("rf__n_estimators", 100, 200) + return {"n_estimators": n_estimators, "max_features": "sqrt"} classifier_params = { @@ -45,24 +39,6 @@ def random_forest_params(trial: optuna.trial.FrozenTrial): } -class RFClassifier(RandomForestClassifier): - """Random forest classifier. - - Based on the sklearn implementation of the random forest - sklearn.ensemble.RandomForestClassifier. - """ - - name = "rf" - label = "Random forest" - - def __init__(self, n_estimators=100, max_features=10, **kwargs): - super().__init__( - n_estimators=int(n_estimators), - max_features=max_features, - **kwargs, - ) - - classifiers = { "nb": NaiveBayes, "log": Logistic, diff --git a/asreview2-optuna/main.py b/asreview2-optuna/main.py index 2bc9c90..56b5979 100644 --- a/asreview2-optuna/main.py +++ b/asreview2-optuna/main.py @@ -19,13 +19,13 @@ from feature_extractors import feature_extractor_params, feature_extractors # Study variables -VERSION = 1 +VERSION = 2 METRIC = "ndcg" # Options: "loss", "ndcg" STUDY_SET = "full" CLASSIFIER_TYPE = "rf" # Options: "nb", "log", "svm", "rf" FEATURE_EXTRACTOR_TYPE = "tfidf" # Options: "tfidf", "onehot", "labse", "bge-m3", "stella", "mxbai" PICKLE_FOLDER_PATH = Path("synergy-dataset", f"pickles_{FEATURE_EXTRACTOR_TYPE}") -PRE_PROCESSED_FMS = True # False = on the fly +PRE_PROCESSED_FMS = False # False = on the fly PARALLELIZE_OBJECTIVE = True AUTO_SHUTDOWN = True