diff --git a/CHANGELOG.md b/CHANGELOG.md index 4562396f..b07d8349 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `load_geojson` - `load_ml_model` - `load_url` + - `mlm_class_catboost` - `mlm_class_lighttae` - `mlm_class_mlp` - `mlm_class_random_forest` @@ -30,6 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `mlm_class_tempcnn` - `mlm_class_xgboost` - `mlm_regr_random_forest` + - `mlm_regr_svm` - `ml_fit` - `ml_label_class` - `ml_predict` diff --git a/proposals-ml/mlm_class_1dcnn.json b/proposals-ml/mlm_class_1dcnn.json new file mode 100644 index 00000000..887276c0 --- /dev/null +++ b/proposals-ml/mlm_class_1dcnn.json @@ -0,0 +1,178 @@ +{ + "id": "mlm_class_1dcnn", + "summary": "Initialize a 1D CNN classification model", + "description": "Initializes a 1D Convolutional Neural Network (CNN) classification model. The number of input channels and output classes are inferred automatically from the training data at fit time. This component sets up the model structure but does not perform training or handle data splitting. The resulting model can be trained later using ``ml_fit``.", + "categories": [ + "machine learning" + ], + "experimental": true, + "parameters": [ + { + "name": "conv_filters", + "description": "List of integers specifying the number of filters in each convolutional layer. The final output layer for classification will be added automatically based on the number of classes in the training data.", + "default": [64, 128, 256, 512], + "schema": { + "type": "array", + "items": { + "type": "integer", + "minimum": 1 + }, + "minItems": 4, + "maxItems": 4 + } + }, + { + "name": "conv_kernels", + "description": "List of integers or 'global' for kernel size in each convolutional layer. The 4th conv uses a global kernel (covers the entire sequence at that layer). 
The final output layer for classification will use a 1x1 kernel and be added automatically.", + "default": [3, 3, 3, "global"], + "schema": { + "type": "array", + "items": { + "oneOf": [ + {"type": "integer", "minimum": 1}, + {"type": "string", "enum": ["global"]} + ] + }, + "minItems": 4, + "maxItems": 4 + } + }, + { + "name": "conv_strides", + "description": "List of integers specifying the stride for each convolutional layer. The final output layer for classification will use stride 1 and be added automatically.", + "default": [1, 1, 1, 1], + "schema": { + "type": "array", + "items": { + "type": "integer", + "minimum": 1 + }, + "minItems": 4, + "maxItems": 4 + } + }, + { + "name": "use_batchnorm", + "description": "List of booleans specifying whether to use batch normalization after each convolutional layer. The final output layer will not use batch normalization.", + "default": [true, true, true, false], + "schema": { + "type": "array", + "items": { + "type": "boolean" + }, + "minItems": 4, + "maxItems": 4 + } + }, + { + "name": "activation", + "description": "Activation function to use after each convolutional layer.", + "default": "relu", + "schema": { + "type": "string", + "enum": ["relu", "tanh", "sigmoid", "leakyrelu"] + } + }, + { + "name": "maxpool_sizes", + "description": "List of integers specifying the pool size for each max pooling layer (after each of the first 3 conv blocks).", + "default": [2, 2, 2], + "schema": { + "type": "array", + "items": { + "type": "integer", + "minimum": 1 + }, + "minItems": 3, + "maxItems": 3 + } + }, + { + "name": "maxpool_strides", + "description": "List of integers specifying the stride for each max pooling layer.", + "default": [2, 2, 2], + "schema": { + "type": "array", + "items": { + "type": "integer", + "minimum": 1 + }, + "minItems": 3, + "maxItems": 3 + } + }, + { + "name": "epochs", + "description": "Number of training epochs.", + "optional": true, + "default": 100, + "schema": { + "type": "integer", + 
"minimum": 1 + } + }, + { + "name": "batch_size", + "description": "Size of the training batches.", + "optional": true, + "default": 64, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "optimizer", + "description": "The optimizer to use for training.", + "optional": true, + "default": "adam", + "schema": { + "type": "string", + "enum": [ + "adam", + "sgd", + "rmsprop", + "adagrad", + "nadam" + ] + } + }, + { + "name": "learning_rate", + "description": "The learning rate for the optimizer.", + "optional": true, + "default": 0.001, + "schema": { + "type": "number", + "minimum": 0 + } + }, + { + "name": "seed", + "description": "A randomization seed to use for reproducibility. If not given or `null`, no seed is used and results may differ on subsequent use.", + "optional": true, + "default": null, + "schema": { + "type": [ + "integer", + "null" + ] + } + } + ], + "returns": { + "description": "A model object that can be trained using ``ml_fit``.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + }, + "links": [ + { + "href": "https://ieeexplore.ieee.org/document/8921180", + "title": "Song et al. (2019): Land Cover Classification for Satellite Images Through 1D CNN", + "type": "text/html", + "rel": "about" + } + ] +} \ No newline at end of file diff --git a/proposals-ml/mlm_class_catboost.json b/proposals-ml/mlm_class_catboost.json new file mode 100644 index 00000000..7a46d0a1 --- /dev/null +++ b/proposals-ml/mlm_class_catboost.json @@ -0,0 +1,62 @@ +{ + "id": "mlm_class_catboost", + "summary": "Initialize a CatBoost classification model", + "description": "Initializes a CatBoost classification model. This component sets up the model structure but does not perform training or handle data splitting. 
The resulting model can be trained later using ``ml_fit``.", + "categories": [ + "machine learning" + ], + "experimental": true, + "parameters": [ + { + "name": "iterations", + "description": "The maximum number of trees that can be built during the training process.", + "optional": true, + "default": 5, + "schema": { + "type": "integer", + "minimum": 1, + "maximum": 500 + } + }, + { + "name": "depth", + "description": "Depth of the trees in the CatBoost model.", + "optional": true, + "default": 5, + "schema": { + "type": "integer", + "minimum": 1, + "maximum": 16 + } + }, + { + "name": "seed", + "description": "The random seed used for training, for reproducibility. If not given, the seed defaults to `0`. If set to `null`, no seed is used and results may differ on subsequent use.", + "optional": true, + "default": 0, + "schema": { + "type": [ + "integer", + "null" + ], + "minimum": 0, + "maximum": 2147483647 + } + } + ], + "returns": { + "description": "A model object that can be trained using ``ml_fit``.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + }, + "links": [ + { + "href": "https://catboost.ai/", + "title": "CatBoost Documentation", + "type": "text/html", + "rel": "about" + } + ] +} \ No newline at end of file diff --git a/proposals-ml/mlm_regr_svm.json b/proposals-ml/mlm_regr_svm.json new file mode 100644 index 00000000..ba63930c --- /dev/null +++ b/proposals-ml/mlm_regr_svm.json @@ -0,0 +1,122 @@ +{ + "id": "mlm_regr_svm", + "summary": "Initialize an SVM regression model", + "description": "Initializes a Support Vector Machine (SVM) regression model. This component sets up the model structure but does not perform training or handle data splitting. 
The resulting model can be trained later using ``ml_fit``.", + "categories": [ + "machine learning" + ], + "experimental": true, + "parameters": [ + { + "name": "kernel", + "description": "Specifies the kernel type to be used in the algorithm.", + "optional": true, + "default": "rbf", + "schema": { + "type": "string", + "enum": [ + "linear", + "poly", + "rbf", + "sigmoid" + ] + } + }, + { + "name": "C", + "description": "Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive.", + "optional": true, + "default": 1, + "schema": { + "type": "number", + "exclusiveMinimum": 0 + } + }, + { + "name": "epsilon", + "description": "Epsilon in the epsilon-SVR model. Specifies the epsilon-tube within which no penalty is associated in the training loss function with points predicted within a distance epsilon from the actual value.", + "optional": true, + "default": 0.1, + "schema": { + "type": "number", + "minimum": 0 + } + }, + { + "name": "gamma", + "description": "Kernel coefficient for 'rbf', 'poly', and 'sigmoid'. 
Higher values lead to tighter fits.", + "optional": true, + "default": 1, + "schema": { + "type": "number", + "minimum": 0 + } + }, + { + "name": "degree", + "description": "Degree of the polynomial kernel function (only relevant for 'poly' kernel).", + "optional": true, + "default": 3, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "coef0", + "description": "Independent term in the kernel function (only relevant for 'poly' and 'sigmoid' kernels).", + "optional": true, + "default": 0, + "schema": { + "type": "number" + } + }, + { + "name": "tolerance", + "description": "Tolerance of termination criterion.", + "optional": true, + "default": 0.001, + "schema": { + "type": "number", + "minimum": 0 + } + }, + { + "name": "cachesize", + "description": "Size of the kernel cache in MB.", + "optional": true, + "default": 1000, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "seed", + "description": "A randomization seed to use for reproducibility. If not given or `null`, no seed is used and results may differ on subsequent use.", + "optional": true, + "default": null, + "schema": { + "type": [ + "integer", + "null" + ] + } + } + ], + "returns": { + "description": "A model object that can be trained using ``ml_fit``.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + }, + "links": [ + { + "href": "https://link.springer.com/chapter/10.1007/978-1-4302-5990-9_4", + "title": "Awad, M., Khanna, R., Awad, M., & Khanna, R. (2015). Support vector regression. 
Efficient learning machines: Theories, concepts, and applications for engineers and system designers.", + "type": "text/html", + "rel": "about" + } + ] +} \ No newline at end of file diff --git a/proposals/mlm_class_catboost.json b/proposals/mlm_class_catboost.json new file mode 100644 index 00000000..7a46d0a1 --- /dev/null +++ b/proposals/mlm_class_catboost.json @@ -0,0 +1,62 @@ +{ + "id": "mlm_class_catboost", + "summary": "Initialize a CatBoost classification model", + "description": "Initializes a CatBoost classification model. This component sets up the model structure but does not perform training or handle data splitting. The resulting model can be trained later using ``ml_fit``.", + "categories": [ + "machine learning" + ], + "experimental": true, + "parameters": [ + { + "name": "iterations", + "description": "The maximum number of trees that can be built during the training process.", + "optional": true, + "default": 5, + "schema": { + "type": "integer", + "minimum": 1, + "maximum": 500 + } + }, + { + "name": "depth", + "description": "Depth of the trees in the CatBoost model.", + "optional": true, + "default": 5, + "schema": { + "type": "integer", + "minimum": 1, + "maximum": 16 + } + }, + { + "name": "seed", + "description": "The random seed used for training, for reproducibility. 
If not given, the seed defaults to `0`. If set to `null`, no seed is used and results may differ on subsequent use.", + "optional": true, + "default": 0, + "schema": { + "type": [ + "integer", + "null" + ], + "minimum": 0, + "maximum": 2147483647 + } + } + ], + "returns": { + "description": "A model object that can be trained using ``ml_fit``.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + }, + "links": [ + { + "href": "https://catboost.ai/", + "title": "CatBoost Documentation", + "type": "text/html", + "rel": "about" + } + ] +} \ No newline at end of file diff --git a/proposals/mlm_regr_svm.json b/proposals/mlm_regr_svm.json new file mode 100644 index 00000000..ba63930c --- /dev/null +++ b/proposals/mlm_regr_svm.json @@ -0,0 +1,122 @@ +{ + "id": "mlm_regr_svm", + "summary": "Initialize an SVM regression model", + "description": "Initializes a Support Vector Machine (SVM) regression model. This component sets up the model structure but does not perform training or handle data splitting. The resulting model can be trained later using ``ml_fit``.", + "categories": [ + "machine learning" + ], + "experimental": true, + "parameters": [ + { + "name": "kernel", + "description": "Specifies the kernel type to be used in the algorithm.", + "optional": true, + "default": "rbf", + "schema": { + "type": "string", + "enum": [ + "linear", + "poly", + "rbf", + "sigmoid" + ] + } + }, + { + "name": "C", + "description": "Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive.", + "optional": true, + "default": 1, + "schema": { + "type": "number", + "exclusiveMinimum": 0 + } + }, + { + "name": "epsilon", + "description": "Epsilon in the epsilon-SVR model. 
Specifies the epsilon-tube within which no penalty is associated in the training loss function with points predicted within a distance epsilon from the actual value.", + "optional": true, + "default": 0.1, + "schema": { + "type": "number", + "minimum": 0 + } + }, + { + "name": "gamma", + "description": "Kernel coefficient for 'rbf', 'poly', and 'sigmoid'. Higher values lead to tighter fits.", + "optional": true, + "default": 1, + "schema": { + "type": "number", + "minimum": 0 + } + }, + { + "name": "degree", + "description": "Degree of the polynomial kernel function (only relevant for 'poly' kernel).", + "optional": true, + "default": 3, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "coef0", + "description": "Independent term in the kernel function (only relevant for 'poly' and 'sigmoid' kernels).", + "optional": true, + "default": 0, + "schema": { + "type": "number" + } + }, + { + "name": "tolerance", + "description": "Tolerance of termination criterion.", + "optional": true, + "default": 0.001, + "schema": { + "type": "number", + "minimum": 0 + } + }, + { + "name": "cachesize", + "description": "Size of the kernel cache in MB.", + "optional": true, + "default": 1000, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "seed", + "description": "A randomization seed to use for reproducibility. If not given or `null`, no seed is used and results may differ on subsequent use.", + "optional": true, + "default": null, + "schema": { + "type": [ + "integer", + "null" + ] + } + } + ], + "returns": { + "description": "A model object that can be trained using ``ml_fit``.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + }, + "links": [ + { + "href": "https://link.springer.com/chapter/10.1007/978-1-4302-5990-9_4", + "title": "Awad, M., Khanna, R., Awad, M., & Khanna, R. (2015). Support vector regression. 
Efficient learning machines: Theories, concepts, and applications for engineers and system designers.", + "type": "text/html", + "rel": "about" + } + ] +} \ No newline at end of file diff --git a/tests/.words b/tests/.words index fb40804c..98ce6635 100644 --- a/tests/.words +++ b/tests/.words @@ -79,3 +79,7 @@ TAE least-confidence Camara softmax +Khanna +Awad +CatBoost +epsilon-SVR \ No newline at end of file