PondiB · PondiB · May 28, 2025 · Apr 22, 2025 · Apr 22, 2025 · Apr 22, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
     - `load_geojson`
     - `load_ml_model`
     - `load_url`
+    - `mlm_class_catboost`
     - `mlm_class_lighttae`
     - `mlm_class_mlp`
     - `mlm_class_random_forest`
@@ -30,6 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
     - `mlm_class_tempcnn`
     - `mlm_class_xgboost`
     - `mlm_regr_random_forest`
+    - `mlm_regr_svm`
     - `ml_fit`
     - `ml_label_class`
     - `ml_predict`

diff --git a/proposals-ml/mlm_class_1dcnn.json b/proposals-ml/mlm_class_1dcnn.json
@@ -0,0 +1,178 @@
+{
+    "id": "mlm_class_1dcnn",
+    "summary": "Initialize a 1D CNN classification model",
+    "description": "Initializes a 1D Convolutional Neural Network (CNN) classification model. The number of input channels and the number of output classes are inferred automatically from the training data at fit time. This component sets up the model structure but does not perform training or handle data splitting. The resulting model can be trained later using ``ml_fit``.",
+    "categories": [
+        "machine learning"
+    ],
+    "experimental": true,
+    "parameters": [
+        {
+            "name": "conv_filters",
+            "description": "List of integers specifying the number of filters in each convolutional layer. The final output layer for classification will be added automatically based on the number of classes in the training data.",
+            "default": [64, 128, 256, 512],
+            "schema": {
+                "type": "array",
+                "items": {
+                    "type": "integer",
+                    "minimum": 1
+                },
+                "minItems": 4,
+                "maxItems": 4
+            }
+        },
+        {
+            "name": "conv_kernels",
+            "description": "List specifying the kernel size of each convolutional layer, each given either as an integer or as the string 'global'. A 'global' kernel covers the entire sequence at that layer; by default, it is used for the fourth convolutional layer. The final output layer for classification will use a kernel of size 1 and be added automatically.",
+            "default": [3, 3, 3, "global"],
+            "schema": {
+                "type": "array",
+                "items": {
+                    "oneOf": [
+                        {"type": "integer", "minimum": 1},
+                        {"type": "string", "enum": ["global"]}
+                    ]
+                },
+                "minItems": 4,
+                "maxItems": 4
+            }
+        },
+        {
+            "name": "conv_strides",
+            "description": "List of integers specifying the stride for each convolutional layer. The final output layer for classification will use stride 1 and be added automatically.",
+            "default": [1, 1, 1, 1],
+            "schema": {
+                "type": "array",
+                "items": {
+                    "type": "integer",
+                    "minimum": 1
+                },
+                "minItems": 4,
+                "maxItems": 4
+            }
+        },
+        {
+            "name": "use_batchnorm",
+            "description": "List of booleans specifying whether to use batch normalization after each convolutional layer. The final output layer will not use batch normalization.",
+            "default": [true, true, true, false],
+            "schema": {
+                "type": "array",
+                "items": {
+                    "type": "boolean"
+                },
+                "minItems": 4,
+                "maxItems": 4
+            }
+        },
+        {
+            "name": "activation",
+            "description": "Activation function to use after each convolutional layer.",
+            "default": "relu",
+            "schema": {
+                "type": "string",
+                "enum": ["relu", "tanh", "sigmoid", "leakyrelu"]
+            }
+        },
+        {
+            "name": "maxpool_sizes",
+            "description": "List of integers specifying the pool size for each max pooling layer (after each of the first 3 conv blocks).",
+            "default": [2, 2, 2],
+            "schema": {
+                "type": "array",
+                "items": {
+                    "type": "integer",
+                    "minimum": 1
+                },
+                "minItems": 3,
+                "maxItems": 3
+            }
+        },
+        {
+            "name": "maxpool_strides",
+            "description": "List of integers specifying the stride for each max pooling layer.",
+            "default": [2, 2, 2],
+            "schema": {
+                "type": "array",
+                "items": {
+                    "type": "integer",
+                    "minimum": 1
+                },
+                "minItems": 3,
+                "maxItems": 3
+            }
+        },
+        {
+            "name": "epochs",
+            "description": "Number of training epochs.",
+            "optional": true,
+            "default": 100,
+            "schema": {
+                "type": "integer",
+                "minimum": 1
+            }
+        },
+        {
+            "name": "batch_size",
+            "description": "Size of the training batches.",
+            "optional": true,
+            "default": 64,
+            "schema": {
+                "type": "integer",
+                "minimum": 1
+            }
+        },
+        {
+            "name": "optimizer",
+            "description": "The optimizer to use for training.",
+            "optional": true,
+            "default": "adam",
+            "schema": {
+                "type": "string",
+                "enum": [
+                    "adam",
+                    "sgd",
+                    "rmsprop",
+                    "adagrad",
+                    "nadam"
+                ]
+            }
+        },
+        {
+            "name": "learning_rate",
+            "description": "The learning rate for the optimizer.",
+            "optional": true,
+            "default": 0.001,
+            "schema": {
+                "type": "number",
+                "minimum": 0
+            }
+        },
+        {
+            "name": "seed",
+            "description": "A randomization seed to use for reproducibility. If not given or `null`, no seed is used and results may differ on subsequent use.",
+            "optional": true,
+            "default": null,
+            "schema": {
+                "type": [
+                    "integer",
+                    "null"
+                ]
+            }
+        }
+    ],
+    "returns": {
+        "description": "A model object that can be trained using ``ml_fit``.",
+        "schema": {
+            "type": "object",
+            "subtype": "ml-model"
+        }
+    },
+    "links": [
+        {
+            "href": "https://ieeexplore.ieee.org/document/8921180",
+            "title": "Song et al. (2019): Land Cover Classification for Satellite Images Through 1D CNN",
+            "type": "text/html",
+            "rel": "about"
+        }
+    ]
+} 
diff --git a/proposals-ml/mlm_class_catboost.json b/proposals-ml/mlm_class_catboost.json
@@ -0,0 +1,62 @@
+{
+    "id": "mlm_class_catboost",
+    "summary": "Initialize a CatBoost classification model",
+    "description": "Initializes a CatBoost classification model. This component sets up the model structure but does not perform training or handle data splitting. The resulting model can be trained later using ``ml_fit``.",
+    "categories": [
+        "machine learning"
+    ],
+    "experimental": true,
+    "parameters": [
+        {
+            "name": "iterations",
+            "description": "The maximum number of trees that can be built during the training process.",
+            "optional": true,
+            "default": 5,
+            "schema": {
+                "type": "integer",
+                "minimum": 1,
+                "maximum": 500
+            }
+        },
+        {
+            "name": "depth",
+            "description": "Depth of the trees in the CatBoost model.",
+            "optional": true,
+            "default": 5,
+            "schema": {
+                "type": "integer",
+                "minimum": 1,
+                "maximum": 16
+            }
+        },
+        {
+            "name": "seed",
+            "description": "The random seed used for training, for reproducibility. Defaults to `0`. If set to `null`, no seed is used and results may differ on subsequent use.",
+            "optional": true,
+            "default": 0,
+            "schema": {
+                "type": [
+                    "integer",
+                    "null"
+                ],
+                "minimum": 0,
+                "maximum": 2147483647
+            }
+        }
+    ],
+    "returns": {
+        "description": "A model object that can be trained using ``ml_fit``.",
+        "schema": {
+            "type": "object",
+            "subtype": "ml-model"
+        }
+    },
+    "links": [
+        {
+            "href": "https://catboost.ai/",
+            "title": "CatBoost Documentation",
+            "type": "text/html",
+            "rel": "about"
+        }
+    ]
+} 
diff --git a/proposals-ml/mlm_regr_svm.json b/proposals-ml/mlm_regr_svm.json
@@ -0,0 +1,122 @@
+{
+    "id": "mlm_regr_svm",
+    "summary": "Initialize an SVM regression model",
+    "description": "Initializes a Support Vector Machine (SVM) regression model. This component sets up the model structure but does not perform training or handle data splitting. The resulting model can be trained later using ``ml_fit``.",
+    "categories": [
+        "machine learning"
+    ],
+    "experimental": true,
+    "parameters": [
+        {
+            "name": "kernel",
+            "description": "Specifies the kernel type to be used in the algorithm.",
+            "optional": true,
+            "default": "rbf",
+            "schema": {
+                "type": "string",
+                "enum": [
+                    "linear",
+                    "poly",
+                    "rbf",
+                    "sigmoid"
+                ]
+            }
+        },
+        {
+            "name": "C",
+            "description": "Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive.",
+            "optional": true,
+            "default": 1,
+            "schema": {
+                "type": "number",
+                "exclusiveMinimum": 0
+            }
+        },
+        {
+            "name": "epsilon",
+            "description": "Epsilon in the epsilon-SVR model. Specifies the epsilon-tube within which no penalty is associated in the training loss function with points predicted within a distance epsilon from the actual value.",
+            "optional": true,
+            "default": 0.1,
+            "schema": {
+                "type": "number",
+                "minimum": 0
+            }
+        },
+        {
+            "name": "gamma",
+            "description": "Kernel coefficient for 'rbf', 'poly', and 'sigmoid'. Higher values lead to tighter fits.",
+            "optional": true,
+            "default": 1,
+            "schema": {
+                "type": "number",
+                "minimum": 0
+            }
+        },
+        {
+            "name": "degree",
+            "description": "Degree of the polynomial kernel function (only relevant for 'poly' kernel).",
+            "optional": true,
+            "default": 3,
+            "schema": {
+                "type": "integer",
+                "minimum": 1
+            }
+        },
+        {
+            "name": "coef0",
+            "description": "Independent term in the kernel function (only relevant for 'poly' and 'sigmoid' kernels).",
+            "optional": true,
+            "default": 0,
+            "schema": {
+                "type": "number"
+            }
+        },
+        {
+            "name": "tolerance",
+            "description": "Tolerance of termination criterion.",
+            "optional": true,
+            "default": 0.001,
+            "schema": {
+                "type": "number",
+                "minimum": 0
+            }
+        },
+        {
+            "name": "cachesize",
+            "description": "Size of the kernel cache in MB.",
+            "optional": true,
+            "default": 1000,
+            "schema": {
+                "type": "integer",
+                "minimum": 1
+            }
+        },
+        {
+            "name": "seed",
+            "description": "A randomization seed to use for reproducibility. If not given or `null`, no seed is used and results may differ on subsequent use.",
+            "optional": true,
+            "default": null,
+            "schema": {
+                "type": [
+                    "integer",
+                    "null"
+                ]
+            }
+        }
+    ],
+    "returns": {
+        "description": "A model object that can be trained using ``ml_fit``.",
+        "schema": {
+            "type": "object",
+            "subtype": "ml-model"
+        }
+    },
+    "links": [
+        {
+            "href": "https://link.springer.com/chapter/10.1007/978-1-4302-5990-9_4",
+            "title": "Awad, M., & Khanna, R. (2015). Support vector regression. In: Efficient learning machines: Theories, concepts, and applications for engineers and system designers.",
+            "type": "text/html",
+            "rel": "about"
+        }
+    ]
+}