Skip to content

svr and catboost #10

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
May 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `load_geojson`
- `load_ml_model`
- `load_url`
- `mlm_class_catboost`
- `mlm_class_lighttae`
- `mlm_class_mlp`
- `mlm_class_random_forest`
Expand All @@ -30,6 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `mlm_class_tempcnn`
- `mlm_class_xgboost`
- `mlm_regr_random_forest`
- `mlm_regr_svm`
- `ml_fit`
- `ml_label_class`
- `ml_predict`
Expand Down
178 changes: 178 additions & 0 deletions proposals-ml/mlm_class_1dcnn.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
{
"id": "mlm_class_1dcnn",
"summary": "Initialize a 1D CNN classification model",
"description": "Initializes a 1D Convolutional Neural Network (CNN) classification model. The number of input channels and the number of output classes are inferred automatically from the training data at fit time. This component sets up the model structure but does not perform training or handle data splitting. The resulting model can be trained later using ``ml_fit``.",
"categories": [
"machine learning"
],
"experimental": true,
"parameters": [
{
"name": "conv_filters",
"description": "List of integers specifying the number of filters in each convolutional layer. The final output layer for classification will be added automatically based on the number of classes in the training data.",
"optional": true,
"default": [64, 128, 256, 512],
"schema": {
"type": "array",
"items": {
"type": "integer",
"minimum": 1
},
"minItems": 4,
"maxItems": 4
}
},
{
"name": "conv_kernels",
"description": "List specifying the kernel size of each convolutional layer, given either as an integer or as the string 'global'. A 'global' kernel covers the entire sequence at that layer; by default, the 4th convolutional layer uses a global kernel. The final output layer for classification will use a 1x1 kernel and be added automatically.",
"optional": true,
"default": [3, 3, 3, "global"],
"schema": {
"type": "array",
"items": {
"oneOf": [
{"type": "integer", "minimum": 1},
{"type": "string", "enum": ["global"]}
]
},
"minItems": 4,
"maxItems": 4
}
},
{
"name": "conv_strides",
"description": "List of integers specifying the stride for each convolutional layer. The final output layer for classification will use stride 1 and be added automatically.",
"optional": true,
"default": [1, 1, 1, 1],
"schema": {
"type": "array",
"items": {
"type": "integer",
"minimum": 1
},
"minItems": 4,
"maxItems": 4
}
},
{
"name": "use_batchnorm",
"description": "List of booleans specifying whether to use batch normalization after each convolutional layer. The final output layer will not use batch normalization.",
"optional": true,
"default": [true, true, true, false],
"schema": {
"type": "array",
"items": {
"type": "boolean"
},
"minItems": 4,
"maxItems": 4
}
},
{
"name": "activation",
"description": "Activation function to use after each convolutional layer.",
"optional": true,
"default": "relu",
"schema": {
"type": "string",
"enum": ["relu", "tanh", "sigmoid", "leakyrelu"]
}
},
{
"name": "maxpool_sizes",
"description": "List of integers specifying the pool size for each max pooling layer (after each of the first 3 conv blocks).",
"optional": true,
"default": [2, 2, 2],
"schema": {
"type": "array",
"items": {
"type": "integer",
"minimum": 1
},
"minItems": 3,
"maxItems": 3
}
},
{
"name": "maxpool_strides",
"description": "List of integers specifying the stride for each max pooling layer.",
"optional": true,
"default": [2, 2, 2],
"schema": {
"type": "array",
"items": {
"type": "integer",
"minimum": 1
},
"minItems": 3,
"maxItems": 3
}
},
{
"name": "epochs",
"description": "Number of training epochs.",
"optional": true,
"default": 100,
"schema": {
"type": "integer",
"minimum": 1
}
},
{
"name": "batch_size",
"description": "Size of the training batches.",
"optional": true,
"default": 64,
"schema": {
"type": "integer",
"minimum": 1
}
},
{
"name": "optimizer",
"description": "The optimizer to use for training.",
"optional": true,
"default": "adam",
"schema": {
"type": "string",
"enum": [
"adam",
"sgd",
"rmsprop",
"adagrad",
"nadam"
]
}
},
{
"name": "learning_rate",
"description": "The learning rate for the optimizer.",
"optional": true,
"default": 0.001,
"schema": {
"type": "number",
"minimum": 0
}
},
{
"name": "seed",
"description": "A randomization seed to use for reproducibility. If not given or `null`, no seed is used and results may differ on subsequent use.",
"optional": true,
"default": null,
"schema": {
"type": [
"integer",
"null"
]
}
}
],
"returns": {
"description": "A model object that can be trained using ``ml_fit``.",
"schema": {
"type": "object",
"subtype": "ml-model"
}
},
"links": [
{
"href": "https://ieeexplore.ieee.org/document/8921180",
"title": "Song et al. (2019): Land Cover Classification for Satellite Images Through 1D CNN",
"type": "text/html",
"rel": "about"
}
]
}
62 changes: 62 additions & 0 deletions proposals-ml/mlm_class_catboost.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{
"id": "mlm_class_catboost",
"summary": "Initialize a CatBoost classification model",
"description": "Initializes a CatBoost classification model. This component sets up the model structure but does not perform training or handle data splitting. The resulting model can be trained later using ``ml_fit``.",
"categories": [
"machine learning"
],
"experimental": true,
"parameters": [
{
"name": "iterations",
"description": "The maximum number of trees that can be built during the training process.",
"optional": true,
"default": 5,
"schema": {
"type": "integer",
"minimum": 1,
"maximum": 500
}
},
{
"name": "depth",
"description": "Depth of the trees in the CatBoost model.",
"optional": true,
"default": 5,
"schema": {
"type": "integer",
"minimum": 1,
"maximum": 16
}
},
{
"name": "seed",
"description": "The random seed used for training, for reproducibility. If not given or `null`, no seed is used and results may differ on subsequent use.",
"optional": true,
"default": null,
"schema": {
"type": [
"integer",
"null"
],
"minimum": 0,
"maximum": 2147483647
}
}
],
"returns": {
"description": "A model object that can be trained using ``ml_fit``.",
"schema": {
"type": "object",
"subtype": "ml-model"
}
},
"links": [
{
"href": "https://catboost.ai/",
"title": "CatBoost Documentation",
"type": "text/html",
"rel": "about"
}
]
}
122 changes: 122 additions & 0 deletions proposals-ml/mlm_regr_svm.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
{
"id": "mlm_regr_svm",
"summary": "Initialize an SVM regression model",
"description": "Initializes a Support Vector Machine (SVM) regression model. This component sets up the model structure but does not perform training or handle data splitting. The resulting model can be trained later using ``ml_fit``.",
"categories": [
"machine learning"
],
"experimental": true,
"parameters": [
{
"name": "kernel",
"description": "Specifies the kernel type to be used in the algorithm.",
"optional": true,
"default": "rbf",
"schema": {
"type": "string",
"enum": [
"linear",
"poly",
"rbf",
"sigmoid"
]
}
},
{
"name": "C",
"description": "Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive.",
"optional": true,
"default": 1,
"schema": {
"type": "number",
"exclusiveMinimum": 0
}
},
{
"name": "epsilon",
"description": "Epsilon in the epsilon-SVR model. Specifies the epsilon-tube within which no penalty is associated in the training loss function with points predicted within a distance epsilon from the actual value.",
"optional": true,
"default": 0.1,
"schema": {
"type": "number",
"minimum": 0
}
},
{
"name": "gamma",
"description": "Kernel coefficient for 'rbf', 'poly', and 'sigmoid'. Higher values lead to tighter fits.",
"optional": true,
"default": 1,
"schema": {
"type": "number",
"minimum": 0
}
},
{
"name": "degree",
"description": "Degree of the polynomial kernel function (only relevant for 'poly' kernel).",
"optional": true,
"default": 3,
"schema": {
"type": "integer",
"minimum": 1
}
},
{
"name": "coef0",
"description": "Independent term in the kernel function (only relevant for 'poly' and 'sigmoid' kernels).",
"optional": true,
"default": 0,
"schema": {
"type": "number"
}
},
{
"name": "tolerance",
"description": "Tolerance of termination criterion.",
"optional": true,
"default": 0.001,
"schema": {
"type": "number",
"minimum": 0
}
},
{
"name": "cachesize",
"description": "Size of the kernel cache in MB.",
"optional": true,
"default": 1000,
"schema": {
"type": "integer",
"minimum": 1
}
},
{
"name": "seed",
"description": "A randomization seed to use for reproducibility. If not given or `null`, no seed is used and results may differ on subsequent use.",
"optional": true,
"default": null,
"schema": {
"type": [
"integer",
"null"
]
}
}
],
"returns": {
"description": "A model object that can be trained using ``ml_fit``.",
"schema": {
"type": "object",
"subtype": "ml-model"
}
},
"links": [
{
"href": "https://link.springer.com/chapter/10.1007/978-1-4302-5990-9_4",
"title": "Awad, M., & Khanna, R. (2015). Support vector regression. In Efficient learning machines: Theories, concepts, and applications for engineers and system designers.",
"type": "text/html",
"rel": "about"
}
]
}
Loading