From 82a3aacfd78efdc203ebfdac61a522f66fa20ed2 Mon Sep 17 00:00:00 2001 From: kingfengji Date: Thu, 1 Jun 2017 14:24:47 +0800 Subject: [PATCH] publish --- README.md | 238 ++++ README.txt | 260 +++++ datasets/gtzan/get_data.sh | 8 + datasets/gtzan/splits/blues.train | 70 ++ datasets/gtzan/splits/blues.trainval | 100 ++ datasets/gtzan/splits/blues.val | 30 + datasets/gtzan/splits/classical.train | 70 ++ datasets/gtzan/splits/classical.trainval | 100 ++ datasets/gtzan/splits/classical.val | 30 + datasets/gtzan/splits/country.train | 70 ++ datasets/gtzan/splits/country.trainval | 100 ++ datasets/gtzan/splits/country.val | 30 + datasets/gtzan/splits/disco.train | 70 ++ datasets/gtzan/splits/disco.trainval | 100 ++ datasets/gtzan/splits/disco.val | 30 + datasets/gtzan/splits/genre.train | 700 ++++++++++++ datasets/gtzan/splits/genre.trainval | 1000 +++++++++++++++++ datasets/gtzan/splits/genre.val | 300 +++++ datasets/gtzan/splits/genres.trainval | 1000 +++++++++++++++++ datasets/gtzan/splits/hiphop.train | 70 ++ datasets/gtzan/splits/hiphop.trainval | 100 ++ datasets/gtzan/splits/hiphop.val | 30 + datasets/gtzan/splits/jazz.train | 70 ++ datasets/gtzan/splits/jazz.trainval | 100 ++ datasets/gtzan/splits/jazz.val | 30 + datasets/gtzan/splits/metal.train | 70 ++ datasets/gtzan/splits/metal.trainval | 100 ++ datasets/gtzan/splits/metal.val | 30 + datasets/gtzan/splits/pop.train | 70 ++ datasets/gtzan/splits/pop.trainval | 100 ++ datasets/gtzan/splits/pop.val | 30 + datasets/gtzan/splits/reggae.train | 70 ++ datasets/gtzan/splits/reggae.trainval | 100 ++ datasets/gtzan/splits/reggae.val | 30 + datasets/gtzan/splits/rock.train | 70 ++ datasets/gtzan/splits/rock.trainval | 100 ++ datasets/gtzan/splits/rock.val | 30 + datasets/uci_adult/features | 14 + datasets/uci_adult/get_data.sh | 8 + datasets/uci_letter/get_data.sh | 7 + datasets/uci_semg/get_data.sh | 8 + datasets/uci_yeast/get_data.sh | 7 + datasets/uci_yeast/yeast.label | 10 + lib/gcforest/__init__.py | 0 
lib/gcforest/cascade/__init__.py | 0 lib/gcforest/cascade/cascade_classifier.py | 231 ++++ lib/gcforest/data_cache.py | 118 ++ lib/gcforest/datasets/__init__.py | 52 + lib/gcforest/datasets/cifar10.py | 55 + lib/gcforest/datasets/ds_base.py | 81 ++ lib/gcforest/datasets/ds_pickle.py | 44 + lib/gcforest/datasets/ds_pickle2.py | 23 + lib/gcforest/datasets/gtzan.py | 113 ++ lib/gcforest/datasets/imdb.py | 71 ++ lib/gcforest/datasets/mnist.py | 51 + lib/gcforest/datasets/olivetti_face.py | 41 + lib/gcforest/datasets/uci_adult.py | 101 ++ lib/gcforest/datasets/uci_letter.py | 40 + lib/gcforest/datasets/uci_semg.py | 74 ++ lib/gcforest/datasets/uci_yeast.py | 64 ++ lib/gcforest/estimators/__init__.py | 29 + lib/gcforest/estimators/base_estimator.py | 134 +++ lib/gcforest/estimators/est_utils.py | 31 + lib/gcforest/estimators/kfold_wrapper.py | 175 +++ lib/gcforest/estimators/sklearn_estimators.py | 52 + lib/gcforest/exp_utils.py | 150 +++ lib/gcforest/fgnet.py | 147 +++ lib/gcforest/layers/__init__.py | 33 + lib/gcforest/layers/base_layer.py | 60 + lib/gcforest/layers/fg_concat_layer.py | 50 + lib/gcforest/layers/fg_pool_layer.py | 69 ++ lib/gcforest/layers/fg_win_layer.py | 126 +++ lib/gcforest/utils/__init__.py | 0 lib/gcforest/utils/audio_utils.py | 49 + lib/gcforest/utils/cache_utils.py | 5 + lib/gcforest/utils/config_utils.py | 57 + lib/gcforest/utils/debug_utils.py | 8 + lib/gcforest/utils/log_utils.py | 45 + lib/gcforest/utils/metrics.py | 37 + lib/gcforest/utils/win_utils.py | 79 ++ .../fg-tree500-depth100-3folds-ca.json | 46 + .../gcforest/fg-tree500-depth100-3folds.json | 91 ++ .../gcforest/ca-tree500-n4x2-3folds.json | 30 + .../fg-tree500-depth100-3folds-ca.json | 42 + .../gcforest/fg-tree500-depth100-3folds.json | 86 ++ .../imdb/gcforest/ca-tree500-n4x2-3folds.json | 29 + .../gcforest/ca-tree500-n4x2-3folds.json | 30 + .../fg-tree500-depth100-3folds-ca.json | 45 + .../gcforest/fg-tree500-depth100-3folds.json | 91 ++ .../gcforest/ca-tree500-n4x2-3folds.json | 
29 + .../gcforest/ca-tree500-n4x2-3folds.json | 29 + .../gcforest/ca-tree500-n4x2-3folds.json | 30 + .../fg-tree500-depth100-3folds-ca.json | 44 + .../gcforest/fg-tree500-depth100-3folds.json | 87 ++ .../gcforest/ca-tree500-n4x2-3folds.json | 29 + requirements.txt | 8 + tools/audio/cache_feature.py | 85 ++ tools/train_cascade.py | 44 + tools/train_fg.py | 60 + tools/train_xgb.py | 58 + 100 files changed, 9048 insertions(+) create mode 100644 README.md create mode 100644 README.txt create mode 100644 datasets/gtzan/get_data.sh create mode 100644 datasets/gtzan/splits/blues.train create mode 100644 datasets/gtzan/splits/blues.trainval create mode 100644 datasets/gtzan/splits/blues.val create mode 100644 datasets/gtzan/splits/classical.train create mode 100644 datasets/gtzan/splits/classical.trainval create mode 100644 datasets/gtzan/splits/classical.val create mode 100644 datasets/gtzan/splits/country.train create mode 100644 datasets/gtzan/splits/country.trainval create mode 100644 datasets/gtzan/splits/country.val create mode 100644 datasets/gtzan/splits/disco.train create mode 100644 datasets/gtzan/splits/disco.trainval create mode 100644 datasets/gtzan/splits/disco.val create mode 100644 datasets/gtzan/splits/genre.train create mode 100644 datasets/gtzan/splits/genre.trainval create mode 100644 datasets/gtzan/splits/genre.val create mode 100644 datasets/gtzan/splits/genres.trainval create mode 100644 datasets/gtzan/splits/hiphop.train create mode 100644 datasets/gtzan/splits/hiphop.trainval create mode 100644 datasets/gtzan/splits/hiphop.val create mode 100644 datasets/gtzan/splits/jazz.train create mode 100644 datasets/gtzan/splits/jazz.trainval create mode 100644 datasets/gtzan/splits/jazz.val create mode 100644 datasets/gtzan/splits/metal.train create mode 100644 datasets/gtzan/splits/metal.trainval create mode 100644 datasets/gtzan/splits/metal.val create mode 100644 datasets/gtzan/splits/pop.train create mode 100644 datasets/gtzan/splits/pop.trainval create 
mode 100644 datasets/gtzan/splits/pop.val create mode 100644 datasets/gtzan/splits/reggae.train create mode 100644 datasets/gtzan/splits/reggae.trainval create mode 100644 datasets/gtzan/splits/reggae.val create mode 100644 datasets/gtzan/splits/rock.train create mode 100644 datasets/gtzan/splits/rock.trainval create mode 100644 datasets/gtzan/splits/rock.val create mode 100644 datasets/uci_adult/features create mode 100644 datasets/uci_adult/get_data.sh create mode 100644 datasets/uci_letter/get_data.sh create mode 100644 datasets/uci_semg/get_data.sh create mode 100644 datasets/uci_yeast/get_data.sh create mode 100644 datasets/uci_yeast/yeast.label create mode 100644 lib/gcforest/__init__.py create mode 100644 lib/gcforest/cascade/__init__.py create mode 100644 lib/gcforest/cascade/cascade_classifier.py create mode 100644 lib/gcforest/data_cache.py create mode 100644 lib/gcforest/datasets/__init__.py create mode 100644 lib/gcforest/datasets/cifar10.py create mode 100644 lib/gcforest/datasets/ds_base.py create mode 100644 lib/gcforest/datasets/ds_pickle.py create mode 100644 lib/gcforest/datasets/ds_pickle2.py create mode 100644 lib/gcforest/datasets/gtzan.py create mode 100644 lib/gcforest/datasets/imdb.py create mode 100644 lib/gcforest/datasets/mnist.py create mode 100644 lib/gcforest/datasets/olivetti_face.py create mode 100644 lib/gcforest/datasets/uci_adult.py create mode 100644 lib/gcforest/datasets/uci_letter.py create mode 100644 lib/gcforest/datasets/uci_semg.py create mode 100644 lib/gcforest/datasets/uci_yeast.py create mode 100644 lib/gcforest/estimators/__init__.py create mode 100644 lib/gcforest/estimators/base_estimator.py create mode 100644 lib/gcforest/estimators/est_utils.py create mode 100644 lib/gcforest/estimators/kfold_wrapper.py create mode 100644 lib/gcforest/estimators/sklearn_estimators.py create mode 100644 lib/gcforest/exp_utils.py create mode 100644 lib/gcforest/fgnet.py create mode 100644 lib/gcforest/layers/__init__.py create mode 
100644 lib/gcforest/layers/base_layer.py create mode 100644 lib/gcforest/layers/fg_concat_layer.py create mode 100644 lib/gcforest/layers/fg_pool_layer.py create mode 100644 lib/gcforest/layers/fg_win_layer.py create mode 100644 lib/gcforest/utils/__init__.py create mode 100644 lib/gcforest/utils/audio_utils.py create mode 100644 lib/gcforest/utils/cache_utils.py create mode 100644 lib/gcforest/utils/config_utils.py create mode 100644 lib/gcforest/utils/debug_utils.py create mode 100644 lib/gcforest/utils/log_utils.py create mode 100644 lib/gcforest/utils/metrics.py create mode 100644 lib/gcforest/utils/win_utils.py create mode 100644 models/cifar10/gcforest/fg-tree500-depth100-3folds-ca.json create mode 100644 models/cifar10/gcforest/fg-tree500-depth100-3folds.json create mode 100644 models/gtzan/gcforest/ca-tree500-n4x2-3folds.json create mode 100644 models/gtzan/gcforest/fg-tree500-depth100-3folds-ca.json create mode 100644 models/gtzan/gcforest/fg-tree500-depth100-3folds.json create mode 100644 models/imdb/gcforest/ca-tree500-n4x2-3folds.json create mode 100644 models/mnist/gcforest/ca-tree500-n4x2-3folds.json create mode 100644 models/mnist/gcforest/fg-tree500-depth100-3folds-ca.json create mode 100644 models/mnist/gcforest/fg-tree500-depth100-3folds.json create mode 100644 models/uci_adult/gcforest/ca-tree500-n4x2-3folds.json create mode 100644 models/uci_letter/gcforest/ca-tree500-n4x2-3folds.json create mode 100644 models/uci_semg/gcforest/ca-tree500-n4x2-3folds.json create mode 100644 models/uci_semg/gcforest/fg-tree500-depth100-3folds-ca.json create mode 100644 models/uci_semg/gcforest/fg-tree500-depth100-3folds.json create mode 100644 models/uci_yeast/gcforest/ca-tree500-n4x2-3folds.json create mode 100644 requirements.txt create mode 100644 tools/audio/cache_feature.py create mode 100644 tools/train_cascade.py create mode 100644 tools/train_fg.py create mode 100644 tools/train_xgb.py diff --git a/README.md b/README.md new file mode 100644 index 
0000000..456e490 --- /dev/null +++ b/README.md @@ -0,0 +1,238 @@ +gcForest v1.0 +======== +Package Official Website: http://lamda.nju.edu.cn/code_gcForest.ashx + +This package is provided "AS IS" and free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou (zhouzh@lamda.nju.edu.cn). + +Description: A python 2.7 implementation of gcForest proposed in [1]. +A demo implementation of gcForest library as well as some demo client scripts to demonstrate how to use the code. +The implementation is flexible enough for modifying the model or fit your own datasets. + +Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. + In IJCAI-2017. (https://arxiv.org/abs/1702.08835v2 ) + +Requirements: This package is developed with Python 2.7, please make sure all the dependencies are installed, +which are specified in requirements.txt + +ATTN: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). For any problem concerning the codes, please feel free to contact Mr.Feng. 
+ + + +package dependencies +======== +The package is developed in python 2.7 + +run the following command to install dependencies before running the code: +```pip install -r requirements.txt``` + + + +Outline for README +======== +* Package Overview +* Notes on Demo Scripts +* Notes on Model Specification Files +* Example +* Using Own Dataset + +Package Overview +======== +* lib/gcforest + - code for the implementations for gcforest +* tools/train_fg.py + - the demo script used for training Fine grained Layers +* tools/train_cascade.py + - the demo script used for training Cascade Layers +* models/ + - folder to save models which can be used in tools/train_fg.py and tools/train_cascade.py + - the gcForest structure is saved in json format +* logs + - folder logs/gcforest is used to save the logfiles produced by demo scripts + + + +Demo Scripts +===== +Here we give a brief description of the args needed for demo scripts + +tools/train_fg.py +----------------- +* --model: str + - The config filepath for Fine grained models (in json format) +* --save_outputs: bool + - if True, the output predictions produced by Fine Grained Model will be saved in model_cache_dir which is specified in Model Config. This output will be used when Training Cascade Layer. + - the default value is false + +tools/train_cascade.py +---------------------- +* --model: str + - The model config filepath for cascade training (in json format) + + + +Config Files +================= +Here we give a brief introduction on how to use model specification files, namely +* model specification for fine grained scanning structure. +* model specification for cascade forests. 
+ +All the model specifications (in json files) are saved in models/ +For instance, all the model specification files needed for MNIST are stored in models/mnist/gcforest +* ca is short for cascade structure specifications +* fg is short for fine-grained structure specifications + +You can define your own structure by writing similar json files. + +FineGrained model's config (dataset) +------------------------ +* dataset.train, dataset.test: [dict] + - corresponds to the particular datasets defined in lib/gcforest/datasets + - type [str]: see lib/gcforest/datasets/__init__.py for a reference + - You can use your own dataset by writing similar wrappers. + + +FineGrained model's config (train) +---------------------------------- + +* train.keep_model_in_mem: [bool] default=0 + - if 0, the forest will be freed from RAM +* train.data_cache : [dict] + - corresponds to the DataCache in lib/gcforest/data_cache.py +* train.data_cache.cache_dir (str) + - make sure to change "/mnt/raid/fengji/gcforest/cifar10/fg-tree500-depth100-3folds/datas" to your own path + +FineGrained model's config (net) +---------------------------------- +* net.outputs: [list] + - List of the data names output by this model +* net.layers: [List of Layers] + - Layer's Config, see lib/gcforest/layers for a reference + +Cascade model's config (dataset) +------------------------------ +Similar to FineGrained's model config (dataset) + +Cascade model's config (cascade) +------------------------------ +see lib/gcforest/cascade/cascade_classifier.py __init__ for a reference + + + +Examples +======== +Before running the scripts, make sure to change + +* train.data_cache.cache_dir in the Finegrained Model Config (eg: models/xxx/fg-xxxx.json) +* train.cascade.dataset.{train,test}.data_path in the Finegrained-Cascade Model Config (eg: models/xxx/fg-xxxx-ca.json) +* train.cascade.cascade.data_save_dir in the Finegrained Model Config (eg: models/xxx/ca-xxxx.json and models/xxx/fg-xxxx-ca.json) + +To Train a gcForest(with fine grained 
scanning), you need to run two scripts. + +* Fine Grained Scanning: 'tools/train_fg.py' +* Cascade Training: 'tools/train_cascade.py' + + + + +[UCI Letter](http://archive.ics.uci.edu/ml/datasets/Letter+Recognition) +------------ +* Get Data: you need to download the data by yourself by running the following command: +```Shell +cd datasets/uci_letter +sh get_data.sh +``` +* Since we do not need fine-grained scanning, we only train a Cascade Forest as follows: + - `python tools/train_cascade.py --model models/uci_letter/gcforest/ca-tree500-n4x2-3folds.json --log_dir logs/gcforest/uci_letter/ca` + +* UCI-Adult, YEAST can be trained with similar procedure. + + + + +MNIST +----- +* Get the data: The data will be automatically downloaded via 'lib/gcforest/datasets/mnist.py', you do not need to do it yourself +* First Train the Fine Grained Forest: + - Run `python tools/train_fg.py --model models/mnist/gcforest/fg-tree500-depth100-3folds.json --log_dir logs/gcforest/mnist/fg --save_outputs` + - This means: + 1. Train a fine grained model for MNIST dataset, + 2. Using the structure defined in models/mnist/gcforest/fg-tree500-depth100-3folds.json + 3. save the log files in logs/gcforest/mnist/fg + 4. The output for the fine grained scanning predictions is saved in train.data_cache.cache_dir +* Then, train the cascade forest (Note: make sure you run the train_fg.py first) + - run `python tools/train_cascade.py --model models/mnist/gcforest/fg-tree500-depth100-3folds-ca.json` + - This means: + 1. Train the fine grained scanning results with cascade structure. + 2. 
The cascade model specification is defined in 'models/mnist/gcforest/fg-tree500-depth100-3folds-ca.json' +* You could also train a Cascade Forest without fine-grained scanning (but the accuracy will be much lower): + - Run `python tools/train_cascade.py --model models/mnist/gcforest/ca-tree500-n4x2-3folds.json --log_dir logs/gcforest/mnist/ca` + + + +[UCI sEMG](http://archive.ics.uci.edu/ml/datasets/sEMG+for+Basic+Hand+movements) +-------- +* Get Data +```Shell +cd datasets/uci_semg +sh get_data.sh +``` +* First Train the Fine Grained Forest: + - `python tools/train_fg.py --model models/uci_semg/gcforest/fg-tree500-depth100-3folds.json --save_outputs --log_dir logs/gcforest/uci_semg/fg` +* Then, train the cascade forest (Note: make sure you run the train_fg.py first) + - `python tools/train_cascade.py --model models/uci_semg/gcforest/fg-tree500-depth100-3folds-ca.json --log_dir logs/gcforest/uci_semg/gc` +* You could also train a Cascade Forest without fine-grained scanning (but the accuracy will be much lower): + - `python tools/train_cascade.py --model models/uci_semg/gcforest/ca-tree500-n4x2-3folds.json --log_dir logs/gcforest/uci_semg/ca` + + +[GTZAN](http://marsyasweb.appspot.com/download/data_sets/) +------- +* Requirements(you need to install the following package) +librosa + +* Get Data by yourself by running the following command +```Shell +cd datasets/gtzan +sh get_data.sh +cd ../.. 
+python tools/audio/cache_feature.py --dataset gtzan --feature mfcc --split genre.trainval +``` + +* First Train the Fine Grained Forest: + - `python tools/train_fg.py --model models/gtzan/gcforest/fg-tree500-depth100-3folds.json --save_outputs --log_dir logs/gcforest/gtzan/fg` +* Then, train the cascade forest (Note: make sure you run the train_fg.py first) + - `python tools/train_cascade.py --model models/gtzan/gcforest/fg-tree500-depth100-3folds-ca.json --log_dir logs/gcforest/gtzan/gc` +* You could also train a Cascade Forest without fine-grained scanning (but the accuracy will be much lower): + - `python tools/train_cascade.py --model models/gtzan/gcforest/ca-tree500-n4x2-3folds.json --log_dir logs/gcforest/gtzan/ca --save_outputs` + +IMDB +---- +* Cascade Forest: + - `python tools/train_cascade.py --model models/imdb/gcforest/ca-tree500-n4x2-3folds.json --log_dir logs/gcforest/imdb/ca` + +CIFAR10 +------- +* First Train the Fine Grained Forest: + - `python tools/train_fg.py --model models/cifar10/gcforest/fg-tree500-depth100-3folds.json --save_outputs` +* Then, train the cascade forest (Note: make sure you run the train_fg.py first) + - `python tools/train_cascade.py --model models/cifar10/gcforest/fg-tree500-depth100-3folds-ca.json` + + +For Your Own Datasets +======== +* Data Format: + 0. Please refer to lib/gcforest/datasets/mnist.py as an example + 1. the dataset should have attributes X,y to represent the data and label + 2. y should be 1-d array + 3. For fine-grained scanning, X should be 4-d array (N x channel x H x W). (e.g. cifar10 should be Nx3x32x32, mnist should be Nx1x28x28, uci_semg should be Nx1x3000x1) +* Model Specifications: + 1. Save the json file in models/$dataset_name (recommended) + 2. for a detailed description, see section 'Config Files' +* If you only need to train a cascade forest, run tools/train_cascade.py. + +Happy Hacking. 
+ + + + + + diff --git a/README.txt b/README.txt new file mode 100644 index 0000000..e3f7c38 --- /dev/null +++ b/README.txt @@ -0,0 +1,260 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%Description: A python 2.7 implementation of gcForest proposed in [1]. % +%A demo implementation of gcForest library as well as some demo client scripts to demostrate how to use the code. % +%The implementation is flexible enough for modifying the model or fit your own datasets. % +% % +%Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. % +% In IJCAI-2017. (https://arxiv.org/abs/1702.08835v2 ) % +% % +%Requirements: This package is developed with Python 2.7, please make sure all the dependencies are installed, % +%which is specified in requirements.txt % +% % +%ATTN: This package is free for academic usage. % +% You can run it at your own risk. % +% For other purposes, please contact Prof. Zhi-Hua Zhou(zhouzh@lamda.nju.edu.cn) % +% % +%ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). % +% The readme file and demo roughly explains how to use the codes. % +% For any problem concerning the codes, please feel free to contact Mr.Feng. % +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +Package Official Website: http://lamda.nju.edu.cn/code_gcForest.ashx + + +This package is provided "AS IS" and free for academic usage. +You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou (zhouzh@lamda.nju.edu.cn). 
+ +Before running the demo, make sure all the dependencies are installed, for instance, please +run the following command to install dependencies before running the code: +```pip install -r requirements.txt``` + + +=================================== + Outline for README +==================================== +* Package Overview +* Notes on Demo Scripts +* Notes on Model Specification Files +* Example and Demos +* Using Own Dataset + +================================== + Package Overview +================================== +* lib/gcforest + - code for the implementations for gcforest +* tools/train_fg.py + - the demo script used for training Fine grained Layers +* tools/train_cascade.py + - the demo script used for training Cascade Layers +* models/ + - folder to save models which can be used in tools/train_fg.py and tools/train_cascade.py + - the gcForest structure is saved in json format +* logs + - folder logs/gcforest is used to save the logfiles produced by demo scripts + + +============================ + Notes on Demo Scripts +============================ +Below is a brief description on the args needed for demo scripts + + +%%%%%%%%%%%%%%%%%%%% +tools/train_fg.py +%%%%%%%%%%%%%%%%%%%% +* --model: str + - The config filepath for Fine grained models (in json format) +* --save_outputs: bool + - if True. The output predictions produced by Fine Grained Model + will be saved in model_cache_dir which is specified in Model Config. + This output will be used when Training Cascade Layer. + - the default value is false + + +%%%%%%%%%%%%%%%%%%%%%% +tools/train_cascade.py +%%%%%%%%%%%%%%%%%%%%%% +* --model: str + - The model config filepath for cascade training (in json format) + + +%%%%%%%%%%%%%%%%%%%%%% +Notes on Config Files +%%%%%%%%%%%%%%%%%%%%%% +Below is a brief introduction on how to use model specification files, namely +* model specification for fine grained scanning structure. +* model specification for cascade forests. 
+ +All the model specifications (in json files) are saved in models/ +For instance, all the model specification files needed for MNIST is stored in models/mnist/gcforest +* ca is short for cascade structure specifications +* fg is short for fine-grained structure specifications + +You can define your own structure by writing similar json files. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +FineGrained model's config (dataset) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +* dataset.train, dataset.test: [dict] + - coresponds to the particular datasets defined in lib/datasets + - type [str]: see lib/datasets/__init__.py for a reference + - You can use your own dataset by writing similar wrappers. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +FineGrained model's config (train) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +* train.keep_model_in_mem: [bool] default=0 + - if 0, the forest will be freed in RAM +* train.data_cache : [dict] + - coresponds to the DataCache in lib/dataset/data_cache.py +* train.data_cache.cache_dir (str) + - make sure to change "/mnt/raid/fengji/gcforest/cifar10/fg-tree500-depth100-3folds/datas" to your own path + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +FineGrained model's config (net) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +* net.outputs: [list] + - List of the data names output by this model +* net.layers: [List of Layers] + - Layer's Config, see lib/gcforest/layers for a reference + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +Cascade model's config (dataset) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +Similar as FineGrained's model config (dataset) + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +Cascade model's config (cascade) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +see lib/gcforest/cascade/cascade_classifier.py __init__ for a reference + + +============================= + Examples and Demos +============================= +Before running the scripts, make sure to change + +* train.data_cache.cache_dir in the Finegrained Model Config (eg: model/xxx/fg-xxxx.json) +* 
train.cascade.dataset.{train,test}.data_path in the Finegrained-Cascade Model Config (eg: model/xxx/fg-xxxx-ca.json) +* train.cascade.cascade.data_save_dir in the Finegrained Model Config (eg: model/xxx/ca-xxxx.json and model/xxx/fg-xxxx-ca.json) + +To Train a gcForest(with fine grained scanning), you need to run two scripts. + +* Fine Grained Scanning: 'tools/train_fg.py' +* Cascade Training: 'tools/train_cascade.py' + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +[UCI Letter](http://archive.ics.uci.edu/ml/datasets/Letter+Recognition) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +* Get Data: you need to download the data by yourself by running the following command: +```Shell +cd dataset/uci_letter +sh get_data.sh +``` +* Since we do not need to fine-grained scaning, we only train a Cascade Forest as follows: + - `python tools/train_cascade.py --model models/uci_letter/gcforest/ca-tree500-n4x2-3folds.json --log_dir logs/gcforest/uci_letter/ca` + +* Adult, YEAST can be trained with similar procedure. + +%%%%%%%%%%%%%%%%%%%%% + MNIST +%%%%%%%%%%%%%%%%%%%%% +* Get the data: The data will be automatically downloaded via 'lib/datasets/mnist.py', you do not need to do it yourself +* First Train the Fine Grained Forest: + - Run `python tools/train_fg.py --model models/mnist/gcforest/fg-tree500-depth100-3folds.json --log_dir logs/gcforest/mnist/fg --save_outputs` + - This means: + 1. Train a fine grained model for MNIST dataset, + 2. Using the structure defined in models/mnist/gcforest/fg-tree500-depth100-3folds.json + 3. save the log files in logs/gcforest/mnist/fg + 4. The output for the fine grained scanning predictions is saved in train.data_cache.cache_dir +* Then, train the cascade forest (Note: make sure you run the train_fg.py first) + - run `python tools/train_cascade.py --model models/mnist/gcforest/fg-tree500-depth100-3folds-ca.json` + - This means: + 1. 
Train the fine grained scaning results with cascade structure. + 2. The cascade model specification is defined in 'models/mnist/gcforest/fg-tree500-depth100-3folds-ca.json' +* You could also train a Cascade Forest without fine-grained scanning (but the accuracy will be much lower): + - Run `python tools/train_cascade.py --model models/mnist/gcforest/ca-tree500-n4x2-3folds.json --log_dir logs/gcforest/mnist/ca` + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +[UCI sEMG](http://archive.ics.uci.edu/ml/datasets/sEMG+for+Basic+Hand+movements) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +* Get Data +```Shell +cd dataset/uci_semg +sh get_data.sh +``` +* First Train the Fine Grained Forest: + - `python tools/train_fg.py --model models/uci_semg/gcforest/fg-tree500-depth100-3folds.json --save_outputs --log_dir logs/gcforest/uci_semg/fg` +* Then, train the cascade forest (Note: make sure you run the train_fg.py first) + - `python tools/train_cascade.py --model models/uci_semg/gcforest/fg-tree500-depth100-3folds-ca.json --log_dir logs/gcforest/uci_semg/gc` +* You could also training a Cascade Forest without fine-grained scanning(but the accuracy will be much lower): + - `python tools/train_cascade.py --model models/uci_semg/gcforest/ca-tree500-n4x2-3folds.json --log_dir logs/gcforest/uci_semg/ca` + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +[GTZAN](http://marsyasweb.appspot.com/download/data_sets/) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +* Requirements(you need to install the following package) +librosa + +* Get Data by yourself by running the following command +```Shell +cd dataset/gtzan +sh get_data.sh +cd ../.. 
+python tools/audio/cache_feature.py --dataset gtzan --feature mfcc --split genre.trainval +``` + +* First Train the Fine Grained Forest: + - `python tools/train_fg.py --model models/gtzan/gcforest/fg-tree500-depth100-3folds.json --save_outputs --log_dir logs/gcforest/gtzan/fg` +* Then, train the cascade forest (Note: make sure you run the train_fg.py first) + - `python tools/train_cascade.py --model models/gtzan/gcforest/fg-tree500-depth100-3folds-ca.json --log_dir logs/gcforest/gtzan/gc` +* You could also training a Cascade Forest without fine-grained scanning(but the accuracy will be much lower): + - `python tools/train_cascade.py --model models/gtzan/gcforest/ca-tree500-n4x2-3folds.json --log_dir logs/gcforest/gtzan/ca --save_outputs` + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + IMDB +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +* Cascade Forest: + - `python tools/train_cascade.py --model models/imdb/gcforest/ca-tree500-n4x2-3folds.json --log_dir logs/gcforest/imdb/ca` + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + CIFAR10 +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +* First Train the Fine Grained Forest: + - `python tools/train_fg.py --model models/cifar10/gcforest/fg-tree500-depth100-3folds.json --save_outputs` +* Then, train the cascade forest (Note: make sure you run the train_fg.py first) + - `python tools/train_cascade.py --model models/cifar10/gcforest/fg-tree500-depth100-3folds-ca.json` + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + For You Own Datasets +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +* Data Format: + 0. Please refer lib/datasets/mnist.py as an example + 1. the dataset should has attribute X,y to represent the data and label + 2. y should be 1-d array + 3. For fine-grained scanning, X should be 4-d array (N x channel x H x W). + (e.g. cifar10 shoud be Nx3x32x32, mnist should be Nx1x28x28, uci_semg should be Nx1x3000x1) +* Model Specifications: + 1. Save the json file in models/$dataset_name (recommended) + 2. 
for a detailed description, see section 'Config Files' +* If you only need to train a cascade forest, run tools/train_cascade.py. + + +Happy Hacking. + +Reference: +[1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. + (https://arxiv.org/abs/1702.08835v2 ) + diff --git a/datasets/gtzan/get_data.sh b/datasets/gtzan/get_data.sh new file mode 100644 index 0000000..e212ae4 --- /dev/null +++ b/datasets/gtzan/get_data.sh @@ -0,0 +1,8 @@ +#Description: A python 2.7 implementation of gcForest proposed in [1]. A demo implementation of gcForest library as well as some demo client scripts to demonstrate how to use the code. The implementation is flexible enough for modifying the model or +#fit your own datasets. +#Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. (https://arxiv.org/abs/1702.08835v2 ) +#Requirements: This package is developed with Python 2.7, please make sure all the dependencies are installed, which are specified in requirements.txt +#ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou(zhouzh@lamda.nju.edu.cn) +#ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explain how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng. 
+wget http://opihi.cs.uvic.ca/sound/genres.tar.gz +tar zxvf genres.tar.gz diff --git a/datasets/gtzan/splits/blues.train b/datasets/gtzan/splits/blues.train new file mode 100644 index 0000000..cf35e04 --- /dev/null +++ b/datasets/gtzan/splits/blues.train @@ -0,0 +1,70 @@ +blues/blues.00000.au 0 +blues/blues.00001.au 0 +blues/blues.00004.au 0 +blues/blues.00005.au 0 +blues/blues.00009.au 0 +blues/blues.00010.au 0 +blues/blues.00011.au 0 +blues/blues.00012.au 0 +blues/blues.00014.au 0 +blues/blues.00015.au 0 +blues/blues.00017.au 0 +blues/blues.00018.au 0 +blues/blues.00019.au 0 +blues/blues.00020.au 0 +blues/blues.00021.au 0 +blues/blues.00023.au 0 +blues/blues.00025.au 0 +blues/blues.00027.au 0 +blues/blues.00028.au 0 +blues/blues.00029.au 0 +blues/blues.00031.au 0 +blues/blues.00032.au 0 +blues/blues.00034.au 0 +blues/blues.00035.au 0 +blues/blues.00036.au 0 +blues/blues.00037.au 0 +blues/blues.00038.au 0 +blues/blues.00039.au 0 +blues/blues.00040.au 0 +blues/blues.00041.au 0 +blues/blues.00042.au 0 +blues/blues.00044.au 0 +blues/blues.00046.au 0 +blues/blues.00047.au 0 +blues/blues.00049.au 0 +blues/blues.00050.au 0 +blues/blues.00051.au 0 +blues/blues.00052.au 0 +blues/blues.00056.au 0 +blues/blues.00057.au 0 +blues/blues.00058.au 0 +blues/blues.00059.au 0 +blues/blues.00060.au 0 +blues/blues.00061.au 0 +blues/blues.00063.au 0 +blues/blues.00064.au 0 +blues/blues.00065.au 0 +blues/blues.00066.au 0 +blues/blues.00067.au 0 +blues/blues.00068.au 0 +blues/blues.00069.au 0 +blues/blues.00070.au 0 +blues/blues.00072.au 0 +blues/blues.00074.au 0 +blues/blues.00077.au 0 +blues/blues.00079.au 0 +blues/blues.00080.au 0 +blues/blues.00081.au 0 +blues/blues.00083.au 0 +blues/blues.00084.au 0 +blues/blues.00085.au 0 +blues/blues.00087.au 0 +blues/blues.00088.au 0 +blues/blues.00089.au 0 +blues/blues.00090.au 0 +blues/blues.00091.au 0 +blues/blues.00094.au 0 +blues/blues.00096.au 0 +blues/blues.00097.au 0 +blues/blues.00098.au 0 diff --git 
a/datasets/gtzan/splits/blues.trainval b/datasets/gtzan/splits/blues.trainval new file mode 100644 index 0000000..e4708d8 --- /dev/null +++ b/datasets/gtzan/splits/blues.trainval @@ -0,0 +1,100 @@ +blues/blues.00000.au 0 +blues/blues.00001.au 0 +blues/blues.00002.au 0 +blues/blues.00003.au 0 +blues/blues.00004.au 0 +blues/blues.00005.au 0 +blues/blues.00006.au 0 +blues/blues.00007.au 0 +blues/blues.00008.au 0 +blues/blues.00009.au 0 +blues/blues.00010.au 0 +blues/blues.00011.au 0 +blues/blues.00012.au 0 +blues/blues.00013.au 0 +blues/blues.00014.au 0 +blues/blues.00015.au 0 +blues/blues.00016.au 0 +blues/blues.00017.au 0 +blues/blues.00018.au 0 +blues/blues.00019.au 0 +blues/blues.00020.au 0 +blues/blues.00021.au 0 +blues/blues.00022.au 0 +blues/blues.00023.au 0 +blues/blues.00024.au 0 +blues/blues.00025.au 0 +blues/blues.00026.au 0 +blues/blues.00027.au 0 +blues/blues.00028.au 0 +blues/blues.00029.au 0 +blues/blues.00030.au 0 +blues/blues.00031.au 0 +blues/blues.00032.au 0 +blues/blues.00033.au 0 +blues/blues.00034.au 0 +blues/blues.00035.au 0 +blues/blues.00036.au 0 +blues/blues.00037.au 0 +blues/blues.00038.au 0 +blues/blues.00039.au 0 +blues/blues.00040.au 0 +blues/blues.00041.au 0 +blues/blues.00042.au 0 +blues/blues.00043.au 0 +blues/blues.00044.au 0 +blues/blues.00045.au 0 +blues/blues.00046.au 0 +blues/blues.00047.au 0 +blues/blues.00048.au 0 +blues/blues.00049.au 0 +blues/blues.00050.au 0 +blues/blues.00051.au 0 +blues/blues.00052.au 0 +blues/blues.00053.au 0 +blues/blues.00054.au 0 +blues/blues.00055.au 0 +blues/blues.00056.au 0 +blues/blues.00057.au 0 +blues/blues.00058.au 0 +blues/blues.00059.au 0 +blues/blues.00060.au 0 +blues/blues.00061.au 0 +blues/blues.00062.au 0 +blues/blues.00063.au 0 +blues/blues.00064.au 0 +blues/blues.00065.au 0 +blues/blues.00066.au 0 +blues/blues.00067.au 0 +blues/blues.00068.au 0 +blues/blues.00069.au 0 +blues/blues.00070.au 0 +blues/blues.00071.au 0 +blues/blues.00072.au 0 +blues/blues.00073.au 0 +blues/blues.00074.au 0 
+blues/blues.00075.au 0 +blues/blues.00076.au 0 +blues/blues.00077.au 0 +blues/blues.00078.au 0 +blues/blues.00079.au 0 +blues/blues.00080.au 0 +blues/blues.00081.au 0 +blues/blues.00082.au 0 +blues/blues.00083.au 0 +blues/blues.00084.au 0 +blues/blues.00085.au 0 +blues/blues.00086.au 0 +blues/blues.00087.au 0 +blues/blues.00088.au 0 +blues/blues.00089.au 0 +blues/blues.00090.au 0 +blues/blues.00091.au 0 +blues/blues.00092.au 0 +blues/blues.00093.au 0 +blues/blues.00094.au 0 +blues/blues.00095.au 0 +blues/blues.00096.au 0 +blues/blues.00097.au 0 +blues/blues.00098.au 0 +blues/blues.00099.au 0 diff --git a/datasets/gtzan/splits/blues.val b/datasets/gtzan/splits/blues.val new file mode 100644 index 0000000..8ac8c80 --- /dev/null +++ b/datasets/gtzan/splits/blues.val @@ -0,0 +1,30 @@ +blues/blues.00002.au 0 +blues/blues.00003.au 0 +blues/blues.00006.au 0 +blues/blues.00007.au 0 +blues/blues.00008.au 0 +blues/blues.00013.au 0 +blues/blues.00016.au 0 +blues/blues.00022.au 0 +blues/blues.00024.au 0 +blues/blues.00026.au 0 +blues/blues.00030.au 0 +blues/blues.00033.au 0 +blues/blues.00043.au 0 +blues/blues.00045.au 0 +blues/blues.00048.au 0 +blues/blues.00053.au 0 +blues/blues.00054.au 0 +blues/blues.00055.au 0 +blues/blues.00062.au 0 +blues/blues.00071.au 0 +blues/blues.00073.au 0 +blues/blues.00075.au 0 +blues/blues.00076.au 0 +blues/blues.00078.au 0 +blues/blues.00082.au 0 +blues/blues.00086.au 0 +blues/blues.00092.au 0 +blues/blues.00093.au 0 +blues/blues.00095.au 0 +blues/blues.00099.au 0 diff --git a/datasets/gtzan/splits/classical.train b/datasets/gtzan/splits/classical.train new file mode 100644 index 0000000..a5e9605 --- /dev/null +++ b/datasets/gtzan/splits/classical.train @@ -0,0 +1,70 @@ +classical/classical.00000.au 1 +classical/classical.00001.au 1 +classical/classical.00003.au 1 +classical/classical.00004.au 1 +classical/classical.00005.au 1 +classical/classical.00006.au 1 +classical/classical.00007.au 1 +classical/classical.00008.au 1 
+classical/classical.00009.au 1 +classical/classical.00011.au 1 +classical/classical.00012.au 1 +classical/classical.00013.au 1 +classical/classical.00014.au 1 +classical/classical.00015.au 1 +classical/classical.00016.au 1 +classical/classical.00018.au 1 +classical/classical.00020.au 1 +classical/classical.00021.au 1 +classical/classical.00022.au 1 +classical/classical.00023.au 1 +classical/classical.00024.au 1 +classical/classical.00025.au 1 +classical/classical.00026.au 1 +classical/classical.00028.au 1 +classical/classical.00029.au 1 +classical/classical.00030.au 1 +classical/classical.00034.au 1 +classical/classical.00037.au 1 +classical/classical.00040.au 1 +classical/classical.00041.au 1 +classical/classical.00042.au 1 +classical/classical.00043.au 1 +classical/classical.00045.au 1 +classical/classical.00047.au 1 +classical/classical.00048.au 1 +classical/classical.00049.au 1 +classical/classical.00050.au 1 +classical/classical.00053.au 1 +classical/classical.00054.au 1 +classical/classical.00055.au 1 +classical/classical.00057.au 1 +classical/classical.00058.au 1 +classical/classical.00059.au 1 +classical/classical.00060.au 1 +classical/classical.00061.au 1 +classical/classical.00063.au 1 +classical/classical.00064.au 1 +classical/classical.00066.au 1 +classical/classical.00067.au 1 +classical/classical.00068.au 1 +classical/classical.00070.au 1 +classical/classical.00071.au 1 +classical/classical.00072.au 1 +classical/classical.00074.au 1 +classical/classical.00075.au 1 +classical/classical.00076.au 1 +classical/classical.00077.au 1 +classical/classical.00079.au 1 +classical/classical.00083.au 1 +classical/classical.00085.au 1 +classical/classical.00086.au 1 +classical/classical.00087.au 1 +classical/classical.00088.au 1 +classical/classical.00089.au 1 +classical/classical.00090.au 1 +classical/classical.00091.au 1 +classical/classical.00095.au 1 +classical/classical.00096.au 1 +classical/classical.00098.au 1 +classical/classical.00099.au 1 diff --git 
a/datasets/gtzan/splits/classical.trainval b/datasets/gtzan/splits/classical.trainval new file mode 100644 index 0000000..f6c69af --- /dev/null +++ b/datasets/gtzan/splits/classical.trainval @@ -0,0 +1,100 @@ +classical/classical.00000.au 1 +classical/classical.00001.au 1 +classical/classical.00002.au 1 +classical/classical.00003.au 1 +classical/classical.00004.au 1 +classical/classical.00005.au 1 +classical/classical.00006.au 1 +classical/classical.00007.au 1 +classical/classical.00008.au 1 +classical/classical.00009.au 1 +classical/classical.00010.au 1 +classical/classical.00011.au 1 +classical/classical.00012.au 1 +classical/classical.00013.au 1 +classical/classical.00014.au 1 +classical/classical.00015.au 1 +classical/classical.00016.au 1 +classical/classical.00017.au 1 +classical/classical.00018.au 1 +classical/classical.00019.au 1 +classical/classical.00020.au 1 +classical/classical.00021.au 1 +classical/classical.00022.au 1 +classical/classical.00023.au 1 +classical/classical.00024.au 1 +classical/classical.00025.au 1 +classical/classical.00026.au 1 +classical/classical.00027.au 1 +classical/classical.00028.au 1 +classical/classical.00029.au 1 +classical/classical.00030.au 1 +classical/classical.00031.au 1 +classical/classical.00032.au 1 +classical/classical.00033.au 1 +classical/classical.00034.au 1 +classical/classical.00035.au 1 +classical/classical.00036.au 1 +classical/classical.00037.au 1 +classical/classical.00038.au 1 +classical/classical.00039.au 1 +classical/classical.00040.au 1 +classical/classical.00041.au 1 +classical/classical.00042.au 1 +classical/classical.00043.au 1 +classical/classical.00044.au 1 +classical/classical.00045.au 1 +classical/classical.00046.au 1 +classical/classical.00047.au 1 +classical/classical.00048.au 1 +classical/classical.00049.au 1 +classical/classical.00050.au 1 +classical/classical.00051.au 1 +classical/classical.00052.au 1 +classical/classical.00053.au 1 +classical/classical.00054.au 1 +classical/classical.00055.au 
1 +classical/classical.00056.au 1 +classical/classical.00057.au 1 +classical/classical.00058.au 1 +classical/classical.00059.au 1 +classical/classical.00060.au 1 +classical/classical.00061.au 1 +classical/classical.00062.au 1 +classical/classical.00063.au 1 +classical/classical.00064.au 1 +classical/classical.00065.au 1 +classical/classical.00066.au 1 +classical/classical.00067.au 1 +classical/classical.00068.au 1 +classical/classical.00069.au 1 +classical/classical.00070.au 1 +classical/classical.00071.au 1 +classical/classical.00072.au 1 +classical/classical.00073.au 1 +classical/classical.00074.au 1 +classical/classical.00075.au 1 +classical/classical.00076.au 1 +classical/classical.00077.au 1 +classical/classical.00078.au 1 +classical/classical.00079.au 1 +classical/classical.00080.au 1 +classical/classical.00081.au 1 +classical/classical.00082.au 1 +classical/classical.00083.au 1 +classical/classical.00084.au 1 +classical/classical.00085.au 1 +classical/classical.00086.au 1 +classical/classical.00087.au 1 +classical/classical.00088.au 1 +classical/classical.00089.au 1 +classical/classical.00090.au 1 +classical/classical.00091.au 1 +classical/classical.00092.au 1 +classical/classical.00093.au 1 +classical/classical.00094.au 1 +classical/classical.00095.au 1 +classical/classical.00096.au 1 +classical/classical.00097.au 1 +classical/classical.00098.au 1 +classical/classical.00099.au 1 diff --git a/datasets/gtzan/splits/classical.val b/datasets/gtzan/splits/classical.val new file mode 100644 index 0000000..aa68f9c --- /dev/null +++ b/datasets/gtzan/splits/classical.val @@ -0,0 +1,30 @@ +classical/classical.00002.au 1 +classical/classical.00010.au 1 +classical/classical.00017.au 1 +classical/classical.00019.au 1 +classical/classical.00027.au 1 +classical/classical.00031.au 1 +classical/classical.00032.au 1 +classical/classical.00033.au 1 +classical/classical.00035.au 1 +classical/classical.00036.au 1 +classical/classical.00038.au 1 +classical/classical.00039.au 1 
+classical/classical.00044.au 1 +classical/classical.00046.au 1 +classical/classical.00051.au 1 +classical/classical.00052.au 1 +classical/classical.00056.au 1 +classical/classical.00062.au 1 +classical/classical.00065.au 1 +classical/classical.00069.au 1 +classical/classical.00073.au 1 +classical/classical.00078.au 1 +classical/classical.00080.au 1 +classical/classical.00081.au 1 +classical/classical.00082.au 1 +classical/classical.00084.au 1 +classical/classical.00092.au 1 +classical/classical.00093.au 1 +classical/classical.00094.au 1 +classical/classical.00097.au 1 diff --git a/datasets/gtzan/splits/country.train b/datasets/gtzan/splits/country.train new file mode 100644 index 0000000..51ffad4 --- /dev/null +++ b/datasets/gtzan/splits/country.train @@ -0,0 +1,70 @@ +country/country.00001.au 2 +country/country.00004.au 2 +country/country.00006.au 2 +country/country.00007.au 2 +country/country.00008.au 2 +country/country.00009.au 2 +country/country.00010.au 2 +country/country.00012.au 2 +country/country.00015.au 2 +country/country.00017.au 2 +country/country.00018.au 2 +country/country.00019.au 2 +country/country.00020.au 2 +country/country.00022.au 2 +country/country.00026.au 2 +country/country.00031.au 2 +country/country.00032.au 2 +country/country.00033.au 2 +country/country.00034.au 2 +country/country.00036.au 2 +country/country.00037.au 2 +country/country.00038.au 2 +country/country.00039.au 2 +country/country.00040.au 2 +country/country.00041.au 2 +country/country.00042.au 2 +country/country.00043.au 2 +country/country.00045.au 2 +country/country.00046.au 2 +country/country.00047.au 2 +country/country.00048.au 2 +country/country.00049.au 2 +country/country.00050.au 2 +country/country.00051.au 2 +country/country.00052.au 2 +country/country.00053.au 2 +country/country.00054.au 2 +country/country.00055.au 2 +country/country.00057.au 2 +country/country.00058.au 2 +country/country.00059.au 2 +country/country.00060.au 2 +country/country.00063.au 2 
+country/country.00064.au 2 +country/country.00065.au 2 +country/country.00066.au 2 +country/country.00067.au 2 +country/country.00068.au 2 +country/country.00069.au 2 +country/country.00070.au 2 +country/country.00071.au 2 +country/country.00072.au 2 +country/country.00075.au 2 +country/country.00078.au 2 +country/country.00080.au 2 +country/country.00081.au 2 +country/country.00082.au 2 +country/country.00085.au 2 +country/country.00086.au 2 +country/country.00087.au 2 +country/country.00088.au 2 +country/country.00089.au 2 +country/country.00090.au 2 +country/country.00091.au 2 +country/country.00093.au 2 +country/country.00094.au 2 +country/country.00095.au 2 +country/country.00096.au 2 +country/country.00097.au 2 +country/country.00098.au 2 diff --git a/datasets/gtzan/splits/country.trainval b/datasets/gtzan/splits/country.trainval new file mode 100644 index 0000000..a70ba59 --- /dev/null +++ b/datasets/gtzan/splits/country.trainval @@ -0,0 +1,100 @@ +country/country.00000.au 2 +country/country.00001.au 2 +country/country.00002.au 2 +country/country.00003.au 2 +country/country.00004.au 2 +country/country.00005.au 2 +country/country.00006.au 2 +country/country.00007.au 2 +country/country.00008.au 2 +country/country.00009.au 2 +country/country.00010.au 2 +country/country.00011.au 2 +country/country.00012.au 2 +country/country.00013.au 2 +country/country.00014.au 2 +country/country.00015.au 2 +country/country.00016.au 2 +country/country.00017.au 2 +country/country.00018.au 2 +country/country.00019.au 2 +country/country.00020.au 2 +country/country.00021.au 2 +country/country.00022.au 2 +country/country.00023.au 2 +country/country.00024.au 2 +country/country.00025.au 2 +country/country.00026.au 2 +country/country.00027.au 2 +country/country.00028.au 2 +country/country.00029.au 2 +country/country.00030.au 2 +country/country.00031.au 2 +country/country.00032.au 2 +country/country.00033.au 2 +country/country.00034.au 2 +country/country.00035.au 2 
+country/country.00036.au 2 +country/country.00037.au 2 +country/country.00038.au 2 +country/country.00039.au 2 +country/country.00040.au 2 +country/country.00041.au 2 +country/country.00042.au 2 +country/country.00043.au 2 +country/country.00044.au 2 +country/country.00045.au 2 +country/country.00046.au 2 +country/country.00047.au 2 +country/country.00048.au 2 +country/country.00049.au 2 +country/country.00050.au 2 +country/country.00051.au 2 +country/country.00052.au 2 +country/country.00053.au 2 +country/country.00054.au 2 +country/country.00055.au 2 +country/country.00056.au 2 +country/country.00057.au 2 +country/country.00058.au 2 +country/country.00059.au 2 +country/country.00060.au 2 +country/country.00061.au 2 +country/country.00062.au 2 +country/country.00063.au 2 +country/country.00064.au 2 +country/country.00065.au 2 +country/country.00066.au 2 +country/country.00067.au 2 +country/country.00068.au 2 +country/country.00069.au 2 +country/country.00070.au 2 +country/country.00071.au 2 +country/country.00072.au 2 +country/country.00073.au 2 +country/country.00074.au 2 +country/country.00075.au 2 +country/country.00076.au 2 +country/country.00077.au 2 +country/country.00078.au 2 +country/country.00079.au 2 +country/country.00080.au 2 +country/country.00081.au 2 +country/country.00082.au 2 +country/country.00083.au 2 +country/country.00084.au 2 +country/country.00085.au 2 +country/country.00086.au 2 +country/country.00087.au 2 +country/country.00088.au 2 +country/country.00089.au 2 +country/country.00090.au 2 +country/country.00091.au 2 +country/country.00092.au 2 +country/country.00093.au 2 +country/country.00094.au 2 +country/country.00095.au 2 +country/country.00096.au 2 +country/country.00097.au 2 +country/country.00098.au 2 +country/country.00099.au 2 diff --git a/datasets/gtzan/splits/country.val b/datasets/gtzan/splits/country.val new file mode 100644 index 0000000..a655754 --- /dev/null +++ b/datasets/gtzan/splits/country.val @@ -0,0 +1,30 @@ 
+country/country.00000.au 2 +country/country.00002.au 2 +country/country.00003.au 2 +country/country.00005.au 2 +country/country.00011.au 2 +country/country.00013.au 2 +country/country.00014.au 2 +country/country.00016.au 2 +country/country.00021.au 2 +country/country.00023.au 2 +country/country.00024.au 2 +country/country.00025.au 2 +country/country.00027.au 2 +country/country.00028.au 2 +country/country.00029.au 2 +country/country.00030.au 2 +country/country.00035.au 2 +country/country.00044.au 2 +country/country.00056.au 2 +country/country.00061.au 2 +country/country.00062.au 2 +country/country.00073.au 2 +country/country.00074.au 2 +country/country.00076.au 2 +country/country.00077.au 2 +country/country.00079.au 2 +country/country.00083.au 2 +country/country.00084.au 2 +country/country.00092.au 2 +country/country.00099.au 2 diff --git a/datasets/gtzan/splits/disco.train b/datasets/gtzan/splits/disco.train new file mode 100644 index 0000000..a7459ae --- /dev/null +++ b/datasets/gtzan/splits/disco.train @@ -0,0 +1,70 @@ +disco/disco.00000.au 3 +disco/disco.00001.au 3 +disco/disco.00002.au 3 +disco/disco.00003.au 3 +disco/disco.00005.au 3 +disco/disco.00007.au 3 +disco/disco.00010.au 3 +disco/disco.00011.au 3 +disco/disco.00013.au 3 +disco/disco.00014.au 3 +disco/disco.00016.au 3 +disco/disco.00018.au 3 +disco/disco.00019.au 3 +disco/disco.00020.au 3 +disco/disco.00021.au 3 +disco/disco.00022.au 3 +disco/disco.00024.au 3 +disco/disco.00026.au 3 +disco/disco.00027.au 3 +disco/disco.00028.au 3 +disco/disco.00029.au 3 +disco/disco.00030.au 3 +disco/disco.00031.au 3 +disco/disco.00032.au 3 +disco/disco.00033.au 3 +disco/disco.00035.au 3 +disco/disco.00036.au 3 +disco/disco.00037.au 3 +disco/disco.00038.au 3 +disco/disco.00039.au 3 +disco/disco.00040.au 3 +disco/disco.00041.au 3 +disco/disco.00043.au 3 +disco/disco.00044.au 3 +disco/disco.00046.au 3 +disco/disco.00048.au 3 +disco/disco.00049.au 3 +disco/disco.00051.au 3 +disco/disco.00052.au 3 +disco/disco.00053.au 3 
+disco/disco.00054.au 3 +disco/disco.00055.au 3 +disco/disco.00056.au 3 +disco/disco.00057.au 3 +disco/disco.00058.au 3 +disco/disco.00060.au 3 +disco/disco.00061.au 3 +disco/disco.00062.au 3 +disco/disco.00063.au 3 +disco/disco.00066.au 3 +disco/disco.00068.au 3 +disco/disco.00069.au 3 +disco/disco.00070.au 3 +disco/disco.00072.au 3 +disco/disco.00074.au 3 +disco/disco.00076.au 3 +disco/disco.00078.au 3 +disco/disco.00080.au 3 +disco/disco.00081.au 3 +disco/disco.00084.au 3 +disco/disco.00085.au 3 +disco/disco.00087.au 3 +disco/disco.00088.au 3 +disco/disco.00089.au 3 +disco/disco.00090.au 3 +disco/disco.00091.au 3 +disco/disco.00092.au 3 +disco/disco.00094.au 3 +disco/disco.00095.au 3 +disco/disco.00099.au 3 diff --git a/datasets/gtzan/splits/disco.trainval b/datasets/gtzan/splits/disco.trainval new file mode 100644 index 0000000..83abb9c --- /dev/null +++ b/datasets/gtzan/splits/disco.trainval @@ -0,0 +1,100 @@ +disco/disco.00000.au 3 +disco/disco.00001.au 3 +disco/disco.00002.au 3 +disco/disco.00003.au 3 +disco/disco.00004.au 3 +disco/disco.00005.au 3 +disco/disco.00006.au 3 +disco/disco.00007.au 3 +disco/disco.00008.au 3 +disco/disco.00009.au 3 +disco/disco.00010.au 3 +disco/disco.00011.au 3 +disco/disco.00012.au 3 +disco/disco.00013.au 3 +disco/disco.00014.au 3 +disco/disco.00015.au 3 +disco/disco.00016.au 3 +disco/disco.00017.au 3 +disco/disco.00018.au 3 +disco/disco.00019.au 3 +disco/disco.00020.au 3 +disco/disco.00021.au 3 +disco/disco.00022.au 3 +disco/disco.00023.au 3 +disco/disco.00024.au 3 +disco/disco.00025.au 3 +disco/disco.00026.au 3 +disco/disco.00027.au 3 +disco/disco.00028.au 3 +disco/disco.00029.au 3 +disco/disco.00030.au 3 +disco/disco.00031.au 3 +disco/disco.00032.au 3 +disco/disco.00033.au 3 +disco/disco.00034.au 3 +disco/disco.00035.au 3 +disco/disco.00036.au 3 +disco/disco.00037.au 3 +disco/disco.00038.au 3 +disco/disco.00039.au 3 +disco/disco.00040.au 3 +disco/disco.00041.au 3 +disco/disco.00042.au 3 +disco/disco.00043.au 3 
+disco/disco.00044.au 3 +disco/disco.00045.au 3 +disco/disco.00046.au 3 +disco/disco.00047.au 3 +disco/disco.00048.au 3 +disco/disco.00049.au 3 +disco/disco.00050.au 3 +disco/disco.00051.au 3 +disco/disco.00052.au 3 +disco/disco.00053.au 3 +disco/disco.00054.au 3 +disco/disco.00055.au 3 +disco/disco.00056.au 3 +disco/disco.00057.au 3 +disco/disco.00058.au 3 +disco/disco.00059.au 3 +disco/disco.00060.au 3 +disco/disco.00061.au 3 +disco/disco.00062.au 3 +disco/disco.00063.au 3 +disco/disco.00064.au 3 +disco/disco.00065.au 3 +disco/disco.00066.au 3 +disco/disco.00067.au 3 +disco/disco.00068.au 3 +disco/disco.00069.au 3 +disco/disco.00070.au 3 +disco/disco.00071.au 3 +disco/disco.00072.au 3 +disco/disco.00073.au 3 +disco/disco.00074.au 3 +disco/disco.00075.au 3 +disco/disco.00076.au 3 +disco/disco.00077.au 3 +disco/disco.00078.au 3 +disco/disco.00079.au 3 +disco/disco.00080.au 3 +disco/disco.00081.au 3 +disco/disco.00082.au 3 +disco/disco.00083.au 3 +disco/disco.00084.au 3 +disco/disco.00085.au 3 +disco/disco.00086.au 3 +disco/disco.00087.au 3 +disco/disco.00088.au 3 +disco/disco.00089.au 3 +disco/disco.00090.au 3 +disco/disco.00091.au 3 +disco/disco.00092.au 3 +disco/disco.00093.au 3 +disco/disco.00094.au 3 +disco/disco.00095.au 3 +disco/disco.00096.au 3 +disco/disco.00097.au 3 +disco/disco.00098.au 3 +disco/disco.00099.au 3 diff --git a/datasets/gtzan/splits/disco.val b/datasets/gtzan/splits/disco.val new file mode 100644 index 0000000..bf9c58f --- /dev/null +++ b/datasets/gtzan/splits/disco.val @@ -0,0 +1,30 @@ +disco/disco.00004.au 3 +disco/disco.00006.au 3 +disco/disco.00008.au 3 +disco/disco.00009.au 3 +disco/disco.00012.au 3 +disco/disco.00015.au 3 +disco/disco.00017.au 3 +disco/disco.00023.au 3 +disco/disco.00025.au 3 +disco/disco.00034.au 3 +disco/disco.00042.au 3 +disco/disco.00045.au 3 +disco/disco.00047.au 3 +disco/disco.00050.au 3 +disco/disco.00059.au 3 +disco/disco.00064.au 3 +disco/disco.00065.au 3 +disco/disco.00067.au 3 +disco/disco.00071.au 3 
+disco/disco.00073.au 3 +disco/disco.00075.au 3 +disco/disco.00077.au 3 +disco/disco.00079.au 3 +disco/disco.00082.au 3 +disco/disco.00083.au 3 +disco/disco.00086.au 3 +disco/disco.00093.au 3 +disco/disco.00096.au 3 +disco/disco.00097.au 3 +disco/disco.00098.au 3 diff --git a/datasets/gtzan/splits/genre.train b/datasets/gtzan/splits/genre.train new file mode 100644 index 0000000..5aa481b --- /dev/null +++ b/datasets/gtzan/splits/genre.train @@ -0,0 +1,700 @@ +blues/blues.00000.au 0 +blues/blues.00001.au 0 +blues/blues.00004.au 0 +blues/blues.00005.au 0 +blues/blues.00009.au 0 +blues/blues.00010.au 0 +blues/blues.00011.au 0 +blues/blues.00012.au 0 +blues/blues.00014.au 0 +blues/blues.00015.au 0 +blues/blues.00017.au 0 +blues/blues.00018.au 0 +blues/blues.00019.au 0 +blues/blues.00020.au 0 +blues/blues.00021.au 0 +blues/blues.00023.au 0 +blues/blues.00025.au 0 +blues/blues.00027.au 0 +blues/blues.00028.au 0 +blues/blues.00029.au 0 +blues/blues.00031.au 0 +blues/blues.00032.au 0 +blues/blues.00034.au 0 +blues/blues.00035.au 0 +blues/blues.00036.au 0 +blues/blues.00037.au 0 +blues/blues.00038.au 0 +blues/blues.00039.au 0 +blues/blues.00040.au 0 +blues/blues.00041.au 0 +blues/blues.00042.au 0 +blues/blues.00044.au 0 +blues/blues.00046.au 0 +blues/blues.00047.au 0 +blues/blues.00049.au 0 +blues/blues.00050.au 0 +blues/blues.00051.au 0 +blues/blues.00052.au 0 +blues/blues.00056.au 0 +blues/blues.00057.au 0 +blues/blues.00058.au 0 +blues/blues.00059.au 0 +blues/blues.00060.au 0 +blues/blues.00061.au 0 +blues/blues.00063.au 0 +blues/blues.00064.au 0 +blues/blues.00065.au 0 +blues/blues.00066.au 0 +blues/blues.00067.au 0 +blues/blues.00068.au 0 +blues/blues.00069.au 0 +blues/blues.00070.au 0 +blues/blues.00072.au 0 +blues/blues.00074.au 0 +blues/blues.00077.au 0 +blues/blues.00079.au 0 +blues/blues.00080.au 0 +blues/blues.00081.au 0 +blues/blues.00083.au 0 +blues/blues.00084.au 0 +blues/blues.00085.au 0 +blues/blues.00087.au 0 +blues/blues.00088.au 0 +blues/blues.00089.au 0 
+blues/blues.00090.au 0 +blues/blues.00091.au 0 +blues/blues.00094.au 0 +blues/blues.00096.au 0 +blues/blues.00097.au 0 +blues/blues.00098.au 0 +classical/classical.00000.au 1 +classical/classical.00001.au 1 +classical/classical.00003.au 1 +classical/classical.00004.au 1 +classical/classical.00005.au 1 +classical/classical.00006.au 1 +classical/classical.00007.au 1 +classical/classical.00008.au 1 +classical/classical.00009.au 1 +classical/classical.00011.au 1 +classical/classical.00012.au 1 +classical/classical.00013.au 1 +classical/classical.00014.au 1 +classical/classical.00015.au 1 +classical/classical.00016.au 1 +classical/classical.00018.au 1 +classical/classical.00020.au 1 +classical/classical.00021.au 1 +classical/classical.00022.au 1 +classical/classical.00023.au 1 +classical/classical.00024.au 1 +classical/classical.00025.au 1 +classical/classical.00026.au 1 +classical/classical.00028.au 1 +classical/classical.00029.au 1 +classical/classical.00030.au 1 +classical/classical.00034.au 1 +classical/classical.00037.au 1 +classical/classical.00040.au 1 +classical/classical.00041.au 1 +classical/classical.00042.au 1 +classical/classical.00043.au 1 +classical/classical.00045.au 1 +classical/classical.00047.au 1 +classical/classical.00048.au 1 +classical/classical.00049.au 1 +classical/classical.00050.au 1 +classical/classical.00053.au 1 +classical/classical.00054.au 1 +classical/classical.00055.au 1 +classical/classical.00057.au 1 +classical/classical.00058.au 1 +classical/classical.00059.au 1 +classical/classical.00060.au 1 +classical/classical.00061.au 1 +classical/classical.00063.au 1 +classical/classical.00064.au 1 +classical/classical.00066.au 1 +classical/classical.00067.au 1 +classical/classical.00068.au 1 +classical/classical.00070.au 1 +classical/classical.00071.au 1 +classical/classical.00072.au 1 +classical/classical.00074.au 1 +classical/classical.00075.au 1 +classical/classical.00076.au 1 +classical/classical.00077.au 1 +classical/classical.00079.au 1 
+classical/classical.00083.au 1 +classical/classical.00085.au 1 +classical/classical.00086.au 1 +classical/classical.00087.au 1 +classical/classical.00088.au 1 +classical/classical.00089.au 1 +classical/classical.00090.au 1 +classical/classical.00091.au 1 +classical/classical.00095.au 1 +classical/classical.00096.au 1 +classical/classical.00098.au 1 +classical/classical.00099.au 1 +country/country.00001.au 2 +country/country.00004.au 2 +country/country.00006.au 2 +country/country.00007.au 2 +country/country.00008.au 2 +country/country.00009.au 2 +country/country.00010.au 2 +country/country.00012.au 2 +country/country.00015.au 2 +country/country.00017.au 2 +country/country.00018.au 2 +country/country.00019.au 2 +country/country.00020.au 2 +country/country.00022.au 2 +country/country.00026.au 2 +country/country.00031.au 2 +country/country.00032.au 2 +country/country.00033.au 2 +country/country.00034.au 2 +country/country.00036.au 2 +country/country.00037.au 2 +country/country.00038.au 2 +country/country.00039.au 2 +country/country.00040.au 2 +country/country.00041.au 2 +country/country.00042.au 2 +country/country.00043.au 2 +country/country.00045.au 2 +country/country.00046.au 2 +country/country.00047.au 2 +country/country.00048.au 2 +country/country.00049.au 2 +country/country.00050.au 2 +country/country.00051.au 2 +country/country.00052.au 2 +country/country.00053.au 2 +country/country.00054.au 2 +country/country.00055.au 2 +country/country.00057.au 2 +country/country.00058.au 2 +country/country.00059.au 2 +country/country.00060.au 2 +country/country.00063.au 2 +country/country.00064.au 2 +country/country.00065.au 2 +country/country.00066.au 2 +country/country.00067.au 2 +country/country.00068.au 2 +country/country.00069.au 2 +country/country.00070.au 2 +country/country.00071.au 2 +country/country.00072.au 2 +country/country.00075.au 2 +country/country.00078.au 2 +country/country.00080.au 2 +country/country.00081.au 2 +country/country.00082.au 2 
+country/country.00085.au 2 +country/country.00086.au 2 +country/country.00087.au 2 +country/country.00088.au 2 +country/country.00089.au 2 +country/country.00090.au 2 +country/country.00091.au 2 +country/country.00093.au 2 +country/country.00094.au 2 +country/country.00095.au 2 +country/country.00096.au 2 +country/country.00097.au 2 +country/country.00098.au 2 +disco/disco.00000.au 3 +disco/disco.00001.au 3 +disco/disco.00002.au 3 +disco/disco.00003.au 3 +disco/disco.00005.au 3 +disco/disco.00007.au 3 +disco/disco.00010.au 3 +disco/disco.00011.au 3 +disco/disco.00013.au 3 +disco/disco.00014.au 3 +disco/disco.00016.au 3 +disco/disco.00018.au 3 +disco/disco.00019.au 3 +disco/disco.00020.au 3 +disco/disco.00021.au 3 +disco/disco.00022.au 3 +disco/disco.00024.au 3 +disco/disco.00026.au 3 +disco/disco.00027.au 3 +disco/disco.00028.au 3 +disco/disco.00029.au 3 +disco/disco.00030.au 3 +disco/disco.00031.au 3 +disco/disco.00032.au 3 +disco/disco.00033.au 3 +disco/disco.00035.au 3 +disco/disco.00036.au 3 +disco/disco.00037.au 3 +disco/disco.00038.au 3 +disco/disco.00039.au 3 +disco/disco.00040.au 3 +disco/disco.00041.au 3 +disco/disco.00043.au 3 +disco/disco.00044.au 3 +disco/disco.00046.au 3 +disco/disco.00048.au 3 +disco/disco.00049.au 3 +disco/disco.00051.au 3 +disco/disco.00052.au 3 +disco/disco.00053.au 3 +disco/disco.00054.au 3 +disco/disco.00055.au 3 +disco/disco.00056.au 3 +disco/disco.00057.au 3 +disco/disco.00058.au 3 +disco/disco.00060.au 3 +disco/disco.00061.au 3 +disco/disco.00062.au 3 +disco/disco.00063.au 3 +disco/disco.00066.au 3 +disco/disco.00068.au 3 +disco/disco.00069.au 3 +disco/disco.00070.au 3 +disco/disco.00072.au 3 +disco/disco.00074.au 3 +disco/disco.00076.au 3 +disco/disco.00078.au 3 +disco/disco.00080.au 3 +disco/disco.00081.au 3 +disco/disco.00084.au 3 +disco/disco.00085.au 3 +disco/disco.00087.au 3 +disco/disco.00088.au 3 +disco/disco.00089.au 3 +disco/disco.00090.au 3 +disco/disco.00091.au 3 +disco/disco.00092.au 3 +disco/disco.00094.au 3 
+disco/disco.00095.au 3 +disco/disco.00099.au 3 +hiphop/hiphop.00000.au 4 +hiphop/hiphop.00001.au 4 +hiphop/hiphop.00003.au 4 +hiphop/hiphop.00006.au 4 +hiphop/hiphop.00007.au 4 +hiphop/hiphop.00008.au 4 +hiphop/hiphop.00009.au 4 +hiphop/hiphop.00011.au 4 +hiphop/hiphop.00012.au 4 +hiphop/hiphop.00015.au 4 +hiphop/hiphop.00017.au 4 +hiphop/hiphop.00018.au 4 +hiphop/hiphop.00021.au 4 +hiphop/hiphop.00022.au 4 +hiphop/hiphop.00023.au 4 +hiphop/hiphop.00025.au 4 +hiphop/hiphop.00027.au 4 +hiphop/hiphop.00028.au 4 +hiphop/hiphop.00030.au 4 +hiphop/hiphop.00031.au 4 +hiphop/hiphop.00032.au 4 +hiphop/hiphop.00033.au 4 +hiphop/hiphop.00034.au 4 +hiphop/hiphop.00036.au 4 +hiphop/hiphop.00038.au 4 +hiphop/hiphop.00039.au 4 +hiphop/hiphop.00040.au 4 +hiphop/hiphop.00042.au 4 +hiphop/hiphop.00044.au 4 +hiphop/hiphop.00045.au 4 +hiphop/hiphop.00046.au 4 +hiphop/hiphop.00048.au 4 +hiphop/hiphop.00049.au 4 +hiphop/hiphop.00050.au 4 +hiphop/hiphop.00051.au 4 +hiphop/hiphop.00052.au 4 +hiphop/hiphop.00055.au 4 +hiphop/hiphop.00056.au 4 +hiphop/hiphop.00057.au 4 +hiphop/hiphop.00058.au 4 +hiphop/hiphop.00059.au 4 +hiphop/hiphop.00061.au 4 +hiphop/hiphop.00062.au 4 +hiphop/hiphop.00064.au 4 +hiphop/hiphop.00065.au 4 +hiphop/hiphop.00066.au 4 +hiphop/hiphop.00067.au 4 +hiphop/hiphop.00068.au 4 +hiphop/hiphop.00069.au 4 +hiphop/hiphop.00072.au 4 +hiphop/hiphop.00073.au 4 +hiphop/hiphop.00074.au 4 +hiphop/hiphop.00075.au 4 +hiphop/hiphop.00077.au 4 +hiphop/hiphop.00078.au 4 +hiphop/hiphop.00079.au 4 +hiphop/hiphop.00082.au 4 +hiphop/hiphop.00083.au 4 +hiphop/hiphop.00084.au 4 +hiphop/hiphop.00086.au 4 +hiphop/hiphop.00087.au 4 +hiphop/hiphop.00088.au 4 +hiphop/hiphop.00089.au 4 +hiphop/hiphop.00090.au 4 +hiphop/hiphop.00091.au 4 +hiphop/hiphop.00092.au 4 +hiphop/hiphop.00093.au 4 +hiphop/hiphop.00094.au 4 +hiphop/hiphop.00098.au 4 +hiphop/hiphop.00099.au 4 +jazz/jazz.00000.au 5 +jazz/jazz.00001.au 5 +jazz/jazz.00002.au 5 +jazz/jazz.00003.au 5 +jazz/jazz.00004.au 5 +jazz/jazz.00005.au 5 
+jazz/jazz.00006.au 5 +jazz/jazz.00007.au 5 +jazz/jazz.00008.au 5 +jazz/jazz.00009.au 5 +jazz/jazz.00011.au 5 +jazz/jazz.00014.au 5 +jazz/jazz.00015.au 5 +jazz/jazz.00016.au 5 +jazz/jazz.00018.au 5 +jazz/jazz.00019.au 5 +jazz/jazz.00021.au 5 +jazz/jazz.00025.au 5 +jazz/jazz.00026.au 5 +jazz/jazz.00027.au 5 +jazz/jazz.00029.au 5 +jazz/jazz.00030.au 5 +jazz/jazz.00031.au 5 +jazz/jazz.00033.au 5 +jazz/jazz.00036.au 5 +jazz/jazz.00038.au 5 +jazz/jazz.00041.au 5 +jazz/jazz.00043.au 5 +jazz/jazz.00044.au 5 +jazz/jazz.00045.au 5 +jazz/jazz.00047.au 5 +jazz/jazz.00048.au 5 +jazz/jazz.00049.au 5 +jazz/jazz.00050.au 5 +jazz/jazz.00051.au 5 +jazz/jazz.00052.au 5 +jazz/jazz.00053.au 5 +jazz/jazz.00054.au 5 +jazz/jazz.00058.au 5 +jazz/jazz.00059.au 5 +jazz/jazz.00061.au 5 +jazz/jazz.00062.au 5 +jazz/jazz.00064.au 5 +jazz/jazz.00065.au 5 +jazz/jazz.00067.au 5 +jazz/jazz.00068.au 5 +jazz/jazz.00072.au 5 +jazz/jazz.00073.au 5 +jazz/jazz.00075.au 5 +jazz/jazz.00076.au 5 +jazz/jazz.00077.au 5 +jazz/jazz.00078.au 5 +jazz/jazz.00079.au 5 +jazz/jazz.00080.au 5 +jazz/jazz.00081.au 5 +jazz/jazz.00082.au 5 +jazz/jazz.00083.au 5 +jazz/jazz.00084.au 5 +jazz/jazz.00085.au 5 +jazz/jazz.00086.au 5 +jazz/jazz.00088.au 5 +jazz/jazz.00089.au 5 +jazz/jazz.00090.au 5 +jazz/jazz.00091.au 5 +jazz/jazz.00092.au 5 +jazz/jazz.00093.au 5 +jazz/jazz.00095.au 5 +jazz/jazz.00096.au 5 +jazz/jazz.00098.au 5 +jazz/jazz.00099.au 5 +metal/metal.00001.au 6 +metal/metal.00002.au 6 +metal/metal.00003.au 6 +metal/metal.00004.au 6 +metal/metal.00005.au 6 +metal/metal.00006.au 6 +metal/metal.00007.au 6 +metal/metal.00008.au 6 +metal/metal.00010.au 6 +metal/metal.00011.au 6 +metal/metal.00012.au 6 +metal/metal.00014.au 6 +metal/metal.00015.au 6 +metal/metal.00018.au 6 +metal/metal.00019.au 6 +metal/metal.00021.au 6 +metal/metal.00022.au 6 +metal/metal.00024.au 6 +metal/metal.00025.au 6 +metal/metal.00026.au 6 +metal/metal.00027.au 6 +metal/metal.00028.au 6 +metal/metal.00031.au 6 +metal/metal.00033.au 6 
+metal/metal.00035.au 6 +metal/metal.00036.au 6 +metal/metal.00038.au 6 +metal/metal.00040.au 6 +metal/metal.00041.au 6 +metal/metal.00042.au 6 +metal/metal.00043.au 6 +metal/metal.00046.au 6 +metal/metal.00049.au 6 +metal/metal.00050.au 6 +metal/metal.00051.au 6 +metal/metal.00052.au 6 +metal/metal.00053.au 6 +metal/metal.00055.au 6 +metal/metal.00056.au 6 +metal/metal.00057.au 6 +metal/metal.00058.au 6 +metal/metal.00062.au 6 +metal/metal.00063.au 6 +metal/metal.00064.au 6 +metal/metal.00065.au 6 +metal/metal.00066.au 6 +metal/metal.00068.au 6 +metal/metal.00069.au 6 +metal/metal.00071.au 6 +metal/metal.00072.au 6 +metal/metal.00073.au 6 +metal/metal.00075.au 6 +metal/metal.00076.au 6 +metal/metal.00077.au 6 +metal/metal.00078.au 6 +metal/metal.00079.au 6 +metal/metal.00080.au 6 +metal/metal.00082.au 6 +metal/metal.00084.au 6 +metal/metal.00085.au 6 +metal/metal.00087.au 6 +metal/metal.00088.au 6 +metal/metal.00089.au 6 +metal/metal.00090.au 6 +metal/metal.00092.au 6 +metal/metal.00094.au 6 +metal/metal.00095.au 6 +metal/metal.00096.au 6 +metal/metal.00098.au 6 +metal/metal.00099.au 6 +pop/pop.00000.au 7 +pop/pop.00001.au 7 +pop/pop.00003.au 7 +pop/pop.00004.au 7 +pop/pop.00006.au 7 +pop/pop.00007.au 7 +pop/pop.00008.au 7 +pop/pop.00009.au 7 +pop/pop.00010.au 7 +pop/pop.00014.au 7 +pop/pop.00016.au 7 +pop/pop.00019.au 7 +pop/pop.00021.au 7 +pop/pop.00023.au 7 +pop/pop.00024.au 7 +pop/pop.00025.au 7 +pop/pop.00027.au 7 +pop/pop.00028.au 7 +pop/pop.00029.au 7 +pop/pop.00031.au 7 +pop/pop.00032.au 7 +pop/pop.00033.au 7 +pop/pop.00034.au 7 +pop/pop.00035.au 7 +pop/pop.00036.au 7 +pop/pop.00038.au 7 +pop/pop.00039.au 7 +pop/pop.00041.au 7 +pop/pop.00042.au 7 +pop/pop.00043.au 7 +pop/pop.00044.au 7 +pop/pop.00046.au 7 +pop/pop.00047.au 7 +pop/pop.00048.au 7 +pop/pop.00053.au 7 +pop/pop.00054.au 7 +pop/pop.00055.au 7 +pop/pop.00056.au 7 +pop/pop.00057.au 7 +pop/pop.00058.au 7 +pop/pop.00059.au 7 +pop/pop.00060.au 7 +pop/pop.00061.au 7 +pop/pop.00062.au 7 
+pop/pop.00063.au 7 +pop/pop.00064.au 7 +pop/pop.00065.au 7 +pop/pop.00067.au 7 +pop/pop.00068.au 7 +pop/pop.00069.au 7 +pop/pop.00070.au 7 +pop/pop.00072.au 7 +pop/pop.00073.au 7 +pop/pop.00075.au 7 +pop/pop.00076.au 7 +pop/pop.00077.au 7 +pop/pop.00079.au 7 +pop/pop.00080.au 7 +pop/pop.00081.au 7 +pop/pop.00082.au 7 +pop/pop.00083.au 7 +pop/pop.00084.au 7 +pop/pop.00087.au 7 +pop/pop.00089.au 7 +pop/pop.00090.au 7 +pop/pop.00092.au 7 +pop/pop.00094.au 7 +pop/pop.00095.au 7 +pop/pop.00098.au 7 +pop/pop.00099.au 7 +reggae/reggae.00000.au 8 +reggae/reggae.00002.au 8 +reggae/reggae.00004.au 8 +reggae/reggae.00005.au 8 +reggae/reggae.00006.au 8 +reggae/reggae.00007.au 8 +reggae/reggae.00008.au 8 +reggae/reggae.00009.au 8 +reggae/reggae.00010.au 8 +reggae/reggae.00013.au 8 +reggae/reggae.00014.au 8 +reggae/reggae.00015.au 8 +reggae/reggae.00018.au 8 +reggae/reggae.00020.au 8 +reggae/reggae.00021.au 8 +reggae/reggae.00022.au 8 +reggae/reggae.00025.au 8 +reggae/reggae.00026.au 8 +reggae/reggae.00027.au 8 +reggae/reggae.00028.au 8 +reggae/reggae.00031.au 8 +reggae/reggae.00032.au 8 +reggae/reggae.00034.au 8 +reggae/reggae.00035.au 8 +reggae/reggae.00036.au 8 +reggae/reggae.00037.au 8 +reggae/reggae.00038.au 8 +reggae/reggae.00040.au 8 +reggae/reggae.00042.au 8 +reggae/reggae.00045.au 8 +reggae/reggae.00046.au 8 +reggae/reggae.00047.au 8 +reggae/reggae.00048.au 8 +reggae/reggae.00049.au 8 +reggae/reggae.00050.au 8 +reggae/reggae.00051.au 8 +reggae/reggae.00052.au 8 +reggae/reggae.00053.au 8 +reggae/reggae.00054.au 8 +reggae/reggae.00056.au 8 +reggae/reggae.00058.au 8 +reggae/reggae.00059.au 8 +reggae/reggae.00060.au 8 +reggae/reggae.00061.au 8 +reggae/reggae.00063.au 8 +reggae/reggae.00065.au 8 +reggae/reggae.00066.au 8 +reggae/reggae.00067.au 8 +reggae/reggae.00068.au 8 +reggae/reggae.00069.au 8 +reggae/reggae.00071.au 8 +reggae/reggae.00073.au 8 +reggae/reggae.00074.au 8 +reggae/reggae.00075.au 8 +reggae/reggae.00076.au 8 +reggae/reggae.00077.au 8 +reggae/reggae.00078.au 
8 +reggae/reggae.00081.au 8 +reggae/reggae.00082.au 8 +reggae/reggae.00083.au 8 +reggae/reggae.00084.au 8 +reggae/reggae.00085.au 8 +reggae/reggae.00087.au 8 +reggae/reggae.00089.au 8 +reggae/reggae.00090.au 8 +reggae/reggae.00091.au 8 +reggae/reggae.00093.au 8 +reggae/reggae.00094.au 8 +reggae/reggae.00095.au 8 +reggae/reggae.00096.au 8 +rock/rock.00000.au 9 +rock/rock.00001.au 9 +rock/rock.00002.au 9 +rock/rock.00004.au 9 +rock/rock.00005.au 9 +rock/rock.00007.au 9 +rock/rock.00008.au 9 +rock/rock.00009.au 9 +rock/rock.00010.au 9 +rock/rock.00011.au 9 +rock/rock.00012.au 9 +rock/rock.00013.au 9 +rock/rock.00015.au 9 +rock/rock.00016.au 9 +rock/rock.00017.au 9 +rock/rock.00018.au 9 +rock/rock.00019.au 9 +rock/rock.00022.au 9 +rock/rock.00024.au 9 +rock/rock.00027.au 9 +rock/rock.00029.au 9 +rock/rock.00030.au 9 +rock/rock.00033.au 9 +rock/rock.00034.au 9 +rock/rock.00035.au 9 +rock/rock.00037.au 9 +rock/rock.00038.au 9 +rock/rock.00040.au 9 +rock/rock.00043.au 9 +rock/rock.00045.au 9 +rock/rock.00047.au 9 +rock/rock.00049.au 9 +rock/rock.00050.au 9 +rock/rock.00052.au 9 +rock/rock.00053.au 9 +rock/rock.00054.au 9 +rock/rock.00056.au 9 +rock/rock.00057.au 9 +rock/rock.00058.au 9 +rock/rock.00059.au 9 +rock/rock.00060.au 9 +rock/rock.00062.au 9 +rock/rock.00063.au 9 +rock/rock.00064.au 9 +rock/rock.00065.au 9 +rock/rock.00066.au 9 +rock/rock.00067.au 9 +rock/rock.00069.au 9 +rock/rock.00071.au 9 +rock/rock.00072.au 9 +rock/rock.00074.au 9 +rock/rock.00076.au 9 +rock/rock.00077.au 9 +rock/rock.00079.au 9 +rock/rock.00080.au 9 +rock/rock.00081.au 9 +rock/rock.00084.au 9 +rock/rock.00085.au 9 +rock/rock.00087.au 9 +rock/rock.00088.au 9 +rock/rock.00089.au 9 +rock/rock.00090.au 9 +rock/rock.00091.au 9 +rock/rock.00092.au 9 +rock/rock.00093.au 9 +rock/rock.00094.au 9 +rock/rock.00096.au 9 +rock/rock.00097.au 9 +rock/rock.00098.au 9 +rock/rock.00099.au 9 diff --git a/datasets/gtzan/splits/genre.trainval b/datasets/gtzan/splits/genre.trainval new file mode 100644 index 
0000000..5e5fe6a --- /dev/null +++ b/datasets/gtzan/splits/genre.trainval @@ -0,0 +1,1000 @@ +blues/blues.00000.au 0 +blues/blues.00001.au 0 +blues/blues.00002.au 0 +blues/blues.00003.au 0 +blues/blues.00004.au 0 +blues/blues.00005.au 0 +blues/blues.00006.au 0 +blues/blues.00007.au 0 +blues/blues.00008.au 0 +blues/blues.00009.au 0 +blues/blues.00010.au 0 +blues/blues.00011.au 0 +blues/blues.00012.au 0 +blues/blues.00013.au 0 +blues/blues.00014.au 0 +blues/blues.00015.au 0 +blues/blues.00016.au 0 +blues/blues.00017.au 0 +blues/blues.00018.au 0 +blues/blues.00019.au 0 +blues/blues.00020.au 0 +blues/blues.00021.au 0 +blues/blues.00022.au 0 +blues/blues.00023.au 0 +blues/blues.00024.au 0 +blues/blues.00025.au 0 +blues/blues.00026.au 0 +blues/blues.00027.au 0 +blues/blues.00028.au 0 +blues/blues.00029.au 0 +blues/blues.00030.au 0 +blues/blues.00031.au 0 +blues/blues.00032.au 0 +blues/blues.00033.au 0 +blues/blues.00034.au 0 +blues/blues.00035.au 0 +blues/blues.00036.au 0 +blues/blues.00037.au 0 +blues/blues.00038.au 0 +blues/blues.00039.au 0 +blues/blues.00040.au 0 +blues/blues.00041.au 0 +blues/blues.00042.au 0 +blues/blues.00043.au 0 +blues/blues.00044.au 0 +blues/blues.00045.au 0 +blues/blues.00046.au 0 +blues/blues.00047.au 0 +blues/blues.00048.au 0 +blues/blues.00049.au 0 +blues/blues.00050.au 0 +blues/blues.00051.au 0 +blues/blues.00052.au 0 +blues/blues.00053.au 0 +blues/blues.00054.au 0 +blues/blues.00055.au 0 +blues/blues.00056.au 0 +blues/blues.00057.au 0 +blues/blues.00058.au 0 +blues/blues.00059.au 0 +blues/blues.00060.au 0 +blues/blues.00061.au 0 +blues/blues.00062.au 0 +blues/blues.00063.au 0 +blues/blues.00064.au 0 +blues/blues.00065.au 0 +blues/blues.00066.au 0 +blues/blues.00067.au 0 +blues/blues.00068.au 0 +blues/blues.00069.au 0 +blues/blues.00070.au 0 +blues/blues.00071.au 0 +blues/blues.00072.au 0 +blues/blues.00073.au 0 +blues/blues.00074.au 0 +blues/blues.00075.au 0 +blues/blues.00076.au 0 +blues/blues.00077.au 0 +blues/blues.00078.au 0 
+blues/blues.00079.au 0 +blues/blues.00080.au 0 +blues/blues.00081.au 0 +blues/blues.00082.au 0 +blues/blues.00083.au 0 +blues/blues.00084.au 0 +blues/blues.00085.au 0 +blues/blues.00086.au 0 +blues/blues.00087.au 0 +blues/blues.00088.au 0 +blues/blues.00089.au 0 +blues/blues.00090.au 0 +blues/blues.00091.au 0 +blues/blues.00092.au 0 +blues/blues.00093.au 0 +blues/blues.00094.au 0 +blues/blues.00095.au 0 +blues/blues.00096.au 0 +blues/blues.00097.au 0 +blues/blues.00098.au 0 +blues/blues.00099.au 0 +classical/classical.00000.au 1 +classical/classical.00001.au 1 +classical/classical.00002.au 1 +classical/classical.00003.au 1 +classical/classical.00004.au 1 +classical/classical.00005.au 1 +classical/classical.00006.au 1 +classical/classical.00007.au 1 +classical/classical.00008.au 1 +classical/classical.00009.au 1 +classical/classical.00010.au 1 +classical/classical.00011.au 1 +classical/classical.00012.au 1 +classical/classical.00013.au 1 +classical/classical.00014.au 1 +classical/classical.00015.au 1 +classical/classical.00016.au 1 +classical/classical.00017.au 1 +classical/classical.00018.au 1 +classical/classical.00019.au 1 +classical/classical.00020.au 1 +classical/classical.00021.au 1 +classical/classical.00022.au 1 +classical/classical.00023.au 1 +classical/classical.00024.au 1 +classical/classical.00025.au 1 +classical/classical.00026.au 1 +classical/classical.00027.au 1 +classical/classical.00028.au 1 +classical/classical.00029.au 1 +classical/classical.00030.au 1 +classical/classical.00031.au 1 +classical/classical.00032.au 1 +classical/classical.00033.au 1 +classical/classical.00034.au 1 +classical/classical.00035.au 1 +classical/classical.00036.au 1 +classical/classical.00037.au 1 +classical/classical.00038.au 1 +classical/classical.00039.au 1 +classical/classical.00040.au 1 +classical/classical.00041.au 1 +classical/classical.00042.au 1 +classical/classical.00043.au 1 +classical/classical.00044.au 1 +classical/classical.00045.au 1 
+classical/classical.00046.au 1 +classical/classical.00047.au 1 +classical/classical.00048.au 1 +classical/classical.00049.au 1 +classical/classical.00050.au 1 +classical/classical.00051.au 1 +classical/classical.00052.au 1 +classical/classical.00053.au 1 +classical/classical.00054.au 1 +classical/classical.00055.au 1 +classical/classical.00056.au 1 +classical/classical.00057.au 1 +classical/classical.00058.au 1 +classical/classical.00059.au 1 +classical/classical.00060.au 1 +classical/classical.00061.au 1 +classical/classical.00062.au 1 +classical/classical.00063.au 1 +classical/classical.00064.au 1 +classical/classical.00065.au 1 +classical/classical.00066.au 1 +classical/classical.00067.au 1 +classical/classical.00068.au 1 +classical/classical.00069.au 1 +classical/classical.00070.au 1 +classical/classical.00071.au 1 +classical/classical.00072.au 1 +classical/classical.00073.au 1 +classical/classical.00074.au 1 +classical/classical.00075.au 1 +classical/classical.00076.au 1 +classical/classical.00077.au 1 +classical/classical.00078.au 1 +classical/classical.00079.au 1 +classical/classical.00080.au 1 +classical/classical.00081.au 1 +classical/classical.00082.au 1 +classical/classical.00083.au 1 +classical/classical.00084.au 1 +classical/classical.00085.au 1 +classical/classical.00086.au 1 +classical/classical.00087.au 1 +classical/classical.00088.au 1 +classical/classical.00089.au 1 +classical/classical.00090.au 1 +classical/classical.00091.au 1 +classical/classical.00092.au 1 +classical/classical.00093.au 1 +classical/classical.00094.au 1 +classical/classical.00095.au 1 +classical/classical.00096.au 1 +classical/classical.00097.au 1 +classical/classical.00098.au 1 +classical/classical.00099.au 1 +country/country.00000.au 2 +country/country.00001.au 2 +country/country.00002.au 2 +country/country.00003.au 2 +country/country.00004.au 2 +country/country.00005.au 2 +country/country.00006.au 2 +country/country.00007.au 2 +country/country.00008.au 2 
+country/country.00009.au 2 +country/country.00010.au 2 +country/country.00011.au 2 +country/country.00012.au 2 +country/country.00013.au 2 +country/country.00014.au 2 +country/country.00015.au 2 +country/country.00016.au 2 +country/country.00017.au 2 +country/country.00018.au 2 +country/country.00019.au 2 +country/country.00020.au 2 +country/country.00021.au 2 +country/country.00022.au 2 +country/country.00023.au 2 +country/country.00024.au 2 +country/country.00025.au 2 +country/country.00026.au 2 +country/country.00027.au 2 +country/country.00028.au 2 +country/country.00029.au 2 +country/country.00030.au 2 +country/country.00031.au 2 +country/country.00032.au 2 +country/country.00033.au 2 +country/country.00034.au 2 +country/country.00035.au 2 +country/country.00036.au 2 +country/country.00037.au 2 +country/country.00038.au 2 +country/country.00039.au 2 +country/country.00040.au 2 +country/country.00041.au 2 +country/country.00042.au 2 +country/country.00043.au 2 +country/country.00044.au 2 +country/country.00045.au 2 +country/country.00046.au 2 +country/country.00047.au 2 +country/country.00048.au 2 +country/country.00049.au 2 +country/country.00050.au 2 +country/country.00051.au 2 +country/country.00052.au 2 +country/country.00053.au 2 +country/country.00054.au 2 +country/country.00055.au 2 +country/country.00056.au 2 +country/country.00057.au 2 +country/country.00058.au 2 +country/country.00059.au 2 +country/country.00060.au 2 +country/country.00061.au 2 +country/country.00062.au 2 +country/country.00063.au 2 +country/country.00064.au 2 +country/country.00065.au 2 +country/country.00066.au 2 +country/country.00067.au 2 +country/country.00068.au 2 +country/country.00069.au 2 +country/country.00070.au 2 +country/country.00071.au 2 +country/country.00072.au 2 +country/country.00073.au 2 +country/country.00074.au 2 +country/country.00075.au 2 +country/country.00076.au 2 +country/country.00077.au 2 +country/country.00078.au 2 +country/country.00079.au 2 
+country/country.00080.au 2 +country/country.00081.au 2 +country/country.00082.au 2 +country/country.00083.au 2 +country/country.00084.au 2 +country/country.00085.au 2 +country/country.00086.au 2 +country/country.00087.au 2 +country/country.00088.au 2 +country/country.00089.au 2 +country/country.00090.au 2 +country/country.00091.au 2 +country/country.00092.au 2 +country/country.00093.au 2 +country/country.00094.au 2 +country/country.00095.au 2 +country/country.00096.au 2 +country/country.00097.au 2 +country/country.00098.au 2 +country/country.00099.au 2 +disco/disco.00000.au 3 +disco/disco.00001.au 3 +disco/disco.00002.au 3 +disco/disco.00003.au 3 +disco/disco.00004.au 3 +disco/disco.00005.au 3 +disco/disco.00006.au 3 +disco/disco.00007.au 3 +disco/disco.00008.au 3 +disco/disco.00009.au 3 +disco/disco.00010.au 3 +disco/disco.00011.au 3 +disco/disco.00012.au 3 +disco/disco.00013.au 3 +disco/disco.00014.au 3 +disco/disco.00015.au 3 +disco/disco.00016.au 3 +disco/disco.00017.au 3 +disco/disco.00018.au 3 +disco/disco.00019.au 3 +disco/disco.00020.au 3 +disco/disco.00021.au 3 +disco/disco.00022.au 3 +disco/disco.00023.au 3 +disco/disco.00024.au 3 +disco/disco.00025.au 3 +disco/disco.00026.au 3 +disco/disco.00027.au 3 +disco/disco.00028.au 3 +disco/disco.00029.au 3 +disco/disco.00030.au 3 +disco/disco.00031.au 3 +disco/disco.00032.au 3 +disco/disco.00033.au 3 +disco/disco.00034.au 3 +disco/disco.00035.au 3 +disco/disco.00036.au 3 +disco/disco.00037.au 3 +disco/disco.00038.au 3 +disco/disco.00039.au 3 +disco/disco.00040.au 3 +disco/disco.00041.au 3 +disco/disco.00042.au 3 +disco/disco.00043.au 3 +disco/disco.00044.au 3 +disco/disco.00045.au 3 +disco/disco.00046.au 3 +disco/disco.00047.au 3 +disco/disco.00048.au 3 +disco/disco.00049.au 3 +disco/disco.00050.au 3 +disco/disco.00051.au 3 +disco/disco.00052.au 3 +disco/disco.00053.au 3 +disco/disco.00054.au 3 +disco/disco.00055.au 3 +disco/disco.00056.au 3 +disco/disco.00057.au 3 +disco/disco.00058.au 3 +disco/disco.00059.au 3 
+disco/disco.00060.au 3 +disco/disco.00061.au 3 +disco/disco.00062.au 3 +disco/disco.00063.au 3 +disco/disco.00064.au 3 +disco/disco.00065.au 3 +disco/disco.00066.au 3 +disco/disco.00067.au 3 +disco/disco.00068.au 3 +disco/disco.00069.au 3 +disco/disco.00070.au 3 +disco/disco.00071.au 3 +disco/disco.00072.au 3 +disco/disco.00073.au 3 +disco/disco.00074.au 3 +disco/disco.00075.au 3 +disco/disco.00076.au 3 +disco/disco.00077.au 3 +disco/disco.00078.au 3 +disco/disco.00079.au 3 +disco/disco.00080.au 3 +disco/disco.00081.au 3 +disco/disco.00082.au 3 +disco/disco.00083.au 3 +disco/disco.00084.au 3 +disco/disco.00085.au 3 +disco/disco.00086.au 3 +disco/disco.00087.au 3 +disco/disco.00088.au 3 +disco/disco.00089.au 3 +disco/disco.00090.au 3 +disco/disco.00091.au 3 +disco/disco.00092.au 3 +disco/disco.00093.au 3 +disco/disco.00094.au 3 +disco/disco.00095.au 3 +disco/disco.00096.au 3 +disco/disco.00097.au 3 +disco/disco.00098.au 3 +disco/disco.00099.au 3 +hiphop/hiphop.00000.au 4 +hiphop/hiphop.00001.au 4 +hiphop/hiphop.00002.au 4 +hiphop/hiphop.00003.au 4 +hiphop/hiphop.00004.au 4 +hiphop/hiphop.00005.au 4 +hiphop/hiphop.00006.au 4 +hiphop/hiphop.00007.au 4 +hiphop/hiphop.00008.au 4 +hiphop/hiphop.00009.au 4 +hiphop/hiphop.00010.au 4 +hiphop/hiphop.00011.au 4 +hiphop/hiphop.00012.au 4 +hiphop/hiphop.00013.au 4 +hiphop/hiphop.00014.au 4 +hiphop/hiphop.00015.au 4 +hiphop/hiphop.00016.au 4 +hiphop/hiphop.00017.au 4 +hiphop/hiphop.00018.au 4 +hiphop/hiphop.00019.au 4 +hiphop/hiphop.00020.au 4 +hiphop/hiphop.00021.au 4 +hiphop/hiphop.00022.au 4 +hiphop/hiphop.00023.au 4 +hiphop/hiphop.00024.au 4 +hiphop/hiphop.00025.au 4 +hiphop/hiphop.00026.au 4 +hiphop/hiphop.00027.au 4 +hiphop/hiphop.00028.au 4 +hiphop/hiphop.00029.au 4 +hiphop/hiphop.00030.au 4 +hiphop/hiphop.00031.au 4 +hiphop/hiphop.00032.au 4 +hiphop/hiphop.00033.au 4 +hiphop/hiphop.00034.au 4 +hiphop/hiphop.00035.au 4 +hiphop/hiphop.00036.au 4 +hiphop/hiphop.00037.au 4 +hiphop/hiphop.00038.au 4 +hiphop/hiphop.00039.au 4 
+hiphop/hiphop.00040.au 4 +hiphop/hiphop.00041.au 4 +hiphop/hiphop.00042.au 4 +hiphop/hiphop.00043.au 4 +hiphop/hiphop.00044.au 4 +hiphop/hiphop.00045.au 4 +hiphop/hiphop.00046.au 4 +hiphop/hiphop.00047.au 4 +hiphop/hiphop.00048.au 4 +hiphop/hiphop.00049.au 4 +hiphop/hiphop.00050.au 4 +hiphop/hiphop.00051.au 4 +hiphop/hiphop.00052.au 4 +hiphop/hiphop.00053.au 4 +hiphop/hiphop.00054.au 4 +hiphop/hiphop.00055.au 4 +hiphop/hiphop.00056.au 4 +hiphop/hiphop.00057.au 4 +hiphop/hiphop.00058.au 4 +hiphop/hiphop.00059.au 4 +hiphop/hiphop.00060.au 4 +hiphop/hiphop.00061.au 4 +hiphop/hiphop.00062.au 4 +hiphop/hiphop.00063.au 4 +hiphop/hiphop.00064.au 4 +hiphop/hiphop.00065.au 4 +hiphop/hiphop.00066.au 4 +hiphop/hiphop.00067.au 4 +hiphop/hiphop.00068.au 4 +hiphop/hiphop.00069.au 4 +hiphop/hiphop.00070.au 4 +hiphop/hiphop.00071.au 4 +hiphop/hiphop.00072.au 4 +hiphop/hiphop.00073.au 4 +hiphop/hiphop.00074.au 4 +hiphop/hiphop.00075.au 4 +hiphop/hiphop.00076.au 4 +hiphop/hiphop.00077.au 4 +hiphop/hiphop.00078.au 4 +hiphop/hiphop.00079.au 4 +hiphop/hiphop.00080.au 4 +hiphop/hiphop.00081.au 4 +hiphop/hiphop.00082.au 4 +hiphop/hiphop.00083.au 4 +hiphop/hiphop.00084.au 4 +hiphop/hiphop.00085.au 4 +hiphop/hiphop.00086.au 4 +hiphop/hiphop.00087.au 4 +hiphop/hiphop.00088.au 4 +hiphop/hiphop.00089.au 4 +hiphop/hiphop.00090.au 4 +hiphop/hiphop.00091.au 4 +hiphop/hiphop.00092.au 4 +hiphop/hiphop.00093.au 4 +hiphop/hiphop.00094.au 4 +hiphop/hiphop.00095.au 4 +hiphop/hiphop.00096.au 4 +hiphop/hiphop.00097.au 4 +hiphop/hiphop.00098.au 4 +hiphop/hiphop.00099.au 4 +jazz/jazz.00000.au 5 +jazz/jazz.00001.au 5 +jazz/jazz.00002.au 5 +jazz/jazz.00003.au 5 +jazz/jazz.00004.au 5 +jazz/jazz.00005.au 5 +jazz/jazz.00006.au 5 +jazz/jazz.00007.au 5 +jazz/jazz.00008.au 5 +jazz/jazz.00009.au 5 +jazz/jazz.00010.au 5 +jazz/jazz.00011.au 5 +jazz/jazz.00012.au 5 +jazz/jazz.00013.au 5 +jazz/jazz.00014.au 5 +jazz/jazz.00015.au 5 +jazz/jazz.00016.au 5 +jazz/jazz.00017.au 5 +jazz/jazz.00018.au 5 +jazz/jazz.00019.au 5 
+jazz/jazz.00020.au 5 +jazz/jazz.00021.au 5 +jazz/jazz.00022.au 5 +jazz/jazz.00023.au 5 +jazz/jazz.00024.au 5 +jazz/jazz.00025.au 5 +jazz/jazz.00026.au 5 +jazz/jazz.00027.au 5 +jazz/jazz.00028.au 5 +jazz/jazz.00029.au 5 +jazz/jazz.00030.au 5 +jazz/jazz.00031.au 5 +jazz/jazz.00032.au 5 +jazz/jazz.00033.au 5 +jazz/jazz.00034.au 5 +jazz/jazz.00035.au 5 +jazz/jazz.00036.au 5 +jazz/jazz.00037.au 5 +jazz/jazz.00038.au 5 +jazz/jazz.00039.au 5 +jazz/jazz.00040.au 5 +jazz/jazz.00041.au 5 +jazz/jazz.00042.au 5 +jazz/jazz.00043.au 5 +jazz/jazz.00044.au 5 +jazz/jazz.00045.au 5 +jazz/jazz.00046.au 5 +jazz/jazz.00047.au 5 +jazz/jazz.00048.au 5 +jazz/jazz.00049.au 5 +jazz/jazz.00050.au 5 +jazz/jazz.00051.au 5 +jazz/jazz.00052.au 5 +jazz/jazz.00053.au 5 +jazz/jazz.00054.au 5 +jazz/jazz.00055.au 5 +jazz/jazz.00056.au 5 +jazz/jazz.00057.au 5 +jazz/jazz.00058.au 5 +jazz/jazz.00059.au 5 +jazz/jazz.00060.au 5 +jazz/jazz.00061.au 5 +jazz/jazz.00062.au 5 +jazz/jazz.00063.au 5 +jazz/jazz.00064.au 5 +jazz/jazz.00065.au 5 +jazz/jazz.00066.au 5 +jazz/jazz.00067.au 5 +jazz/jazz.00068.au 5 +jazz/jazz.00069.au 5 +jazz/jazz.00070.au 5 +jazz/jazz.00071.au 5 +jazz/jazz.00072.au 5 +jazz/jazz.00073.au 5 +jazz/jazz.00074.au 5 +jazz/jazz.00075.au 5 +jazz/jazz.00076.au 5 +jazz/jazz.00077.au 5 +jazz/jazz.00078.au 5 +jazz/jazz.00079.au 5 +jazz/jazz.00080.au 5 +jazz/jazz.00081.au 5 +jazz/jazz.00082.au 5 +jazz/jazz.00083.au 5 +jazz/jazz.00084.au 5 +jazz/jazz.00085.au 5 +jazz/jazz.00086.au 5 +jazz/jazz.00087.au 5 +jazz/jazz.00088.au 5 +jazz/jazz.00089.au 5 +jazz/jazz.00090.au 5 +jazz/jazz.00091.au 5 +jazz/jazz.00092.au 5 +jazz/jazz.00093.au 5 +jazz/jazz.00094.au 5 +jazz/jazz.00095.au 5 +jazz/jazz.00096.au 5 +jazz/jazz.00097.au 5 +jazz/jazz.00098.au 5 +jazz/jazz.00099.au 5 +metal/metal.00000.au 6 +metal/metal.00001.au 6 +metal/metal.00002.au 6 +metal/metal.00003.au 6 +metal/metal.00004.au 6 +metal/metal.00005.au 6 +metal/metal.00006.au 6 +metal/metal.00007.au 6 +metal/metal.00008.au 6 +metal/metal.00009.au 6 
+metal/metal.00010.au 6 +metal/metal.00011.au 6 +metal/metal.00012.au 6 +metal/metal.00013.au 6 +metal/metal.00014.au 6 +metal/metal.00015.au 6 +metal/metal.00016.au 6 +metal/metal.00017.au 6 +metal/metal.00018.au 6 +metal/metal.00019.au 6 +metal/metal.00020.au 6 +metal/metal.00021.au 6 +metal/metal.00022.au 6 +metal/metal.00023.au 6 +metal/metal.00024.au 6 +metal/metal.00025.au 6 +metal/metal.00026.au 6 +metal/metal.00027.au 6 +metal/metal.00028.au 6 +metal/metal.00029.au 6 +metal/metal.00030.au 6 +metal/metal.00031.au 6 +metal/metal.00032.au 6 +metal/metal.00033.au 6 +metal/metal.00034.au 6 +metal/metal.00035.au 6 +metal/metal.00036.au 6 +metal/metal.00037.au 6 +metal/metal.00038.au 6 +metal/metal.00039.au 6 +metal/metal.00040.au 6 +metal/metal.00041.au 6 +metal/metal.00042.au 6 +metal/metal.00043.au 6 +metal/metal.00044.au 6 +metal/metal.00045.au 6 +metal/metal.00046.au 6 +metal/metal.00047.au 6 +metal/metal.00048.au 6 +metal/metal.00049.au 6 +metal/metal.00050.au 6 +metal/metal.00051.au 6 +metal/metal.00052.au 6 +metal/metal.00053.au 6 +metal/metal.00054.au 6 +metal/metal.00055.au 6 +metal/metal.00056.au 6 +metal/metal.00057.au 6 +metal/metal.00058.au 6 +metal/metal.00059.au 6 +metal/metal.00060.au 6 +metal/metal.00061.au 6 +metal/metal.00062.au 6 +metal/metal.00063.au 6 +metal/metal.00064.au 6 +metal/metal.00065.au 6 +metal/metal.00066.au 6 +metal/metal.00067.au 6 +metal/metal.00068.au 6 +metal/metal.00069.au 6 +metal/metal.00070.au 6 +metal/metal.00071.au 6 +metal/metal.00072.au 6 +metal/metal.00073.au 6 +metal/metal.00074.au 6 +metal/metal.00075.au 6 +metal/metal.00076.au 6 +metal/metal.00077.au 6 +metal/metal.00078.au 6 +metal/metal.00079.au 6 +metal/metal.00080.au 6 +metal/metal.00081.au 6 +metal/metal.00082.au 6 +metal/metal.00083.au 6 +metal/metal.00084.au 6 +metal/metal.00085.au 6 +metal/metal.00086.au 6 +metal/metal.00087.au 6 +metal/metal.00088.au 6 +metal/metal.00089.au 6 +metal/metal.00090.au 6 +metal/metal.00091.au 6 +metal/metal.00092.au 6 
+metal/metal.00093.au 6 +metal/metal.00094.au 6 +metal/metal.00095.au 6 +metal/metal.00096.au 6 +metal/metal.00097.au 6 +metal/metal.00098.au 6 +metal/metal.00099.au 6 +pop/pop.00000.au 7 +pop/pop.00001.au 7 +pop/pop.00002.au 7 +pop/pop.00003.au 7 +pop/pop.00004.au 7 +pop/pop.00005.au 7 +pop/pop.00006.au 7 +pop/pop.00007.au 7 +pop/pop.00008.au 7 +pop/pop.00009.au 7 +pop/pop.00010.au 7 +pop/pop.00011.au 7 +pop/pop.00012.au 7 +pop/pop.00013.au 7 +pop/pop.00014.au 7 +pop/pop.00015.au 7 +pop/pop.00016.au 7 +pop/pop.00017.au 7 +pop/pop.00018.au 7 +pop/pop.00019.au 7 +pop/pop.00020.au 7 +pop/pop.00021.au 7 +pop/pop.00022.au 7 +pop/pop.00023.au 7 +pop/pop.00024.au 7 +pop/pop.00025.au 7 +pop/pop.00026.au 7 +pop/pop.00027.au 7 +pop/pop.00028.au 7 +pop/pop.00029.au 7 +pop/pop.00030.au 7 +pop/pop.00031.au 7 +pop/pop.00032.au 7 +pop/pop.00033.au 7 +pop/pop.00034.au 7 +pop/pop.00035.au 7 +pop/pop.00036.au 7 +pop/pop.00037.au 7 +pop/pop.00038.au 7 +pop/pop.00039.au 7 +pop/pop.00040.au 7 +pop/pop.00041.au 7 +pop/pop.00042.au 7 +pop/pop.00043.au 7 +pop/pop.00044.au 7 +pop/pop.00045.au 7 +pop/pop.00046.au 7 +pop/pop.00047.au 7 +pop/pop.00048.au 7 +pop/pop.00049.au 7 +pop/pop.00050.au 7 +pop/pop.00051.au 7 +pop/pop.00052.au 7 +pop/pop.00053.au 7 +pop/pop.00054.au 7 +pop/pop.00055.au 7 +pop/pop.00056.au 7 +pop/pop.00057.au 7 +pop/pop.00058.au 7 +pop/pop.00059.au 7 +pop/pop.00060.au 7 +pop/pop.00061.au 7 +pop/pop.00062.au 7 +pop/pop.00063.au 7 +pop/pop.00064.au 7 +pop/pop.00065.au 7 +pop/pop.00066.au 7 +pop/pop.00067.au 7 +pop/pop.00068.au 7 +pop/pop.00069.au 7 +pop/pop.00070.au 7 +pop/pop.00071.au 7 +pop/pop.00072.au 7 +pop/pop.00073.au 7 +pop/pop.00074.au 7 +pop/pop.00075.au 7 +pop/pop.00076.au 7 +pop/pop.00077.au 7 +pop/pop.00078.au 7 +pop/pop.00079.au 7 +pop/pop.00080.au 7 +pop/pop.00081.au 7 +pop/pop.00082.au 7 +pop/pop.00083.au 7 +pop/pop.00084.au 7 +pop/pop.00085.au 7 +pop/pop.00086.au 7 +pop/pop.00087.au 7 +pop/pop.00088.au 7 +pop/pop.00089.au 7 +pop/pop.00090.au 7 
+pop/pop.00091.au 7 +pop/pop.00092.au 7 +pop/pop.00093.au 7 +pop/pop.00094.au 7 +pop/pop.00095.au 7 +pop/pop.00096.au 7 +pop/pop.00097.au 7 +pop/pop.00098.au 7 +pop/pop.00099.au 7 +reggae/reggae.00000.au 8 +reggae/reggae.00001.au 8 +reggae/reggae.00002.au 8 +reggae/reggae.00003.au 8 +reggae/reggae.00004.au 8 +reggae/reggae.00005.au 8 +reggae/reggae.00006.au 8 +reggae/reggae.00007.au 8 +reggae/reggae.00008.au 8 +reggae/reggae.00009.au 8 +reggae/reggae.00010.au 8 +reggae/reggae.00011.au 8 +reggae/reggae.00012.au 8 +reggae/reggae.00013.au 8 +reggae/reggae.00014.au 8 +reggae/reggae.00015.au 8 +reggae/reggae.00016.au 8 +reggae/reggae.00017.au 8 +reggae/reggae.00018.au 8 +reggae/reggae.00019.au 8 +reggae/reggae.00020.au 8 +reggae/reggae.00021.au 8 +reggae/reggae.00022.au 8 +reggae/reggae.00023.au 8 +reggae/reggae.00024.au 8 +reggae/reggae.00025.au 8 +reggae/reggae.00026.au 8 +reggae/reggae.00027.au 8 +reggae/reggae.00028.au 8 +reggae/reggae.00029.au 8 +reggae/reggae.00030.au 8 +reggae/reggae.00031.au 8 +reggae/reggae.00032.au 8 +reggae/reggae.00033.au 8 +reggae/reggae.00034.au 8 +reggae/reggae.00035.au 8 +reggae/reggae.00036.au 8 +reggae/reggae.00037.au 8 +reggae/reggae.00038.au 8 +reggae/reggae.00039.au 8 +reggae/reggae.00040.au 8 +reggae/reggae.00041.au 8 +reggae/reggae.00042.au 8 +reggae/reggae.00043.au 8 +reggae/reggae.00044.au 8 +reggae/reggae.00045.au 8 +reggae/reggae.00046.au 8 +reggae/reggae.00047.au 8 +reggae/reggae.00048.au 8 +reggae/reggae.00049.au 8 +reggae/reggae.00050.au 8 +reggae/reggae.00051.au 8 +reggae/reggae.00052.au 8 +reggae/reggae.00053.au 8 +reggae/reggae.00054.au 8 +reggae/reggae.00055.au 8 +reggae/reggae.00056.au 8 +reggae/reggae.00057.au 8 +reggae/reggae.00058.au 8 +reggae/reggae.00059.au 8 +reggae/reggae.00060.au 8 +reggae/reggae.00061.au 8 +reggae/reggae.00062.au 8 +reggae/reggae.00063.au 8 +reggae/reggae.00064.au 8 +reggae/reggae.00065.au 8 +reggae/reggae.00066.au 8 +reggae/reggae.00067.au 8 +reggae/reggae.00068.au 8 +reggae/reggae.00069.au 8 
+reggae/reggae.00070.au 8 +reggae/reggae.00071.au 8 +reggae/reggae.00072.au 8 +reggae/reggae.00073.au 8 +reggae/reggae.00074.au 8 +reggae/reggae.00075.au 8 +reggae/reggae.00076.au 8 +reggae/reggae.00077.au 8 +reggae/reggae.00078.au 8 +reggae/reggae.00079.au 8 +reggae/reggae.00080.au 8 +reggae/reggae.00081.au 8 +reggae/reggae.00082.au 8 +reggae/reggae.00083.au 8 +reggae/reggae.00084.au 8 +reggae/reggae.00085.au 8 +reggae/reggae.00086.au 8 +reggae/reggae.00087.au 8 +reggae/reggae.00088.au 8 +reggae/reggae.00089.au 8 +reggae/reggae.00090.au 8 +reggae/reggae.00091.au 8 +reggae/reggae.00092.au 8 +reggae/reggae.00093.au 8 +reggae/reggae.00094.au 8 +reggae/reggae.00095.au 8 +reggae/reggae.00096.au 8 +reggae/reggae.00097.au 8 +reggae/reggae.00098.au 8 +reggae/reggae.00099.au 8 +rock/rock.00000.au 9 +rock/rock.00001.au 9 +rock/rock.00002.au 9 +rock/rock.00003.au 9 +rock/rock.00004.au 9 +rock/rock.00005.au 9 +rock/rock.00006.au 9 +rock/rock.00007.au 9 +rock/rock.00008.au 9 +rock/rock.00009.au 9 +rock/rock.00010.au 9 +rock/rock.00011.au 9 +rock/rock.00012.au 9 +rock/rock.00013.au 9 +rock/rock.00014.au 9 +rock/rock.00015.au 9 +rock/rock.00016.au 9 +rock/rock.00017.au 9 +rock/rock.00018.au 9 +rock/rock.00019.au 9 +rock/rock.00020.au 9 +rock/rock.00021.au 9 +rock/rock.00022.au 9 +rock/rock.00023.au 9 +rock/rock.00024.au 9 +rock/rock.00025.au 9 +rock/rock.00026.au 9 +rock/rock.00027.au 9 +rock/rock.00028.au 9 +rock/rock.00029.au 9 +rock/rock.00030.au 9 +rock/rock.00031.au 9 +rock/rock.00032.au 9 +rock/rock.00033.au 9 +rock/rock.00034.au 9 +rock/rock.00035.au 9 +rock/rock.00036.au 9 +rock/rock.00037.au 9 +rock/rock.00038.au 9 +rock/rock.00039.au 9 +rock/rock.00040.au 9 +rock/rock.00041.au 9 +rock/rock.00042.au 9 +rock/rock.00043.au 9 +rock/rock.00044.au 9 +rock/rock.00045.au 9 +rock/rock.00046.au 9 +rock/rock.00047.au 9 +rock/rock.00048.au 9 +rock/rock.00049.au 9 +rock/rock.00050.au 9 +rock/rock.00051.au 9 +rock/rock.00052.au 9 +rock/rock.00053.au 9 +rock/rock.00054.au 9 
+rock/rock.00055.au 9 +rock/rock.00056.au 9 +rock/rock.00057.au 9 +rock/rock.00058.au 9 +rock/rock.00059.au 9 +rock/rock.00060.au 9 +rock/rock.00061.au 9 +rock/rock.00062.au 9 +rock/rock.00063.au 9 +rock/rock.00064.au 9 +rock/rock.00065.au 9 +rock/rock.00066.au 9 +rock/rock.00067.au 9 +rock/rock.00068.au 9 +rock/rock.00069.au 9 +rock/rock.00070.au 9 +rock/rock.00071.au 9 +rock/rock.00072.au 9 +rock/rock.00073.au 9 +rock/rock.00074.au 9 +rock/rock.00075.au 9 +rock/rock.00076.au 9 +rock/rock.00077.au 9 +rock/rock.00078.au 9 +rock/rock.00079.au 9 +rock/rock.00080.au 9 +rock/rock.00081.au 9 +rock/rock.00082.au 9 +rock/rock.00083.au 9 +rock/rock.00084.au 9 +rock/rock.00085.au 9 +rock/rock.00086.au 9 +rock/rock.00087.au 9 +rock/rock.00088.au 9 +rock/rock.00089.au 9 +rock/rock.00090.au 9 +rock/rock.00091.au 9 +rock/rock.00092.au 9 +rock/rock.00093.au 9 +rock/rock.00094.au 9 +rock/rock.00095.au 9 +rock/rock.00096.au 9 +rock/rock.00097.au 9 +rock/rock.00098.au 9 +rock/rock.00099.au 9 diff --git a/datasets/gtzan/splits/genre.val b/datasets/gtzan/splits/genre.val new file mode 100644 index 0000000..6018058 --- /dev/null +++ b/datasets/gtzan/splits/genre.val @@ -0,0 +1,300 @@ +blues/blues.00002.au 0 +blues/blues.00003.au 0 +blues/blues.00006.au 0 +blues/blues.00007.au 0 +blues/blues.00008.au 0 +blues/blues.00013.au 0 +blues/blues.00016.au 0 +blues/blues.00022.au 0 +blues/blues.00024.au 0 +blues/blues.00026.au 0 +blues/blues.00030.au 0 +blues/blues.00033.au 0 +blues/blues.00043.au 0 +blues/blues.00045.au 0 +blues/blues.00048.au 0 +blues/blues.00053.au 0 +blues/blues.00054.au 0 +blues/blues.00055.au 0 +blues/blues.00062.au 0 +blues/blues.00071.au 0 +blues/blues.00073.au 0 +blues/blues.00075.au 0 +blues/blues.00076.au 0 +blues/blues.00078.au 0 +blues/blues.00082.au 0 +blues/blues.00086.au 0 +blues/blues.00092.au 0 +blues/blues.00093.au 0 +blues/blues.00095.au 0 +blues/blues.00099.au 0 +classical/classical.00002.au 1 +classical/classical.00010.au 1 +classical/classical.00017.au 1 
+classical/classical.00019.au 1 +classical/classical.00027.au 1 +classical/classical.00031.au 1 +classical/classical.00032.au 1 +classical/classical.00033.au 1 +classical/classical.00035.au 1 +classical/classical.00036.au 1 +classical/classical.00038.au 1 +classical/classical.00039.au 1 +classical/classical.00044.au 1 +classical/classical.00046.au 1 +classical/classical.00051.au 1 +classical/classical.00052.au 1 +classical/classical.00056.au 1 +classical/classical.00062.au 1 +classical/classical.00065.au 1 +classical/classical.00069.au 1 +classical/classical.00073.au 1 +classical/classical.00078.au 1 +classical/classical.00080.au 1 +classical/classical.00081.au 1 +classical/classical.00082.au 1 +classical/classical.00084.au 1 +classical/classical.00092.au 1 +classical/classical.00093.au 1 +classical/classical.00094.au 1 +classical/classical.00097.au 1 +country/country.00000.au 2 +country/country.00002.au 2 +country/country.00003.au 2 +country/country.00005.au 2 +country/country.00011.au 2 +country/country.00013.au 2 +country/country.00014.au 2 +country/country.00016.au 2 +country/country.00021.au 2 +country/country.00023.au 2 +country/country.00024.au 2 +country/country.00025.au 2 +country/country.00027.au 2 +country/country.00028.au 2 +country/country.00029.au 2 +country/country.00030.au 2 +country/country.00035.au 2 +country/country.00044.au 2 +country/country.00056.au 2 +country/country.00061.au 2 +country/country.00062.au 2 +country/country.00073.au 2 +country/country.00074.au 2 +country/country.00076.au 2 +country/country.00077.au 2 +country/country.00079.au 2 +country/country.00083.au 2 +country/country.00084.au 2 +country/country.00092.au 2 +country/country.00099.au 2 +disco/disco.00004.au 3 +disco/disco.00006.au 3 +disco/disco.00008.au 3 +disco/disco.00009.au 3 +disco/disco.00012.au 3 +disco/disco.00015.au 3 +disco/disco.00017.au 3 +disco/disco.00023.au 3 +disco/disco.00025.au 3 +disco/disco.00034.au 3 +disco/disco.00042.au 3 +disco/disco.00045.au 3 
+disco/disco.00047.au 3 +disco/disco.00050.au 3 +disco/disco.00059.au 3 +disco/disco.00064.au 3 +disco/disco.00065.au 3 +disco/disco.00067.au 3 +disco/disco.00071.au 3 +disco/disco.00073.au 3 +disco/disco.00075.au 3 +disco/disco.00077.au 3 +disco/disco.00079.au 3 +disco/disco.00082.au 3 +disco/disco.00083.au 3 +disco/disco.00086.au 3 +disco/disco.00093.au 3 +disco/disco.00096.au 3 +disco/disco.00097.au 3 +disco/disco.00098.au 3 +hiphop/hiphop.00002.au 4 +hiphop/hiphop.00004.au 4 +hiphop/hiphop.00005.au 4 +hiphop/hiphop.00010.au 4 +hiphop/hiphop.00013.au 4 +hiphop/hiphop.00014.au 4 +hiphop/hiphop.00016.au 4 +hiphop/hiphop.00019.au 4 +hiphop/hiphop.00020.au 4 +hiphop/hiphop.00024.au 4 +hiphop/hiphop.00026.au 4 +hiphop/hiphop.00029.au 4 +hiphop/hiphop.00035.au 4 +hiphop/hiphop.00037.au 4 +hiphop/hiphop.00041.au 4 +hiphop/hiphop.00043.au 4 +hiphop/hiphop.00047.au 4 +hiphop/hiphop.00053.au 4 +hiphop/hiphop.00054.au 4 +hiphop/hiphop.00060.au 4 +hiphop/hiphop.00063.au 4 +hiphop/hiphop.00070.au 4 +hiphop/hiphop.00071.au 4 +hiphop/hiphop.00076.au 4 +hiphop/hiphop.00080.au 4 +hiphop/hiphop.00081.au 4 +hiphop/hiphop.00085.au 4 +hiphop/hiphop.00095.au 4 +hiphop/hiphop.00096.au 4 +hiphop/hiphop.00097.au 4 +jazz/jazz.00010.au 5 +jazz/jazz.00012.au 5 +jazz/jazz.00013.au 5 +jazz/jazz.00017.au 5 +jazz/jazz.00020.au 5 +jazz/jazz.00022.au 5 +jazz/jazz.00023.au 5 +jazz/jazz.00024.au 5 +jazz/jazz.00028.au 5 +jazz/jazz.00032.au 5 +jazz/jazz.00034.au 5 +jazz/jazz.00035.au 5 +jazz/jazz.00037.au 5 +jazz/jazz.00039.au 5 +jazz/jazz.00040.au 5 +jazz/jazz.00042.au 5 +jazz/jazz.00046.au 5 +jazz/jazz.00055.au 5 +jazz/jazz.00056.au 5 +jazz/jazz.00057.au 5 +jazz/jazz.00060.au 5 +jazz/jazz.00063.au 5 +jazz/jazz.00066.au 5 +jazz/jazz.00069.au 5 +jazz/jazz.00070.au 5 +jazz/jazz.00071.au 5 +jazz/jazz.00074.au 5 +jazz/jazz.00087.au 5 +jazz/jazz.00094.au 5 +jazz/jazz.00097.au 5 +metal/metal.00000.au 6 +metal/metal.00009.au 6 +metal/metal.00013.au 6 +metal/metal.00016.au 6 +metal/metal.00017.au 6 
+metal/metal.00020.au 6 +metal/metal.00023.au 6 +metal/metal.00029.au 6 +metal/metal.00030.au 6 +metal/metal.00032.au 6 +metal/metal.00034.au 6 +metal/metal.00037.au 6 +metal/metal.00039.au 6 +metal/metal.00044.au 6 +metal/metal.00045.au 6 +metal/metal.00047.au 6 +metal/metal.00048.au 6 +metal/metal.00054.au 6 +metal/metal.00059.au 6 +metal/metal.00060.au 6 +metal/metal.00061.au 6 +metal/metal.00067.au 6 +metal/metal.00070.au 6 +metal/metal.00074.au 6 +metal/metal.00081.au 6 +metal/metal.00083.au 6 +metal/metal.00086.au 6 +metal/metal.00091.au 6 +metal/metal.00093.au 6 +metal/metal.00097.au 6 +pop/pop.00002.au 7 +pop/pop.00005.au 7 +pop/pop.00011.au 7 +pop/pop.00012.au 7 +pop/pop.00013.au 7 +pop/pop.00015.au 7 +pop/pop.00017.au 7 +pop/pop.00018.au 7 +pop/pop.00020.au 7 +pop/pop.00022.au 7 +pop/pop.00026.au 7 +pop/pop.00030.au 7 +pop/pop.00037.au 7 +pop/pop.00040.au 7 +pop/pop.00045.au 7 +pop/pop.00049.au 7 +pop/pop.00050.au 7 +pop/pop.00051.au 7 +pop/pop.00052.au 7 +pop/pop.00066.au 7 +pop/pop.00071.au 7 +pop/pop.00074.au 7 +pop/pop.00078.au 7 +pop/pop.00085.au 7 +pop/pop.00086.au 7 +pop/pop.00088.au 7 +pop/pop.00091.au 7 +pop/pop.00093.au 7 +pop/pop.00096.au 7 +pop/pop.00097.au 7 +reggae/reggae.00001.au 8 +reggae/reggae.00003.au 8 +reggae/reggae.00011.au 8 +reggae/reggae.00012.au 8 +reggae/reggae.00016.au 8 +reggae/reggae.00017.au 8 +reggae/reggae.00019.au 8 +reggae/reggae.00023.au 8 +reggae/reggae.00024.au 8 +reggae/reggae.00029.au 8 +reggae/reggae.00030.au 8 +reggae/reggae.00033.au 8 +reggae/reggae.00039.au 8 +reggae/reggae.00041.au 8 +reggae/reggae.00043.au 8 +reggae/reggae.00044.au 8 +reggae/reggae.00055.au 8 +reggae/reggae.00057.au 8 +reggae/reggae.00062.au 8 +reggae/reggae.00064.au 8 +reggae/reggae.00070.au 8 +reggae/reggae.00072.au 8 +reggae/reggae.00079.au 8 +reggae/reggae.00080.au 8 +reggae/reggae.00086.au 8 +reggae/reggae.00088.au 8 +reggae/reggae.00092.au 8 +reggae/reggae.00097.au 8 +reggae/reggae.00098.au 8 +reggae/reggae.00099.au 8 +rock/rock.00003.au 
9 +rock/rock.00006.au 9 +rock/rock.00014.au 9 +rock/rock.00020.au 9 +rock/rock.00021.au 9 +rock/rock.00023.au 9 +rock/rock.00025.au 9 +rock/rock.00026.au 9 +rock/rock.00028.au 9 +rock/rock.00031.au 9 +rock/rock.00032.au 9 +rock/rock.00036.au 9 +rock/rock.00039.au 9 +rock/rock.00041.au 9 +rock/rock.00042.au 9 +rock/rock.00044.au 9 +rock/rock.00046.au 9 +rock/rock.00048.au 9 +rock/rock.00051.au 9 +rock/rock.00055.au 9 +rock/rock.00061.au 9 +rock/rock.00068.au 9 +rock/rock.00070.au 9 +rock/rock.00073.au 9 +rock/rock.00075.au 9 +rock/rock.00078.au 9 +rock/rock.00082.au 9 +rock/rock.00083.au 9 +rock/rock.00086.au 9 +rock/rock.00095.au 9 diff --git a/datasets/gtzan/splits/genres.trainval b/datasets/gtzan/splits/genres.trainval new file mode 100644 index 0000000..67241b2 --- /dev/null +++ b/datasets/gtzan/splits/genres.trainval @@ -0,0 +1,1000 @@ +blues/blues.00000.au +blues/blues.00001.au +blues/blues.00002.au +blues/blues.00003.au +blues/blues.00004.au +blues/blues.00005.au +blues/blues.00006.au +blues/blues.00007.au +blues/blues.00008.au +blues/blues.00009.au +blues/blues.00010.au +blues/blues.00011.au +blues/blues.00012.au +blues/blues.00013.au +blues/blues.00014.au +blues/blues.00015.au +blues/blues.00016.au +blues/blues.00017.au +blues/blues.00018.au +blues/blues.00019.au +blues/blues.00020.au +blues/blues.00021.au +blues/blues.00022.au +blues/blues.00023.au +blues/blues.00024.au +blues/blues.00025.au +blues/blues.00026.au +blues/blues.00027.au +blues/blues.00028.au +blues/blues.00029.au +blues/blues.00030.au +blues/blues.00031.au +blues/blues.00032.au +blues/blues.00033.au +blues/blues.00034.au +blues/blues.00035.au +blues/blues.00036.au +blues/blues.00037.au +blues/blues.00038.au +blues/blues.00039.au +blues/blues.00040.au +blues/blues.00041.au +blues/blues.00042.au +blues/blues.00043.au +blues/blues.00044.au +blues/blues.00045.au +blues/blues.00046.au +blues/blues.00047.au +blues/blues.00048.au +blues/blues.00049.au +blues/blues.00050.au +blues/blues.00051.au 
+blues/blues.00052.au +blues/blues.00053.au +blues/blues.00054.au +blues/blues.00055.au +blues/blues.00056.au +blues/blues.00057.au +blues/blues.00058.au +blues/blues.00059.au +blues/blues.00060.au +blues/blues.00061.au +blues/blues.00062.au +blues/blues.00063.au +blues/blues.00064.au +blues/blues.00065.au +blues/blues.00066.au +blues/blues.00067.au +blues/blues.00068.au +blues/blues.00069.au +blues/blues.00070.au +blues/blues.00071.au +blues/blues.00072.au +blues/blues.00073.au +blues/blues.00074.au +blues/blues.00075.au +blues/blues.00076.au +blues/blues.00077.au +blues/blues.00078.au +blues/blues.00079.au +blues/blues.00080.au +blues/blues.00081.au +blues/blues.00082.au +blues/blues.00083.au +blues/blues.00084.au +blues/blues.00085.au +blues/blues.00086.au +blues/blues.00087.au +blues/blues.00088.au +blues/blues.00089.au +blues/blues.00090.au +blues/blues.00091.au +blues/blues.00092.au +blues/blues.00093.au +blues/blues.00094.au +blues/blues.00095.au +blues/blues.00096.au +blues/blues.00097.au +blues/blues.00098.au +blues/blues.00099.au +classical/classical.00000.au +classical/classical.00001.au +classical/classical.00002.au +classical/classical.00003.au +classical/classical.00004.au +classical/classical.00005.au +classical/classical.00006.au +classical/classical.00007.au +classical/classical.00008.au +classical/classical.00009.au +classical/classical.00010.au +classical/classical.00011.au +classical/classical.00012.au +classical/classical.00013.au +classical/classical.00014.au +classical/classical.00015.au +classical/classical.00016.au +classical/classical.00017.au +classical/classical.00018.au +classical/classical.00019.au +classical/classical.00020.au +classical/classical.00021.au +classical/classical.00022.au +classical/classical.00023.au +classical/classical.00024.au +classical/classical.00025.au +classical/classical.00026.au +classical/classical.00027.au +classical/classical.00028.au +classical/classical.00029.au +classical/classical.00030.au 
+classical/classical.00031.au +classical/classical.00032.au +classical/classical.00033.au +classical/classical.00034.au +classical/classical.00035.au +classical/classical.00036.au +classical/classical.00037.au +classical/classical.00038.au +classical/classical.00039.au +classical/classical.00040.au +classical/classical.00041.au +classical/classical.00042.au +classical/classical.00043.au +classical/classical.00044.au +classical/classical.00045.au +classical/classical.00046.au +classical/classical.00047.au +classical/classical.00048.au +classical/classical.00049.au +classical/classical.00050.au +classical/classical.00051.au +classical/classical.00052.au +classical/classical.00053.au +classical/classical.00054.au +classical/classical.00055.au +classical/classical.00056.au +classical/classical.00057.au +classical/classical.00058.au +classical/classical.00059.au +classical/classical.00060.au +classical/classical.00061.au +classical/classical.00062.au +classical/classical.00063.au +classical/classical.00064.au +classical/classical.00065.au +classical/classical.00066.au +classical/classical.00067.au +classical/classical.00068.au +classical/classical.00069.au +classical/classical.00070.au +classical/classical.00071.au +classical/classical.00072.au +classical/classical.00073.au +classical/classical.00074.au +classical/classical.00075.au +classical/classical.00076.au +classical/classical.00077.au +classical/classical.00078.au +classical/classical.00079.au +classical/classical.00080.au +classical/classical.00081.au +classical/classical.00082.au +classical/classical.00083.au +classical/classical.00084.au +classical/classical.00085.au +classical/classical.00086.au +classical/classical.00087.au +classical/classical.00088.au +classical/classical.00089.au +classical/classical.00090.au +classical/classical.00091.au +classical/classical.00092.au +classical/classical.00093.au +classical/classical.00094.au +classical/classical.00095.au +classical/classical.00096.au 
+classical/classical.00097.au +classical/classical.00098.au +classical/classical.00099.au +country/country.00000.au +country/country.00001.au +country/country.00002.au +country/country.00003.au +country/country.00004.au +country/country.00005.au +country/country.00006.au +country/country.00007.au +country/country.00008.au +country/country.00009.au +country/country.00010.au +country/country.00011.au +country/country.00012.au +country/country.00013.au +country/country.00014.au +country/country.00015.au +country/country.00016.au +country/country.00017.au +country/country.00018.au +country/country.00019.au +country/country.00020.au +country/country.00021.au +country/country.00022.au +country/country.00023.au +country/country.00024.au +country/country.00025.au +country/country.00026.au +country/country.00027.au +country/country.00028.au +country/country.00029.au +country/country.00030.au +country/country.00031.au +country/country.00032.au +country/country.00033.au +country/country.00034.au +country/country.00035.au +country/country.00036.au +country/country.00037.au +country/country.00038.au +country/country.00039.au +country/country.00040.au +country/country.00041.au +country/country.00042.au +country/country.00043.au +country/country.00044.au +country/country.00045.au +country/country.00046.au +country/country.00047.au +country/country.00048.au +country/country.00049.au +country/country.00050.au +country/country.00051.au +country/country.00052.au +country/country.00053.au +country/country.00054.au +country/country.00055.au +country/country.00056.au +country/country.00057.au +country/country.00058.au +country/country.00059.au +country/country.00060.au +country/country.00061.au +country/country.00062.au +country/country.00063.au +country/country.00064.au +country/country.00065.au +country/country.00066.au +country/country.00067.au +country/country.00068.au +country/country.00069.au +country/country.00070.au +country/country.00071.au +country/country.00072.au 
+country/country.00073.au +country/country.00074.au +country/country.00075.au +country/country.00076.au +country/country.00077.au +country/country.00078.au +country/country.00079.au +country/country.00080.au +country/country.00081.au +country/country.00082.au +country/country.00083.au +country/country.00084.au +country/country.00085.au +country/country.00086.au +country/country.00087.au +country/country.00088.au +country/country.00089.au +country/country.00090.au +country/country.00091.au +country/country.00092.au +country/country.00093.au +country/country.00094.au +country/country.00095.au +country/country.00096.au +country/country.00097.au +country/country.00098.au +country/country.00099.au +disco/disco.00000.au +disco/disco.00001.au +disco/disco.00002.au +disco/disco.00003.au +disco/disco.00004.au +disco/disco.00005.au +disco/disco.00006.au +disco/disco.00007.au +disco/disco.00008.au +disco/disco.00009.au +disco/disco.00010.au +disco/disco.00011.au +disco/disco.00012.au +disco/disco.00013.au +disco/disco.00014.au +disco/disco.00015.au +disco/disco.00016.au +disco/disco.00017.au +disco/disco.00018.au +disco/disco.00019.au +disco/disco.00020.au +disco/disco.00021.au +disco/disco.00022.au +disco/disco.00023.au +disco/disco.00024.au +disco/disco.00025.au +disco/disco.00026.au +disco/disco.00027.au +disco/disco.00028.au +disco/disco.00029.au +disco/disco.00030.au +disco/disco.00031.au +disco/disco.00032.au +disco/disco.00033.au +disco/disco.00034.au +disco/disco.00035.au +disco/disco.00036.au +disco/disco.00037.au +disco/disco.00038.au +disco/disco.00039.au +disco/disco.00040.au +disco/disco.00041.au +disco/disco.00042.au +disco/disco.00043.au +disco/disco.00044.au +disco/disco.00045.au +disco/disco.00046.au +disco/disco.00047.au +disco/disco.00048.au +disco/disco.00049.au +disco/disco.00050.au +disco/disco.00051.au +disco/disco.00052.au +disco/disco.00053.au +disco/disco.00054.au +disco/disco.00055.au +disco/disco.00056.au +disco/disco.00057.au +disco/disco.00058.au 
+disco/disco.00059.au +disco/disco.00060.au +disco/disco.00061.au +disco/disco.00062.au +disco/disco.00063.au +disco/disco.00064.au +disco/disco.00065.au +disco/disco.00066.au +disco/disco.00067.au +disco/disco.00068.au +disco/disco.00069.au +disco/disco.00070.au +disco/disco.00071.au +disco/disco.00072.au +disco/disco.00073.au +disco/disco.00074.au +disco/disco.00075.au +disco/disco.00076.au +disco/disco.00077.au +disco/disco.00078.au +disco/disco.00079.au +disco/disco.00080.au +disco/disco.00081.au +disco/disco.00082.au +disco/disco.00083.au +disco/disco.00084.au +disco/disco.00085.au +disco/disco.00086.au +disco/disco.00087.au +disco/disco.00088.au +disco/disco.00089.au +disco/disco.00090.au +disco/disco.00091.au +disco/disco.00092.au +disco/disco.00093.au +disco/disco.00094.au +disco/disco.00095.au +disco/disco.00096.au +disco/disco.00097.au +disco/disco.00098.au +disco/disco.00099.au +hiphop/hiphop.00000.au +hiphop/hiphop.00001.au +hiphop/hiphop.00002.au +hiphop/hiphop.00003.au +hiphop/hiphop.00004.au +hiphop/hiphop.00005.au +hiphop/hiphop.00006.au +hiphop/hiphop.00007.au +hiphop/hiphop.00008.au +hiphop/hiphop.00009.au +hiphop/hiphop.00010.au +hiphop/hiphop.00011.au +hiphop/hiphop.00012.au +hiphop/hiphop.00013.au +hiphop/hiphop.00014.au +hiphop/hiphop.00015.au +hiphop/hiphop.00016.au +hiphop/hiphop.00017.au +hiphop/hiphop.00018.au +hiphop/hiphop.00019.au +hiphop/hiphop.00020.au +hiphop/hiphop.00021.au +hiphop/hiphop.00022.au +hiphop/hiphop.00023.au +hiphop/hiphop.00024.au +hiphop/hiphop.00025.au +hiphop/hiphop.00026.au +hiphop/hiphop.00027.au +hiphop/hiphop.00028.au +hiphop/hiphop.00029.au +hiphop/hiphop.00030.au +hiphop/hiphop.00031.au +hiphop/hiphop.00032.au +hiphop/hiphop.00033.au +hiphop/hiphop.00034.au +hiphop/hiphop.00035.au +hiphop/hiphop.00036.au +hiphop/hiphop.00037.au +hiphop/hiphop.00038.au +hiphop/hiphop.00039.au +hiphop/hiphop.00040.au +hiphop/hiphop.00041.au +hiphop/hiphop.00042.au +hiphop/hiphop.00043.au +hiphop/hiphop.00044.au 
+hiphop/hiphop.00045.au +hiphop/hiphop.00046.au +hiphop/hiphop.00047.au +hiphop/hiphop.00048.au +hiphop/hiphop.00049.au +hiphop/hiphop.00050.au +hiphop/hiphop.00051.au +hiphop/hiphop.00052.au +hiphop/hiphop.00053.au +hiphop/hiphop.00054.au +hiphop/hiphop.00055.au +hiphop/hiphop.00056.au +hiphop/hiphop.00057.au +hiphop/hiphop.00058.au +hiphop/hiphop.00059.au +hiphop/hiphop.00060.au +hiphop/hiphop.00061.au +hiphop/hiphop.00062.au +hiphop/hiphop.00063.au +hiphop/hiphop.00064.au +hiphop/hiphop.00065.au +hiphop/hiphop.00066.au +hiphop/hiphop.00067.au +hiphop/hiphop.00068.au +hiphop/hiphop.00069.au +hiphop/hiphop.00070.au +hiphop/hiphop.00071.au +hiphop/hiphop.00072.au +hiphop/hiphop.00073.au +hiphop/hiphop.00074.au +hiphop/hiphop.00075.au +hiphop/hiphop.00076.au +hiphop/hiphop.00077.au +hiphop/hiphop.00078.au +hiphop/hiphop.00079.au +hiphop/hiphop.00080.au +hiphop/hiphop.00081.au +hiphop/hiphop.00082.au +hiphop/hiphop.00083.au +hiphop/hiphop.00084.au +hiphop/hiphop.00085.au +hiphop/hiphop.00086.au +hiphop/hiphop.00087.au +hiphop/hiphop.00088.au +hiphop/hiphop.00089.au +hiphop/hiphop.00090.au +hiphop/hiphop.00091.au +hiphop/hiphop.00092.au +hiphop/hiphop.00093.au +hiphop/hiphop.00094.au +hiphop/hiphop.00095.au +hiphop/hiphop.00096.au +hiphop/hiphop.00097.au +hiphop/hiphop.00098.au +hiphop/hiphop.00099.au +jazz/jazz.00000.au +jazz/jazz.00001.au +jazz/jazz.00002.au +jazz/jazz.00003.au +jazz/jazz.00004.au +jazz/jazz.00005.au +jazz/jazz.00006.au +jazz/jazz.00007.au +jazz/jazz.00008.au +jazz/jazz.00009.au +jazz/jazz.00010.au +jazz/jazz.00011.au +jazz/jazz.00012.au +jazz/jazz.00013.au +jazz/jazz.00014.au +jazz/jazz.00015.au +jazz/jazz.00016.au +jazz/jazz.00017.au +jazz/jazz.00018.au +jazz/jazz.00019.au +jazz/jazz.00020.au +jazz/jazz.00021.au +jazz/jazz.00022.au +jazz/jazz.00023.au +jazz/jazz.00024.au +jazz/jazz.00025.au +jazz/jazz.00026.au +jazz/jazz.00027.au +jazz/jazz.00028.au +jazz/jazz.00029.au +jazz/jazz.00030.au +jazz/jazz.00031.au +jazz/jazz.00032.au +jazz/jazz.00033.au 
+jazz/jazz.00034.au +jazz/jazz.00035.au +jazz/jazz.00036.au +jazz/jazz.00037.au +jazz/jazz.00038.au +jazz/jazz.00039.au +jazz/jazz.00040.au +jazz/jazz.00041.au +jazz/jazz.00042.au +jazz/jazz.00043.au +jazz/jazz.00044.au +jazz/jazz.00045.au +jazz/jazz.00046.au +jazz/jazz.00047.au +jazz/jazz.00048.au +jazz/jazz.00049.au +jazz/jazz.00050.au +jazz/jazz.00051.au +jazz/jazz.00052.au +jazz/jazz.00053.au +jazz/jazz.00054.au +jazz/jazz.00055.au +jazz/jazz.00056.au +jazz/jazz.00057.au +jazz/jazz.00058.au +jazz/jazz.00059.au +jazz/jazz.00060.au +jazz/jazz.00061.au +jazz/jazz.00062.au +jazz/jazz.00063.au +jazz/jazz.00064.au +jazz/jazz.00065.au +jazz/jazz.00066.au +jazz/jazz.00067.au +jazz/jazz.00068.au +jazz/jazz.00069.au +jazz/jazz.00070.au +jazz/jazz.00071.au +jazz/jazz.00072.au +jazz/jazz.00073.au +jazz/jazz.00074.au +jazz/jazz.00075.au +jazz/jazz.00076.au +jazz/jazz.00077.au +jazz/jazz.00078.au +jazz/jazz.00079.au +jazz/jazz.00080.au +jazz/jazz.00081.au +jazz/jazz.00082.au +jazz/jazz.00083.au +jazz/jazz.00084.au +jazz/jazz.00085.au +jazz/jazz.00086.au +jazz/jazz.00087.au +jazz/jazz.00088.au +jazz/jazz.00089.au +jazz/jazz.00090.au +jazz/jazz.00091.au +jazz/jazz.00092.au +jazz/jazz.00093.au +jazz/jazz.00094.au +jazz/jazz.00095.au +jazz/jazz.00096.au +jazz/jazz.00097.au +jazz/jazz.00098.au +jazz/jazz.00099.au +metal/metal.00000.au +metal/metal.00001.au +metal/metal.00002.au +metal/metal.00003.au +metal/metal.00004.au +metal/metal.00005.au +metal/metal.00006.au +metal/metal.00007.au +metal/metal.00008.au +metal/metal.00009.au +metal/metal.00010.au +metal/metal.00011.au +metal/metal.00012.au +metal/metal.00013.au +metal/metal.00014.au +metal/metal.00015.au +metal/metal.00016.au +metal/metal.00017.au +metal/metal.00018.au +metal/metal.00019.au +metal/metal.00020.au +metal/metal.00021.au +metal/metal.00022.au +metal/metal.00023.au +metal/metal.00024.au +metal/metal.00025.au +metal/metal.00026.au +metal/metal.00027.au +metal/metal.00028.au +metal/metal.00029.au 
+metal/metal.00030.au +metal/metal.00031.au +metal/metal.00032.au +metal/metal.00033.au +metal/metal.00034.au +metal/metal.00035.au +metal/metal.00036.au +metal/metal.00037.au +metal/metal.00038.au +metal/metal.00039.au +metal/metal.00040.au +metal/metal.00041.au +metal/metal.00042.au +metal/metal.00043.au +metal/metal.00044.au +metal/metal.00045.au +metal/metal.00046.au +metal/metal.00047.au +metal/metal.00048.au +metal/metal.00049.au +metal/metal.00050.au +metal/metal.00051.au +metal/metal.00052.au +metal/metal.00053.au +metal/metal.00054.au +metal/metal.00055.au +metal/metal.00056.au +metal/metal.00057.au +metal/metal.00058.au +metal/metal.00059.au +metal/metal.00060.au +metal/metal.00061.au +metal/metal.00062.au +metal/metal.00063.au +metal/metal.00064.au +metal/metal.00065.au +metal/metal.00066.au +metal/metal.00067.au +metal/metal.00068.au +metal/metal.00069.au +metal/metal.00070.au +metal/metal.00071.au +metal/metal.00072.au +metal/metal.00073.au +metal/metal.00074.au +metal/metal.00075.au +metal/metal.00076.au +metal/metal.00077.au +metal/metal.00078.au +metal/metal.00079.au +metal/metal.00080.au +metal/metal.00081.au +metal/metal.00082.au +metal/metal.00083.au +metal/metal.00084.au +metal/metal.00085.au +metal/metal.00086.au +metal/metal.00087.au +metal/metal.00088.au +metal/metal.00089.au +metal/metal.00090.au +metal/metal.00091.au +metal/metal.00092.au +metal/metal.00093.au +metal/metal.00094.au +metal/metal.00095.au +metal/metal.00096.au +metal/metal.00097.au +metal/metal.00098.au +metal/metal.00099.au +pop/pop.00000.au +pop/pop.00001.au +pop/pop.00002.au +pop/pop.00003.au +pop/pop.00004.au +pop/pop.00005.au +pop/pop.00006.au +pop/pop.00007.au +pop/pop.00008.au +pop/pop.00009.au +pop/pop.00010.au +pop/pop.00011.au +pop/pop.00012.au +pop/pop.00013.au +pop/pop.00014.au +pop/pop.00015.au +pop/pop.00016.au +pop/pop.00017.au +pop/pop.00018.au +pop/pop.00019.au +pop/pop.00020.au +pop/pop.00021.au +pop/pop.00022.au +pop/pop.00023.au +pop/pop.00024.au 
+pop/pop.00025.au +pop/pop.00026.au +pop/pop.00027.au +pop/pop.00028.au +pop/pop.00029.au +pop/pop.00030.au +pop/pop.00031.au +pop/pop.00032.au +pop/pop.00033.au +pop/pop.00034.au +pop/pop.00035.au +pop/pop.00036.au +pop/pop.00037.au +pop/pop.00038.au +pop/pop.00039.au +pop/pop.00040.au +pop/pop.00041.au +pop/pop.00042.au +pop/pop.00043.au +pop/pop.00044.au +pop/pop.00045.au +pop/pop.00046.au +pop/pop.00047.au +pop/pop.00048.au +pop/pop.00049.au +pop/pop.00050.au +pop/pop.00051.au +pop/pop.00052.au +pop/pop.00053.au +pop/pop.00054.au +pop/pop.00055.au +pop/pop.00056.au +pop/pop.00057.au +pop/pop.00058.au +pop/pop.00059.au +pop/pop.00060.au +pop/pop.00061.au +pop/pop.00062.au +pop/pop.00063.au +pop/pop.00064.au +pop/pop.00065.au +pop/pop.00066.au +pop/pop.00067.au +pop/pop.00068.au +pop/pop.00069.au +pop/pop.00070.au +pop/pop.00071.au +pop/pop.00072.au +pop/pop.00073.au +pop/pop.00074.au +pop/pop.00075.au +pop/pop.00076.au +pop/pop.00077.au +pop/pop.00078.au +pop/pop.00079.au +pop/pop.00080.au +pop/pop.00081.au +pop/pop.00082.au +pop/pop.00083.au +pop/pop.00084.au +pop/pop.00085.au +pop/pop.00086.au +pop/pop.00087.au +pop/pop.00088.au +pop/pop.00089.au +pop/pop.00090.au +pop/pop.00091.au +pop/pop.00092.au +pop/pop.00093.au +pop/pop.00094.au +pop/pop.00095.au +pop/pop.00096.au +pop/pop.00097.au +pop/pop.00098.au +pop/pop.00099.au +reggae/reggae.00000.au +reggae/reggae.00001.au +reggae/reggae.00002.au +reggae/reggae.00003.au +reggae/reggae.00004.au +reggae/reggae.00005.au +reggae/reggae.00006.au +reggae/reggae.00007.au +reggae/reggae.00008.au +reggae/reggae.00009.au +reggae/reggae.00010.au +reggae/reggae.00011.au +reggae/reggae.00012.au +reggae/reggae.00013.au +reggae/reggae.00014.au +reggae/reggae.00015.au +reggae/reggae.00016.au +reggae/reggae.00017.au +reggae/reggae.00018.au +reggae/reggae.00019.au +reggae/reggae.00020.au +reggae/reggae.00021.au +reggae/reggae.00022.au +reggae/reggae.00023.au +reggae/reggae.00024.au +reggae/reggae.00025.au +reggae/reggae.00026.au 
+reggae/reggae.00027.au +reggae/reggae.00028.au +reggae/reggae.00029.au +reggae/reggae.00030.au +reggae/reggae.00031.au +reggae/reggae.00032.au +reggae/reggae.00033.au +reggae/reggae.00034.au +reggae/reggae.00035.au +reggae/reggae.00036.au +reggae/reggae.00037.au +reggae/reggae.00038.au +reggae/reggae.00039.au +reggae/reggae.00040.au +reggae/reggae.00041.au +reggae/reggae.00042.au +reggae/reggae.00043.au +reggae/reggae.00044.au +reggae/reggae.00045.au +reggae/reggae.00046.au +reggae/reggae.00047.au +reggae/reggae.00048.au +reggae/reggae.00049.au +reggae/reggae.00050.au +reggae/reggae.00051.au +reggae/reggae.00052.au +reggae/reggae.00053.au +reggae/reggae.00054.au +reggae/reggae.00055.au +reggae/reggae.00056.au +reggae/reggae.00057.au +reggae/reggae.00058.au +reggae/reggae.00059.au +reggae/reggae.00060.au +reggae/reggae.00061.au +reggae/reggae.00062.au +reggae/reggae.00063.au +reggae/reggae.00064.au +reggae/reggae.00065.au +reggae/reggae.00066.au +reggae/reggae.00067.au +reggae/reggae.00068.au +reggae/reggae.00069.au +reggae/reggae.00070.au +reggae/reggae.00071.au +reggae/reggae.00072.au +reggae/reggae.00073.au +reggae/reggae.00074.au +reggae/reggae.00075.au +reggae/reggae.00076.au +reggae/reggae.00077.au +reggae/reggae.00078.au +reggae/reggae.00079.au +reggae/reggae.00080.au +reggae/reggae.00081.au +reggae/reggae.00082.au +reggae/reggae.00083.au +reggae/reggae.00084.au +reggae/reggae.00085.au +reggae/reggae.00086.au +reggae/reggae.00087.au +reggae/reggae.00088.au +reggae/reggae.00089.au +reggae/reggae.00090.au +reggae/reggae.00091.au +reggae/reggae.00092.au +reggae/reggae.00093.au +reggae/reggae.00094.au +reggae/reggae.00095.au +reggae/reggae.00096.au +reggae/reggae.00097.au +reggae/reggae.00098.au +reggae/reggae.00099.au +rock/rock.00000.au +rock/rock.00001.au +rock/rock.00002.au +rock/rock.00003.au +rock/rock.00004.au +rock/rock.00005.au +rock/rock.00006.au +rock/rock.00007.au +rock/rock.00008.au +rock/rock.00009.au +rock/rock.00010.au +rock/rock.00011.au 
+rock/rock.00012.au +rock/rock.00013.au +rock/rock.00014.au +rock/rock.00015.au +rock/rock.00016.au +rock/rock.00017.au +rock/rock.00018.au +rock/rock.00019.au +rock/rock.00020.au +rock/rock.00021.au +rock/rock.00022.au +rock/rock.00023.au +rock/rock.00024.au +rock/rock.00025.au +rock/rock.00026.au +rock/rock.00027.au +rock/rock.00028.au +rock/rock.00029.au +rock/rock.00030.au +rock/rock.00031.au +rock/rock.00032.au +rock/rock.00033.au +rock/rock.00034.au +rock/rock.00035.au +rock/rock.00036.au +rock/rock.00037.au +rock/rock.00038.au +rock/rock.00039.au +rock/rock.00040.au +rock/rock.00041.au +rock/rock.00042.au +rock/rock.00043.au +rock/rock.00044.au +rock/rock.00045.au +rock/rock.00046.au +rock/rock.00047.au +rock/rock.00048.au +rock/rock.00049.au +rock/rock.00050.au +rock/rock.00051.au +rock/rock.00052.au +rock/rock.00053.au +rock/rock.00054.au +rock/rock.00055.au +rock/rock.00056.au +rock/rock.00057.au +rock/rock.00058.au +rock/rock.00059.au +rock/rock.00060.au +rock/rock.00061.au +rock/rock.00062.au +rock/rock.00063.au +rock/rock.00064.au +rock/rock.00065.au +rock/rock.00066.au +rock/rock.00067.au +rock/rock.00068.au +rock/rock.00069.au +rock/rock.00070.au +rock/rock.00071.au +rock/rock.00072.au +rock/rock.00073.au +rock/rock.00074.au +rock/rock.00075.au +rock/rock.00076.au +rock/rock.00077.au +rock/rock.00078.au +rock/rock.00079.au +rock/rock.00080.au +rock/rock.00081.au +rock/rock.00082.au +rock/rock.00083.au +rock/rock.00084.au +rock/rock.00085.au +rock/rock.00086.au +rock/rock.00087.au +rock/rock.00088.au +rock/rock.00089.au +rock/rock.00090.au +rock/rock.00091.au +rock/rock.00092.au +rock/rock.00093.au +rock/rock.00094.au +rock/rock.00095.au +rock/rock.00096.au +rock/rock.00097.au +rock/rock.00098.au +rock/rock.00099.au diff --git a/datasets/gtzan/splits/hiphop.train b/datasets/gtzan/splits/hiphop.train new file mode 100644 index 0000000..72cb650 --- /dev/null +++ b/datasets/gtzan/splits/hiphop.train @@ -0,0 +1,70 @@ +hiphop/hiphop.00000.au 4 
+hiphop/hiphop.00001.au 4 +hiphop/hiphop.00003.au 4 +hiphop/hiphop.00006.au 4 +hiphop/hiphop.00007.au 4 +hiphop/hiphop.00008.au 4 +hiphop/hiphop.00009.au 4 +hiphop/hiphop.00011.au 4 +hiphop/hiphop.00012.au 4 +hiphop/hiphop.00015.au 4 +hiphop/hiphop.00017.au 4 +hiphop/hiphop.00018.au 4 +hiphop/hiphop.00021.au 4 +hiphop/hiphop.00022.au 4 +hiphop/hiphop.00023.au 4 +hiphop/hiphop.00025.au 4 +hiphop/hiphop.00027.au 4 +hiphop/hiphop.00028.au 4 +hiphop/hiphop.00030.au 4 +hiphop/hiphop.00031.au 4 +hiphop/hiphop.00032.au 4 +hiphop/hiphop.00033.au 4 +hiphop/hiphop.00034.au 4 +hiphop/hiphop.00036.au 4 +hiphop/hiphop.00038.au 4 +hiphop/hiphop.00039.au 4 +hiphop/hiphop.00040.au 4 +hiphop/hiphop.00042.au 4 +hiphop/hiphop.00044.au 4 +hiphop/hiphop.00045.au 4 +hiphop/hiphop.00046.au 4 +hiphop/hiphop.00048.au 4 +hiphop/hiphop.00049.au 4 +hiphop/hiphop.00050.au 4 +hiphop/hiphop.00051.au 4 +hiphop/hiphop.00052.au 4 +hiphop/hiphop.00055.au 4 +hiphop/hiphop.00056.au 4 +hiphop/hiphop.00057.au 4 +hiphop/hiphop.00058.au 4 +hiphop/hiphop.00059.au 4 +hiphop/hiphop.00061.au 4 +hiphop/hiphop.00062.au 4 +hiphop/hiphop.00064.au 4 +hiphop/hiphop.00065.au 4 +hiphop/hiphop.00066.au 4 +hiphop/hiphop.00067.au 4 +hiphop/hiphop.00068.au 4 +hiphop/hiphop.00069.au 4 +hiphop/hiphop.00072.au 4 +hiphop/hiphop.00073.au 4 +hiphop/hiphop.00074.au 4 +hiphop/hiphop.00075.au 4 +hiphop/hiphop.00077.au 4 +hiphop/hiphop.00078.au 4 +hiphop/hiphop.00079.au 4 +hiphop/hiphop.00082.au 4 +hiphop/hiphop.00083.au 4 +hiphop/hiphop.00084.au 4 +hiphop/hiphop.00086.au 4 +hiphop/hiphop.00087.au 4 +hiphop/hiphop.00088.au 4 +hiphop/hiphop.00089.au 4 +hiphop/hiphop.00090.au 4 +hiphop/hiphop.00091.au 4 +hiphop/hiphop.00092.au 4 +hiphop/hiphop.00093.au 4 +hiphop/hiphop.00094.au 4 +hiphop/hiphop.00098.au 4 +hiphop/hiphop.00099.au 4 diff --git a/datasets/gtzan/splits/hiphop.trainval b/datasets/gtzan/splits/hiphop.trainval new file mode 100644 index 0000000..a89f95b --- /dev/null +++ b/datasets/gtzan/splits/hiphop.trainval @@ -0,0 
+1,100 @@ +hiphop/hiphop.00000.au 4 +hiphop/hiphop.00001.au 4 +hiphop/hiphop.00002.au 4 +hiphop/hiphop.00003.au 4 +hiphop/hiphop.00004.au 4 +hiphop/hiphop.00005.au 4 +hiphop/hiphop.00006.au 4 +hiphop/hiphop.00007.au 4 +hiphop/hiphop.00008.au 4 +hiphop/hiphop.00009.au 4 +hiphop/hiphop.00010.au 4 +hiphop/hiphop.00011.au 4 +hiphop/hiphop.00012.au 4 +hiphop/hiphop.00013.au 4 +hiphop/hiphop.00014.au 4 +hiphop/hiphop.00015.au 4 +hiphop/hiphop.00016.au 4 +hiphop/hiphop.00017.au 4 +hiphop/hiphop.00018.au 4 +hiphop/hiphop.00019.au 4 +hiphop/hiphop.00020.au 4 +hiphop/hiphop.00021.au 4 +hiphop/hiphop.00022.au 4 +hiphop/hiphop.00023.au 4 +hiphop/hiphop.00024.au 4 +hiphop/hiphop.00025.au 4 +hiphop/hiphop.00026.au 4 +hiphop/hiphop.00027.au 4 +hiphop/hiphop.00028.au 4 +hiphop/hiphop.00029.au 4 +hiphop/hiphop.00030.au 4 +hiphop/hiphop.00031.au 4 +hiphop/hiphop.00032.au 4 +hiphop/hiphop.00033.au 4 +hiphop/hiphop.00034.au 4 +hiphop/hiphop.00035.au 4 +hiphop/hiphop.00036.au 4 +hiphop/hiphop.00037.au 4 +hiphop/hiphop.00038.au 4 +hiphop/hiphop.00039.au 4 +hiphop/hiphop.00040.au 4 +hiphop/hiphop.00041.au 4 +hiphop/hiphop.00042.au 4 +hiphop/hiphop.00043.au 4 +hiphop/hiphop.00044.au 4 +hiphop/hiphop.00045.au 4 +hiphop/hiphop.00046.au 4 +hiphop/hiphop.00047.au 4 +hiphop/hiphop.00048.au 4 +hiphop/hiphop.00049.au 4 +hiphop/hiphop.00050.au 4 +hiphop/hiphop.00051.au 4 +hiphop/hiphop.00052.au 4 +hiphop/hiphop.00053.au 4 +hiphop/hiphop.00054.au 4 +hiphop/hiphop.00055.au 4 +hiphop/hiphop.00056.au 4 +hiphop/hiphop.00057.au 4 +hiphop/hiphop.00058.au 4 +hiphop/hiphop.00059.au 4 +hiphop/hiphop.00060.au 4 +hiphop/hiphop.00061.au 4 +hiphop/hiphop.00062.au 4 +hiphop/hiphop.00063.au 4 +hiphop/hiphop.00064.au 4 +hiphop/hiphop.00065.au 4 +hiphop/hiphop.00066.au 4 +hiphop/hiphop.00067.au 4 +hiphop/hiphop.00068.au 4 +hiphop/hiphop.00069.au 4 +hiphop/hiphop.00070.au 4 +hiphop/hiphop.00071.au 4 +hiphop/hiphop.00072.au 4 +hiphop/hiphop.00073.au 4 +hiphop/hiphop.00074.au 4 +hiphop/hiphop.00075.au 4 
+hiphop/hiphop.00076.au 4 +hiphop/hiphop.00077.au 4 +hiphop/hiphop.00078.au 4 +hiphop/hiphop.00079.au 4 +hiphop/hiphop.00080.au 4 +hiphop/hiphop.00081.au 4 +hiphop/hiphop.00082.au 4 +hiphop/hiphop.00083.au 4 +hiphop/hiphop.00084.au 4 +hiphop/hiphop.00085.au 4 +hiphop/hiphop.00086.au 4 +hiphop/hiphop.00087.au 4 +hiphop/hiphop.00088.au 4 +hiphop/hiphop.00089.au 4 +hiphop/hiphop.00090.au 4 +hiphop/hiphop.00091.au 4 +hiphop/hiphop.00092.au 4 +hiphop/hiphop.00093.au 4 +hiphop/hiphop.00094.au 4 +hiphop/hiphop.00095.au 4 +hiphop/hiphop.00096.au 4 +hiphop/hiphop.00097.au 4 +hiphop/hiphop.00098.au 4 +hiphop/hiphop.00099.au 4 diff --git a/datasets/gtzan/splits/hiphop.val b/datasets/gtzan/splits/hiphop.val new file mode 100644 index 0000000..41c6ece --- /dev/null +++ b/datasets/gtzan/splits/hiphop.val @@ -0,0 +1,30 @@ +hiphop/hiphop.00002.au 4 +hiphop/hiphop.00004.au 4 +hiphop/hiphop.00005.au 4 +hiphop/hiphop.00010.au 4 +hiphop/hiphop.00013.au 4 +hiphop/hiphop.00014.au 4 +hiphop/hiphop.00016.au 4 +hiphop/hiphop.00019.au 4 +hiphop/hiphop.00020.au 4 +hiphop/hiphop.00024.au 4 +hiphop/hiphop.00026.au 4 +hiphop/hiphop.00029.au 4 +hiphop/hiphop.00035.au 4 +hiphop/hiphop.00037.au 4 +hiphop/hiphop.00041.au 4 +hiphop/hiphop.00043.au 4 +hiphop/hiphop.00047.au 4 +hiphop/hiphop.00053.au 4 +hiphop/hiphop.00054.au 4 +hiphop/hiphop.00060.au 4 +hiphop/hiphop.00063.au 4 +hiphop/hiphop.00070.au 4 +hiphop/hiphop.00071.au 4 +hiphop/hiphop.00076.au 4 +hiphop/hiphop.00080.au 4 +hiphop/hiphop.00081.au 4 +hiphop/hiphop.00085.au 4 +hiphop/hiphop.00095.au 4 +hiphop/hiphop.00096.au 4 +hiphop/hiphop.00097.au 4 diff --git a/datasets/gtzan/splits/jazz.train b/datasets/gtzan/splits/jazz.train new file mode 100644 index 0000000..904b361 --- /dev/null +++ b/datasets/gtzan/splits/jazz.train @@ -0,0 +1,70 @@ +jazz/jazz.00000.au 5 +jazz/jazz.00001.au 5 +jazz/jazz.00002.au 5 +jazz/jazz.00003.au 5 +jazz/jazz.00004.au 5 +jazz/jazz.00005.au 5 +jazz/jazz.00006.au 5 +jazz/jazz.00007.au 5 +jazz/jazz.00008.au 5 
+jazz/jazz.00009.au 5 +jazz/jazz.00011.au 5 +jazz/jazz.00014.au 5 +jazz/jazz.00015.au 5 +jazz/jazz.00016.au 5 +jazz/jazz.00018.au 5 +jazz/jazz.00019.au 5 +jazz/jazz.00021.au 5 +jazz/jazz.00025.au 5 +jazz/jazz.00026.au 5 +jazz/jazz.00027.au 5 +jazz/jazz.00029.au 5 +jazz/jazz.00030.au 5 +jazz/jazz.00031.au 5 +jazz/jazz.00033.au 5 +jazz/jazz.00036.au 5 +jazz/jazz.00038.au 5 +jazz/jazz.00041.au 5 +jazz/jazz.00043.au 5 +jazz/jazz.00044.au 5 +jazz/jazz.00045.au 5 +jazz/jazz.00047.au 5 +jazz/jazz.00048.au 5 +jazz/jazz.00049.au 5 +jazz/jazz.00050.au 5 +jazz/jazz.00051.au 5 +jazz/jazz.00052.au 5 +jazz/jazz.00053.au 5 +jazz/jazz.00054.au 5 +jazz/jazz.00058.au 5 +jazz/jazz.00059.au 5 +jazz/jazz.00061.au 5 +jazz/jazz.00062.au 5 +jazz/jazz.00064.au 5 +jazz/jazz.00065.au 5 +jazz/jazz.00067.au 5 +jazz/jazz.00068.au 5 +jazz/jazz.00072.au 5 +jazz/jazz.00073.au 5 +jazz/jazz.00075.au 5 +jazz/jazz.00076.au 5 +jazz/jazz.00077.au 5 +jazz/jazz.00078.au 5 +jazz/jazz.00079.au 5 +jazz/jazz.00080.au 5 +jazz/jazz.00081.au 5 +jazz/jazz.00082.au 5 +jazz/jazz.00083.au 5 +jazz/jazz.00084.au 5 +jazz/jazz.00085.au 5 +jazz/jazz.00086.au 5 +jazz/jazz.00088.au 5 +jazz/jazz.00089.au 5 +jazz/jazz.00090.au 5 +jazz/jazz.00091.au 5 +jazz/jazz.00092.au 5 +jazz/jazz.00093.au 5 +jazz/jazz.00095.au 5 +jazz/jazz.00096.au 5 +jazz/jazz.00098.au 5 +jazz/jazz.00099.au 5 diff --git a/datasets/gtzan/splits/jazz.trainval b/datasets/gtzan/splits/jazz.trainval new file mode 100644 index 0000000..4c401e6 --- /dev/null +++ b/datasets/gtzan/splits/jazz.trainval @@ -0,0 +1,100 @@ +jazz/jazz.00000.au 5 +jazz/jazz.00001.au 5 +jazz/jazz.00002.au 5 +jazz/jazz.00003.au 5 +jazz/jazz.00004.au 5 +jazz/jazz.00005.au 5 +jazz/jazz.00006.au 5 +jazz/jazz.00007.au 5 +jazz/jazz.00008.au 5 +jazz/jazz.00009.au 5 +jazz/jazz.00010.au 5 +jazz/jazz.00011.au 5 +jazz/jazz.00012.au 5 +jazz/jazz.00013.au 5 +jazz/jazz.00014.au 5 +jazz/jazz.00015.au 5 +jazz/jazz.00016.au 5 +jazz/jazz.00017.au 5 +jazz/jazz.00018.au 5 +jazz/jazz.00019.au 5 
+jazz/jazz.00020.au 5 +jazz/jazz.00021.au 5 +jazz/jazz.00022.au 5 +jazz/jazz.00023.au 5 +jazz/jazz.00024.au 5 +jazz/jazz.00025.au 5 +jazz/jazz.00026.au 5 +jazz/jazz.00027.au 5 +jazz/jazz.00028.au 5 +jazz/jazz.00029.au 5 +jazz/jazz.00030.au 5 +jazz/jazz.00031.au 5 +jazz/jazz.00032.au 5 +jazz/jazz.00033.au 5 +jazz/jazz.00034.au 5 +jazz/jazz.00035.au 5 +jazz/jazz.00036.au 5 +jazz/jazz.00037.au 5 +jazz/jazz.00038.au 5 +jazz/jazz.00039.au 5 +jazz/jazz.00040.au 5 +jazz/jazz.00041.au 5 +jazz/jazz.00042.au 5 +jazz/jazz.00043.au 5 +jazz/jazz.00044.au 5 +jazz/jazz.00045.au 5 +jazz/jazz.00046.au 5 +jazz/jazz.00047.au 5 +jazz/jazz.00048.au 5 +jazz/jazz.00049.au 5 +jazz/jazz.00050.au 5 +jazz/jazz.00051.au 5 +jazz/jazz.00052.au 5 +jazz/jazz.00053.au 5 +jazz/jazz.00054.au 5 +jazz/jazz.00055.au 5 +jazz/jazz.00056.au 5 +jazz/jazz.00057.au 5 +jazz/jazz.00058.au 5 +jazz/jazz.00059.au 5 +jazz/jazz.00060.au 5 +jazz/jazz.00061.au 5 +jazz/jazz.00062.au 5 +jazz/jazz.00063.au 5 +jazz/jazz.00064.au 5 +jazz/jazz.00065.au 5 +jazz/jazz.00066.au 5 +jazz/jazz.00067.au 5 +jazz/jazz.00068.au 5 +jazz/jazz.00069.au 5 +jazz/jazz.00070.au 5 +jazz/jazz.00071.au 5 +jazz/jazz.00072.au 5 +jazz/jazz.00073.au 5 +jazz/jazz.00074.au 5 +jazz/jazz.00075.au 5 +jazz/jazz.00076.au 5 +jazz/jazz.00077.au 5 +jazz/jazz.00078.au 5 +jazz/jazz.00079.au 5 +jazz/jazz.00080.au 5 +jazz/jazz.00081.au 5 +jazz/jazz.00082.au 5 +jazz/jazz.00083.au 5 +jazz/jazz.00084.au 5 +jazz/jazz.00085.au 5 +jazz/jazz.00086.au 5 +jazz/jazz.00087.au 5 +jazz/jazz.00088.au 5 +jazz/jazz.00089.au 5 +jazz/jazz.00090.au 5 +jazz/jazz.00091.au 5 +jazz/jazz.00092.au 5 +jazz/jazz.00093.au 5 +jazz/jazz.00094.au 5 +jazz/jazz.00095.au 5 +jazz/jazz.00096.au 5 +jazz/jazz.00097.au 5 +jazz/jazz.00098.au 5 +jazz/jazz.00099.au 5 diff --git a/datasets/gtzan/splits/jazz.val b/datasets/gtzan/splits/jazz.val new file mode 100644 index 0000000..976bd73 --- /dev/null +++ b/datasets/gtzan/splits/jazz.val @@ -0,0 +1,30 @@ +jazz/jazz.00010.au 5 +jazz/jazz.00012.au 5 
+jazz/jazz.00013.au 5 +jazz/jazz.00017.au 5 +jazz/jazz.00020.au 5 +jazz/jazz.00022.au 5 +jazz/jazz.00023.au 5 +jazz/jazz.00024.au 5 +jazz/jazz.00028.au 5 +jazz/jazz.00032.au 5 +jazz/jazz.00034.au 5 +jazz/jazz.00035.au 5 +jazz/jazz.00037.au 5 +jazz/jazz.00039.au 5 +jazz/jazz.00040.au 5 +jazz/jazz.00042.au 5 +jazz/jazz.00046.au 5 +jazz/jazz.00055.au 5 +jazz/jazz.00056.au 5 +jazz/jazz.00057.au 5 +jazz/jazz.00060.au 5 +jazz/jazz.00063.au 5 +jazz/jazz.00066.au 5 +jazz/jazz.00069.au 5 +jazz/jazz.00070.au 5 +jazz/jazz.00071.au 5 +jazz/jazz.00074.au 5 +jazz/jazz.00087.au 5 +jazz/jazz.00094.au 5 +jazz/jazz.00097.au 5 diff --git a/datasets/gtzan/splits/metal.train b/datasets/gtzan/splits/metal.train new file mode 100644 index 0000000..4aef1c2 --- /dev/null +++ b/datasets/gtzan/splits/metal.train @@ -0,0 +1,70 @@ +metal/metal.00001.au 6 +metal/metal.00002.au 6 +metal/metal.00003.au 6 +metal/metal.00004.au 6 +metal/metal.00005.au 6 +metal/metal.00006.au 6 +metal/metal.00007.au 6 +metal/metal.00008.au 6 +metal/metal.00010.au 6 +metal/metal.00011.au 6 +metal/metal.00012.au 6 +metal/metal.00014.au 6 +metal/metal.00015.au 6 +metal/metal.00018.au 6 +metal/metal.00019.au 6 +metal/metal.00021.au 6 +metal/metal.00022.au 6 +metal/metal.00024.au 6 +metal/metal.00025.au 6 +metal/metal.00026.au 6 +metal/metal.00027.au 6 +metal/metal.00028.au 6 +metal/metal.00031.au 6 +metal/metal.00033.au 6 +metal/metal.00035.au 6 +metal/metal.00036.au 6 +metal/metal.00038.au 6 +metal/metal.00040.au 6 +metal/metal.00041.au 6 +metal/metal.00042.au 6 +metal/metal.00043.au 6 +metal/metal.00046.au 6 +metal/metal.00049.au 6 +metal/metal.00050.au 6 +metal/metal.00051.au 6 +metal/metal.00052.au 6 +metal/metal.00053.au 6 +metal/metal.00055.au 6 +metal/metal.00056.au 6 +metal/metal.00057.au 6 +metal/metal.00058.au 6 +metal/metal.00062.au 6 +metal/metal.00063.au 6 +metal/metal.00064.au 6 +metal/metal.00065.au 6 +metal/metal.00066.au 6 +metal/metal.00068.au 6 +metal/metal.00069.au 6 +metal/metal.00071.au 6 
+metal/metal.00072.au 6 +metal/metal.00073.au 6 +metal/metal.00075.au 6 +metal/metal.00076.au 6 +metal/metal.00077.au 6 +metal/metal.00078.au 6 +metal/metal.00079.au 6 +metal/metal.00080.au 6 +metal/metal.00082.au 6 +metal/metal.00084.au 6 +metal/metal.00085.au 6 +metal/metal.00087.au 6 +metal/metal.00088.au 6 +metal/metal.00089.au 6 +metal/metal.00090.au 6 +metal/metal.00092.au 6 +metal/metal.00094.au 6 +metal/metal.00095.au 6 +metal/metal.00096.au 6 +metal/metal.00098.au 6 +metal/metal.00099.au 6 diff --git a/datasets/gtzan/splits/metal.trainval b/datasets/gtzan/splits/metal.trainval new file mode 100644 index 0000000..e95137d --- /dev/null +++ b/datasets/gtzan/splits/metal.trainval @@ -0,0 +1,100 @@ +metal/metal.00000.au 6 +metal/metal.00001.au 6 +metal/metal.00002.au 6 +metal/metal.00003.au 6 +metal/metal.00004.au 6 +metal/metal.00005.au 6 +metal/metal.00006.au 6 +metal/metal.00007.au 6 +metal/metal.00008.au 6 +metal/metal.00009.au 6 +metal/metal.00010.au 6 +metal/metal.00011.au 6 +metal/metal.00012.au 6 +metal/metal.00013.au 6 +metal/metal.00014.au 6 +metal/metal.00015.au 6 +metal/metal.00016.au 6 +metal/metal.00017.au 6 +metal/metal.00018.au 6 +metal/metal.00019.au 6 +metal/metal.00020.au 6 +metal/metal.00021.au 6 +metal/metal.00022.au 6 +metal/metal.00023.au 6 +metal/metal.00024.au 6 +metal/metal.00025.au 6 +metal/metal.00026.au 6 +metal/metal.00027.au 6 +metal/metal.00028.au 6 +metal/metal.00029.au 6 +metal/metal.00030.au 6 +metal/metal.00031.au 6 +metal/metal.00032.au 6 +metal/metal.00033.au 6 +metal/metal.00034.au 6 +metal/metal.00035.au 6 +metal/metal.00036.au 6 +metal/metal.00037.au 6 +metal/metal.00038.au 6 +metal/metal.00039.au 6 +metal/metal.00040.au 6 +metal/metal.00041.au 6 +metal/metal.00042.au 6 +metal/metal.00043.au 6 +metal/metal.00044.au 6 +metal/metal.00045.au 6 +metal/metal.00046.au 6 +metal/metal.00047.au 6 +metal/metal.00048.au 6 +metal/metal.00049.au 6 +metal/metal.00050.au 6 +metal/metal.00051.au 6 +metal/metal.00052.au 6 
+metal/metal.00053.au 6 +metal/metal.00054.au 6 +metal/metal.00055.au 6 +metal/metal.00056.au 6 +metal/metal.00057.au 6 +metal/metal.00058.au 6 +metal/metal.00059.au 6 +metal/metal.00060.au 6 +metal/metal.00061.au 6 +metal/metal.00062.au 6 +metal/metal.00063.au 6 +metal/metal.00064.au 6 +metal/metal.00065.au 6 +metal/metal.00066.au 6 +metal/metal.00067.au 6 +metal/metal.00068.au 6 +metal/metal.00069.au 6 +metal/metal.00070.au 6 +metal/metal.00071.au 6 +metal/metal.00072.au 6 +metal/metal.00073.au 6 +metal/metal.00074.au 6 +metal/metal.00075.au 6 +metal/metal.00076.au 6 +metal/metal.00077.au 6 +metal/metal.00078.au 6 +metal/metal.00079.au 6 +metal/metal.00080.au 6 +metal/metal.00081.au 6 +metal/metal.00082.au 6 +metal/metal.00083.au 6 +metal/metal.00084.au 6 +metal/metal.00085.au 6 +metal/metal.00086.au 6 +metal/metal.00087.au 6 +metal/metal.00088.au 6 +metal/metal.00089.au 6 +metal/metal.00090.au 6 +metal/metal.00091.au 6 +metal/metal.00092.au 6 +metal/metal.00093.au 6 +metal/metal.00094.au 6 +metal/metal.00095.au 6 +metal/metal.00096.au 6 +metal/metal.00097.au 6 +metal/metal.00098.au 6 +metal/metal.00099.au 6 diff --git a/datasets/gtzan/splits/metal.val b/datasets/gtzan/splits/metal.val new file mode 100644 index 0000000..f437705 --- /dev/null +++ b/datasets/gtzan/splits/metal.val @@ -0,0 +1,30 @@ +metal/metal.00000.au 6 +metal/metal.00009.au 6 +metal/metal.00013.au 6 +metal/metal.00016.au 6 +metal/metal.00017.au 6 +metal/metal.00020.au 6 +metal/metal.00023.au 6 +metal/metal.00029.au 6 +metal/metal.00030.au 6 +metal/metal.00032.au 6 +metal/metal.00034.au 6 +metal/metal.00037.au 6 +metal/metal.00039.au 6 +metal/metal.00044.au 6 +metal/metal.00045.au 6 +metal/metal.00047.au 6 +metal/metal.00048.au 6 +metal/metal.00054.au 6 +metal/metal.00059.au 6 +metal/metal.00060.au 6 +metal/metal.00061.au 6 +metal/metal.00067.au 6 +metal/metal.00070.au 6 +metal/metal.00074.au 6 +metal/metal.00081.au 6 +metal/metal.00083.au 6 +metal/metal.00086.au 6 +metal/metal.00091.au 6 
+metal/metal.00093.au 6 +metal/metal.00097.au 6 diff --git a/datasets/gtzan/splits/pop.train b/datasets/gtzan/splits/pop.train new file mode 100644 index 0000000..fed41e4 --- /dev/null +++ b/datasets/gtzan/splits/pop.train @@ -0,0 +1,70 @@ +pop/pop.00000.au 7 +pop/pop.00001.au 7 +pop/pop.00003.au 7 +pop/pop.00004.au 7 +pop/pop.00006.au 7 +pop/pop.00007.au 7 +pop/pop.00008.au 7 +pop/pop.00009.au 7 +pop/pop.00010.au 7 +pop/pop.00014.au 7 +pop/pop.00016.au 7 +pop/pop.00019.au 7 +pop/pop.00021.au 7 +pop/pop.00023.au 7 +pop/pop.00024.au 7 +pop/pop.00025.au 7 +pop/pop.00027.au 7 +pop/pop.00028.au 7 +pop/pop.00029.au 7 +pop/pop.00031.au 7 +pop/pop.00032.au 7 +pop/pop.00033.au 7 +pop/pop.00034.au 7 +pop/pop.00035.au 7 +pop/pop.00036.au 7 +pop/pop.00038.au 7 +pop/pop.00039.au 7 +pop/pop.00041.au 7 +pop/pop.00042.au 7 +pop/pop.00043.au 7 +pop/pop.00044.au 7 +pop/pop.00046.au 7 +pop/pop.00047.au 7 +pop/pop.00048.au 7 +pop/pop.00053.au 7 +pop/pop.00054.au 7 +pop/pop.00055.au 7 +pop/pop.00056.au 7 +pop/pop.00057.au 7 +pop/pop.00058.au 7 +pop/pop.00059.au 7 +pop/pop.00060.au 7 +pop/pop.00061.au 7 +pop/pop.00062.au 7 +pop/pop.00063.au 7 +pop/pop.00064.au 7 +pop/pop.00065.au 7 +pop/pop.00067.au 7 +pop/pop.00068.au 7 +pop/pop.00069.au 7 +pop/pop.00070.au 7 +pop/pop.00072.au 7 +pop/pop.00073.au 7 +pop/pop.00075.au 7 +pop/pop.00076.au 7 +pop/pop.00077.au 7 +pop/pop.00079.au 7 +pop/pop.00080.au 7 +pop/pop.00081.au 7 +pop/pop.00082.au 7 +pop/pop.00083.au 7 +pop/pop.00084.au 7 +pop/pop.00087.au 7 +pop/pop.00089.au 7 +pop/pop.00090.au 7 +pop/pop.00092.au 7 +pop/pop.00094.au 7 +pop/pop.00095.au 7 +pop/pop.00098.au 7 +pop/pop.00099.au 7 diff --git a/datasets/gtzan/splits/pop.trainval b/datasets/gtzan/splits/pop.trainval new file mode 100644 index 0000000..6372a64 --- /dev/null +++ b/datasets/gtzan/splits/pop.trainval @@ -0,0 +1,100 @@ +pop/pop.00000.au 7 +pop/pop.00001.au 7 +pop/pop.00002.au 7 +pop/pop.00003.au 7 +pop/pop.00004.au 7 +pop/pop.00005.au 7 +pop/pop.00006.au 7 +pop/pop.00007.au 
7 +pop/pop.00008.au 7 +pop/pop.00009.au 7 +pop/pop.00010.au 7 +pop/pop.00011.au 7 +pop/pop.00012.au 7 +pop/pop.00013.au 7 +pop/pop.00014.au 7 +pop/pop.00015.au 7 +pop/pop.00016.au 7 +pop/pop.00017.au 7 +pop/pop.00018.au 7 +pop/pop.00019.au 7 +pop/pop.00020.au 7 +pop/pop.00021.au 7 +pop/pop.00022.au 7 +pop/pop.00023.au 7 +pop/pop.00024.au 7 +pop/pop.00025.au 7 +pop/pop.00026.au 7 +pop/pop.00027.au 7 +pop/pop.00028.au 7 +pop/pop.00029.au 7 +pop/pop.00030.au 7 +pop/pop.00031.au 7 +pop/pop.00032.au 7 +pop/pop.00033.au 7 +pop/pop.00034.au 7 +pop/pop.00035.au 7 +pop/pop.00036.au 7 +pop/pop.00037.au 7 +pop/pop.00038.au 7 +pop/pop.00039.au 7 +pop/pop.00040.au 7 +pop/pop.00041.au 7 +pop/pop.00042.au 7 +pop/pop.00043.au 7 +pop/pop.00044.au 7 +pop/pop.00045.au 7 +pop/pop.00046.au 7 +pop/pop.00047.au 7 +pop/pop.00048.au 7 +pop/pop.00049.au 7 +pop/pop.00050.au 7 +pop/pop.00051.au 7 +pop/pop.00052.au 7 +pop/pop.00053.au 7 +pop/pop.00054.au 7 +pop/pop.00055.au 7 +pop/pop.00056.au 7 +pop/pop.00057.au 7 +pop/pop.00058.au 7 +pop/pop.00059.au 7 +pop/pop.00060.au 7 +pop/pop.00061.au 7 +pop/pop.00062.au 7 +pop/pop.00063.au 7 +pop/pop.00064.au 7 +pop/pop.00065.au 7 +pop/pop.00066.au 7 +pop/pop.00067.au 7 +pop/pop.00068.au 7 +pop/pop.00069.au 7 +pop/pop.00070.au 7 +pop/pop.00071.au 7 +pop/pop.00072.au 7 +pop/pop.00073.au 7 +pop/pop.00074.au 7 +pop/pop.00075.au 7 +pop/pop.00076.au 7 +pop/pop.00077.au 7 +pop/pop.00078.au 7 +pop/pop.00079.au 7 +pop/pop.00080.au 7 +pop/pop.00081.au 7 +pop/pop.00082.au 7 +pop/pop.00083.au 7 +pop/pop.00084.au 7 +pop/pop.00085.au 7 +pop/pop.00086.au 7 +pop/pop.00087.au 7 +pop/pop.00088.au 7 +pop/pop.00089.au 7 +pop/pop.00090.au 7 +pop/pop.00091.au 7 +pop/pop.00092.au 7 +pop/pop.00093.au 7 +pop/pop.00094.au 7 +pop/pop.00095.au 7 +pop/pop.00096.au 7 +pop/pop.00097.au 7 +pop/pop.00098.au 7 +pop/pop.00099.au 7 diff --git a/datasets/gtzan/splits/pop.val b/datasets/gtzan/splits/pop.val new file mode 100644 index 0000000..b8f6875 --- /dev/null +++ 
b/datasets/gtzan/splits/pop.val @@ -0,0 +1,30 @@ +pop/pop.00002.au 7 +pop/pop.00005.au 7 +pop/pop.00011.au 7 +pop/pop.00012.au 7 +pop/pop.00013.au 7 +pop/pop.00015.au 7 +pop/pop.00017.au 7 +pop/pop.00018.au 7 +pop/pop.00020.au 7 +pop/pop.00022.au 7 +pop/pop.00026.au 7 +pop/pop.00030.au 7 +pop/pop.00037.au 7 +pop/pop.00040.au 7 +pop/pop.00045.au 7 +pop/pop.00049.au 7 +pop/pop.00050.au 7 +pop/pop.00051.au 7 +pop/pop.00052.au 7 +pop/pop.00066.au 7 +pop/pop.00071.au 7 +pop/pop.00074.au 7 +pop/pop.00078.au 7 +pop/pop.00085.au 7 +pop/pop.00086.au 7 +pop/pop.00088.au 7 +pop/pop.00091.au 7 +pop/pop.00093.au 7 +pop/pop.00096.au 7 +pop/pop.00097.au 7 diff --git a/datasets/gtzan/splits/reggae.train b/datasets/gtzan/splits/reggae.train new file mode 100644 index 0000000..2270293 --- /dev/null +++ b/datasets/gtzan/splits/reggae.train @@ -0,0 +1,70 @@ +reggae/reggae.00000.au 8 +reggae/reggae.00002.au 8 +reggae/reggae.00004.au 8 +reggae/reggae.00005.au 8 +reggae/reggae.00006.au 8 +reggae/reggae.00007.au 8 +reggae/reggae.00008.au 8 +reggae/reggae.00009.au 8 +reggae/reggae.00010.au 8 +reggae/reggae.00013.au 8 +reggae/reggae.00014.au 8 +reggae/reggae.00015.au 8 +reggae/reggae.00018.au 8 +reggae/reggae.00020.au 8 +reggae/reggae.00021.au 8 +reggae/reggae.00022.au 8 +reggae/reggae.00025.au 8 +reggae/reggae.00026.au 8 +reggae/reggae.00027.au 8 +reggae/reggae.00028.au 8 +reggae/reggae.00031.au 8 +reggae/reggae.00032.au 8 +reggae/reggae.00034.au 8 +reggae/reggae.00035.au 8 +reggae/reggae.00036.au 8 +reggae/reggae.00037.au 8 +reggae/reggae.00038.au 8 +reggae/reggae.00040.au 8 +reggae/reggae.00042.au 8 +reggae/reggae.00045.au 8 +reggae/reggae.00046.au 8 +reggae/reggae.00047.au 8 +reggae/reggae.00048.au 8 +reggae/reggae.00049.au 8 +reggae/reggae.00050.au 8 +reggae/reggae.00051.au 8 +reggae/reggae.00052.au 8 +reggae/reggae.00053.au 8 +reggae/reggae.00054.au 8 +reggae/reggae.00056.au 8 +reggae/reggae.00058.au 8 +reggae/reggae.00059.au 8 +reggae/reggae.00060.au 8 +reggae/reggae.00061.au 8 
+reggae/reggae.00063.au 8 +reggae/reggae.00065.au 8 +reggae/reggae.00066.au 8 +reggae/reggae.00067.au 8 +reggae/reggae.00068.au 8 +reggae/reggae.00069.au 8 +reggae/reggae.00071.au 8 +reggae/reggae.00073.au 8 +reggae/reggae.00074.au 8 +reggae/reggae.00075.au 8 +reggae/reggae.00076.au 8 +reggae/reggae.00077.au 8 +reggae/reggae.00078.au 8 +reggae/reggae.00081.au 8 +reggae/reggae.00082.au 8 +reggae/reggae.00083.au 8 +reggae/reggae.00084.au 8 +reggae/reggae.00085.au 8 +reggae/reggae.00087.au 8 +reggae/reggae.00089.au 8 +reggae/reggae.00090.au 8 +reggae/reggae.00091.au 8 +reggae/reggae.00093.au 8 +reggae/reggae.00094.au 8 +reggae/reggae.00095.au 8 +reggae/reggae.00096.au 8 diff --git a/datasets/gtzan/splits/reggae.trainval b/datasets/gtzan/splits/reggae.trainval new file mode 100644 index 0000000..33fa350 --- /dev/null +++ b/datasets/gtzan/splits/reggae.trainval @@ -0,0 +1,100 @@ +reggae/reggae.00000.au 8 +reggae/reggae.00001.au 8 +reggae/reggae.00002.au 8 +reggae/reggae.00003.au 8 +reggae/reggae.00004.au 8 +reggae/reggae.00005.au 8 +reggae/reggae.00006.au 8 +reggae/reggae.00007.au 8 +reggae/reggae.00008.au 8 +reggae/reggae.00009.au 8 +reggae/reggae.00010.au 8 +reggae/reggae.00011.au 8 +reggae/reggae.00012.au 8 +reggae/reggae.00013.au 8 +reggae/reggae.00014.au 8 +reggae/reggae.00015.au 8 +reggae/reggae.00016.au 8 +reggae/reggae.00017.au 8 +reggae/reggae.00018.au 8 +reggae/reggae.00019.au 8 +reggae/reggae.00020.au 8 +reggae/reggae.00021.au 8 +reggae/reggae.00022.au 8 +reggae/reggae.00023.au 8 +reggae/reggae.00024.au 8 +reggae/reggae.00025.au 8 +reggae/reggae.00026.au 8 +reggae/reggae.00027.au 8 +reggae/reggae.00028.au 8 +reggae/reggae.00029.au 8 +reggae/reggae.00030.au 8 +reggae/reggae.00031.au 8 +reggae/reggae.00032.au 8 +reggae/reggae.00033.au 8 +reggae/reggae.00034.au 8 +reggae/reggae.00035.au 8 +reggae/reggae.00036.au 8 +reggae/reggae.00037.au 8 +reggae/reggae.00038.au 8 +reggae/reggae.00039.au 8 +reggae/reggae.00040.au 8 +reggae/reggae.00041.au 8 
+reggae/reggae.00042.au 8 +reggae/reggae.00043.au 8 +reggae/reggae.00044.au 8 +reggae/reggae.00045.au 8 +reggae/reggae.00046.au 8 +reggae/reggae.00047.au 8 +reggae/reggae.00048.au 8 +reggae/reggae.00049.au 8 +reggae/reggae.00050.au 8 +reggae/reggae.00051.au 8 +reggae/reggae.00052.au 8 +reggae/reggae.00053.au 8 +reggae/reggae.00054.au 8 +reggae/reggae.00055.au 8 +reggae/reggae.00056.au 8 +reggae/reggae.00057.au 8 +reggae/reggae.00058.au 8 +reggae/reggae.00059.au 8 +reggae/reggae.00060.au 8 +reggae/reggae.00061.au 8 +reggae/reggae.00062.au 8 +reggae/reggae.00063.au 8 +reggae/reggae.00064.au 8 +reggae/reggae.00065.au 8 +reggae/reggae.00066.au 8 +reggae/reggae.00067.au 8 +reggae/reggae.00068.au 8 +reggae/reggae.00069.au 8 +reggae/reggae.00070.au 8 +reggae/reggae.00071.au 8 +reggae/reggae.00072.au 8 +reggae/reggae.00073.au 8 +reggae/reggae.00074.au 8 +reggae/reggae.00075.au 8 +reggae/reggae.00076.au 8 +reggae/reggae.00077.au 8 +reggae/reggae.00078.au 8 +reggae/reggae.00079.au 8 +reggae/reggae.00080.au 8 +reggae/reggae.00081.au 8 +reggae/reggae.00082.au 8 +reggae/reggae.00083.au 8 +reggae/reggae.00084.au 8 +reggae/reggae.00085.au 8 +reggae/reggae.00086.au 8 +reggae/reggae.00087.au 8 +reggae/reggae.00088.au 8 +reggae/reggae.00089.au 8 +reggae/reggae.00090.au 8 +reggae/reggae.00091.au 8 +reggae/reggae.00092.au 8 +reggae/reggae.00093.au 8 +reggae/reggae.00094.au 8 +reggae/reggae.00095.au 8 +reggae/reggae.00096.au 8 +reggae/reggae.00097.au 8 +reggae/reggae.00098.au 8 +reggae/reggae.00099.au 8 diff --git a/datasets/gtzan/splits/reggae.val b/datasets/gtzan/splits/reggae.val new file mode 100644 index 0000000..de6f563 --- /dev/null +++ b/datasets/gtzan/splits/reggae.val @@ -0,0 +1,30 @@ +reggae/reggae.00001.au 8 +reggae/reggae.00003.au 8 +reggae/reggae.00011.au 8 +reggae/reggae.00012.au 8 +reggae/reggae.00016.au 8 +reggae/reggae.00017.au 8 +reggae/reggae.00019.au 8 +reggae/reggae.00023.au 8 +reggae/reggae.00024.au 8 +reggae/reggae.00029.au 8 +reggae/reggae.00030.au 8 
+reggae/reggae.00033.au 8 +reggae/reggae.00039.au 8 +reggae/reggae.00041.au 8 +reggae/reggae.00043.au 8 +reggae/reggae.00044.au 8 +reggae/reggae.00055.au 8 +reggae/reggae.00057.au 8 +reggae/reggae.00062.au 8 +reggae/reggae.00064.au 8 +reggae/reggae.00070.au 8 +reggae/reggae.00072.au 8 +reggae/reggae.00079.au 8 +reggae/reggae.00080.au 8 +reggae/reggae.00086.au 8 +reggae/reggae.00088.au 8 +reggae/reggae.00092.au 8 +reggae/reggae.00097.au 8 +reggae/reggae.00098.au 8 +reggae/reggae.00099.au 8 diff --git a/datasets/gtzan/splits/rock.train b/datasets/gtzan/splits/rock.train new file mode 100644 index 0000000..fc4d9ad --- /dev/null +++ b/datasets/gtzan/splits/rock.train @@ -0,0 +1,70 @@ +rock/rock.00000.au 9 +rock/rock.00001.au 9 +rock/rock.00002.au 9 +rock/rock.00004.au 9 +rock/rock.00005.au 9 +rock/rock.00007.au 9 +rock/rock.00008.au 9 +rock/rock.00009.au 9 +rock/rock.00010.au 9 +rock/rock.00011.au 9 +rock/rock.00012.au 9 +rock/rock.00013.au 9 +rock/rock.00015.au 9 +rock/rock.00016.au 9 +rock/rock.00017.au 9 +rock/rock.00018.au 9 +rock/rock.00019.au 9 +rock/rock.00022.au 9 +rock/rock.00024.au 9 +rock/rock.00027.au 9 +rock/rock.00029.au 9 +rock/rock.00030.au 9 +rock/rock.00033.au 9 +rock/rock.00034.au 9 +rock/rock.00035.au 9 +rock/rock.00037.au 9 +rock/rock.00038.au 9 +rock/rock.00040.au 9 +rock/rock.00043.au 9 +rock/rock.00045.au 9 +rock/rock.00047.au 9 +rock/rock.00049.au 9 +rock/rock.00050.au 9 +rock/rock.00052.au 9 +rock/rock.00053.au 9 +rock/rock.00054.au 9 +rock/rock.00056.au 9 +rock/rock.00057.au 9 +rock/rock.00058.au 9 +rock/rock.00059.au 9 +rock/rock.00060.au 9 +rock/rock.00062.au 9 +rock/rock.00063.au 9 +rock/rock.00064.au 9 +rock/rock.00065.au 9 +rock/rock.00066.au 9 +rock/rock.00067.au 9 +rock/rock.00069.au 9 +rock/rock.00071.au 9 +rock/rock.00072.au 9 +rock/rock.00074.au 9 +rock/rock.00076.au 9 +rock/rock.00077.au 9 +rock/rock.00079.au 9 +rock/rock.00080.au 9 +rock/rock.00081.au 9 +rock/rock.00084.au 9 +rock/rock.00085.au 9 +rock/rock.00087.au 9 
+rock/rock.00088.au 9 +rock/rock.00089.au 9 +rock/rock.00090.au 9 +rock/rock.00091.au 9 +rock/rock.00092.au 9 +rock/rock.00093.au 9 +rock/rock.00094.au 9 +rock/rock.00096.au 9 +rock/rock.00097.au 9 +rock/rock.00098.au 9 +rock/rock.00099.au 9 diff --git a/datasets/gtzan/splits/rock.trainval b/datasets/gtzan/splits/rock.trainval new file mode 100644 index 0000000..b8a9b9a --- /dev/null +++ b/datasets/gtzan/splits/rock.trainval @@ -0,0 +1,100 @@ +rock/rock.00000.au 9 +rock/rock.00001.au 9 +rock/rock.00002.au 9 +rock/rock.00003.au 9 +rock/rock.00004.au 9 +rock/rock.00005.au 9 +rock/rock.00006.au 9 +rock/rock.00007.au 9 +rock/rock.00008.au 9 +rock/rock.00009.au 9 +rock/rock.00010.au 9 +rock/rock.00011.au 9 +rock/rock.00012.au 9 +rock/rock.00013.au 9 +rock/rock.00014.au 9 +rock/rock.00015.au 9 +rock/rock.00016.au 9 +rock/rock.00017.au 9 +rock/rock.00018.au 9 +rock/rock.00019.au 9 +rock/rock.00020.au 9 +rock/rock.00021.au 9 +rock/rock.00022.au 9 +rock/rock.00023.au 9 +rock/rock.00024.au 9 +rock/rock.00025.au 9 +rock/rock.00026.au 9 +rock/rock.00027.au 9 +rock/rock.00028.au 9 +rock/rock.00029.au 9 +rock/rock.00030.au 9 +rock/rock.00031.au 9 +rock/rock.00032.au 9 +rock/rock.00033.au 9 +rock/rock.00034.au 9 +rock/rock.00035.au 9 +rock/rock.00036.au 9 +rock/rock.00037.au 9 +rock/rock.00038.au 9 +rock/rock.00039.au 9 +rock/rock.00040.au 9 +rock/rock.00041.au 9 +rock/rock.00042.au 9 +rock/rock.00043.au 9 +rock/rock.00044.au 9 +rock/rock.00045.au 9 +rock/rock.00046.au 9 +rock/rock.00047.au 9 +rock/rock.00048.au 9 +rock/rock.00049.au 9 +rock/rock.00050.au 9 +rock/rock.00051.au 9 +rock/rock.00052.au 9 +rock/rock.00053.au 9 +rock/rock.00054.au 9 +rock/rock.00055.au 9 +rock/rock.00056.au 9 +rock/rock.00057.au 9 +rock/rock.00058.au 9 +rock/rock.00059.au 9 +rock/rock.00060.au 9 +rock/rock.00061.au 9 +rock/rock.00062.au 9 +rock/rock.00063.au 9 +rock/rock.00064.au 9 +rock/rock.00065.au 9 +rock/rock.00066.au 9 +rock/rock.00067.au 9 +rock/rock.00068.au 9 +rock/rock.00069.au 9 
+rock/rock.00070.au 9 +rock/rock.00071.au 9 +rock/rock.00072.au 9 +rock/rock.00073.au 9 +rock/rock.00074.au 9 +rock/rock.00075.au 9 +rock/rock.00076.au 9 +rock/rock.00077.au 9 +rock/rock.00078.au 9 +rock/rock.00079.au 9 +rock/rock.00080.au 9 +rock/rock.00081.au 9 +rock/rock.00082.au 9 +rock/rock.00083.au 9 +rock/rock.00084.au 9 +rock/rock.00085.au 9 +rock/rock.00086.au 9 +rock/rock.00087.au 9 +rock/rock.00088.au 9 +rock/rock.00089.au 9 +rock/rock.00090.au 9 +rock/rock.00091.au 9 +rock/rock.00092.au 9 +rock/rock.00093.au 9 +rock/rock.00094.au 9 +rock/rock.00095.au 9 +rock/rock.00096.au 9 +rock/rock.00097.au 9 +rock/rock.00098.au 9 +rock/rock.00099.au 9 diff --git a/datasets/gtzan/splits/rock.val b/datasets/gtzan/splits/rock.val new file mode 100644 index 0000000..57ad0f4 --- /dev/null +++ b/datasets/gtzan/splits/rock.val @@ -0,0 +1,30 @@ +rock/rock.00003.au 9 +rock/rock.00006.au 9 +rock/rock.00014.au 9 +rock/rock.00020.au 9 +rock/rock.00021.au 9 +rock/rock.00023.au 9 +rock/rock.00025.au 9 +rock/rock.00026.au 9 +rock/rock.00028.au 9 +rock/rock.00031.au 9 +rock/rock.00032.au 9 +rock/rock.00036.au 9 +rock/rock.00039.au 9 +rock/rock.00041.au 9 +rock/rock.00042.au 9 +rock/rock.00044.au 9 +rock/rock.00046.au 9 +rock/rock.00048.au 9 +rock/rock.00051.au 9 +rock/rock.00055.au 9 +rock/rock.00061.au 9 +rock/rock.00068.au 9 +rock/rock.00070.au 9 +rock/rock.00073.au 9 +rock/rock.00075.au 9 +rock/rock.00078.au 9 +rock/rock.00082.au 9 +rock/rock.00083.au 9 +rock/rock.00086.au 9 +rock/rock.00095.au 9 diff --git a/datasets/uci_adult/features b/datasets/uci_adult/features new file mode 100644 index 0000000..eef456b --- /dev/null +++ b/datasets/uci_adult/features @@ -0,0 +1,14 @@ +C +Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked +C +Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm, Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate, 5th-6th, Preschool +C +Married-civ-spouse, Divorced, Never-married, 
Separated, Widowed, Married-spouse-absent, Married-AF-spouse +Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv, Armed-Forces +Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried +White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black +Female, Male +C +C +C +United-States, Cambodia, England, Puerto-Rico, Canada, Germany, Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras, Philippines, Italy, Poland, Jamaica, Vietnam, Mexico, Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands diff --git a/datasets/uci_adult/get_data.sh b/datasets/uci_adult/get_data.sh new file mode 100644 index 0000000..6f9d07c --- /dev/null +++ b/datasets/uci_adult/get_data.sh @@ -0,0 +1,8 @@ +#Description: A python 2.7 implementation of gcForest proposed in [1]. A demo implementation of gcForest library as well as some demo client scripts to demonstrate how to use the code. The implementation is flexible enough for modifying the model or +#fitting your own datasets. +#Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. (https://arxiv.org/abs/1702.08835v2 ) +#Requirements: This package is developed with Python 2.7, please make sure all the dependencies are installed, which are specified in requirements.txt +#ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou(zhouzh@lamda.nju.edu.cn) +#ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explain how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng. 
+wget http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data +wget http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test diff --git a/datasets/uci_letter/get_data.sh b/datasets/uci_letter/get_data.sh new file mode 100644 index 0000000..3dcf3a9 --- /dev/null +++ b/datasets/uci_letter/get_data.sh @@ -0,0 +1,7 @@ +#Description: A python 2.7 implementation of gcForest proposed in [1]. A demo implementation of gcForest library as well as some demo client scripts to demonstrate how to use the code. The implementation is flexible enough for modifying the model or +#fitting your own datasets. +#Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. (https://arxiv.org/abs/1702.08835v2 ) +#Requirements: This package is developed with Python 2.7, please make sure all the dependencies are installed, which are specified in requirements.txt +#ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou(zhouzh@lamda.nju.edu.cn) +#ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explain how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng. +wget http://archive.ics.uci.edu/ml/machine-learning-databases/letter-recognition/letter-recognition.data \ No newline at end of file diff --git a/datasets/uci_semg/get_data.sh b/datasets/uci_semg/get_data.sh new file mode 100644 index 0000000..d630b58 --- /dev/null +++ b/datasets/uci_semg/get_data.sh @@ -0,0 +1,8 @@ +#Description: A python 2.7 implementation of gcForest proposed in [1]. A demo implementation of gcForest library as well as some demo client scripts to demonstrate how to use the code. The implementation is flexible enough for modifying the model or +#fitting your own datasets. +#Reference: [1] Z.-H. Zhou and J. Feng. 
Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. (https://arxiv.org/abs/1702.08835v2 ) +#Requirements: This package is developed with Python 2.7, please make sure all the dependencies are installed, which are specified in requirements.txt +#ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou(zhouzh@lamda.nju.edu.cn) +#ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explain how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng. +wget http://archive.ics.uci.edu/ml/machine-learning-databases/00313/sEMG_Basic_Hand_movements_upatras.zip +unzip sEMG_Basic_Hand_movements_upatras.zip diff --git a/datasets/uci_yeast/get_data.sh b/datasets/uci_yeast/get_data.sh new file mode 100644 index 0000000..e169d9d --- /dev/null +++ b/datasets/uci_yeast/get_data.sh @@ -0,0 +1,7 @@ +#Description: A python 2.7 implementation of gcForest proposed in [1]. A demo implementation of gcForest library as well as some demo client scripts to demonstrate how to use the code. The implementation is flexible enough for modifying the model or +#fitting your own datasets. +#Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. (https://arxiv.org/abs/1702.08835v2 ) +#Requirements: This package is developed with Python 2.7, please make sure all the dependencies are installed, which are specified in requirements.txt +#ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou(zhouzh@lamda.nju.edu.cn) +#ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explain how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng. 
+wget http://archive.ics.uci.edu/ml/machine-learning-databases/yeast/yeast.data diff --git a/datasets/uci_yeast/yeast.label b/datasets/uci_yeast/yeast.label new file mode 100644 index 0000000..977ed84 --- /dev/null +++ b/datasets/uci_yeast/yeast.label @@ -0,0 +1,10 @@ +0 CYT +1 NUC +2 MIT +3 ME3 +4 ME2 +5 ME1 +6 EXC +7 VAC +8 POX +9 ERL \ No newline at end of file diff --git a/lib/gcforest/__init__.py b/lib/gcforest/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lib/gcforest/cascade/__init__.py b/lib/gcforest/cascade/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lib/gcforest/cascade/cascade_classifier.py b/lib/gcforest/cascade/cascade_classifier.py new file mode 100644 index 0000000..acb86b6 --- /dev/null +++ b/lib/gcforest/cascade/cascade_classifier.py @@ -0,0 +1,231 @@ +# -*- coding:utf-8 -*- +""" +Description: A python 2.7 implementation of gcForest proposed in [1]. A demo implementation of gcForest library as well as some demo client scripts to demostrate how to use the code. The implementation is flexible enough for modifying the model or +fit your own datasets. +Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. (https://arxiv.org/abs/1702.08835v2 ) +Requirements: This package is developed with Python 2.7, please make sure all the demendencies are installed, which is specified in requirements.txt +ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou(zhouzh@lamda.nju.edu.cn) +ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explains how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng. 
import sys, os, os.path as osp
import numpy as np
import json
import pickle

from ..estimators import get_estimator_kfold
from ..estimators.est_utils import xgb_train
from ..utils.config_utils import get_config_value
from ..utils.log_utils import get_logger
from ..utils.metrics import accuracy_pb

LOGGER = get_logger('gcforest.cascade.cascade_classifier')


def check_dir(path):
    """Create the parent directory of ``path`` if it does not exist yet."""
    d = osp.abspath(osp.join(path, osp.pardir))
    if not osp.exists(d):
        os.makedirs(d)


def calc_accuracy(y_true, y_pred, name, prefix=""):
    """Log and return the accuracy of ``y_pred`` against ``y_true`` in percent.

    ``name`` and ``prefix`` are only used to label the log line.
    """
    acc = 100. * np.sum(np.asarray(y_true) == y_pred) / len(y_true)
    LOGGER.info('{}Accuracy({})={:.2f}%'.format(prefix, name, acc))
    return acc


def get_opt_layer_id(acc_list):
    """Return the index of the largest value in ``acc_list``.

    A stable mergesort is used on the negated accuracies, so on ties the
    earliest layer wins.
    """
    opt_layer_id = np.argsort(-np.asarray(acc_list), kind='mergesort')[0]
    return opt_layer_id


class CascadeClassifier(object):
    """Cascade of k-fold estimators grown layer by layer with early stopping."""

    def __init__(self, ca_config):
        """
        Parameters (ca_config)
        ----------
        early_stopping_rounds: int (required)
            when > 0, the cascade stops growing once the accuracy has not
            improved for early_stopping_rounds layers
        max_layers: int [default=0]
            maximum number of cascade layers; 0 means rely on early stopping
            to find the number of layers
        n_classes: int (required)
            Number of classes
        estimators: list (required)
            List of per-estimator configs; each needs "n_folds" and "type",
            the remaining keys are passed on to the estimator factory
        look_indexs_cycle (list 2d): default=None
            layer i looks at the groups listed in
            look_indexs_cycle[i % len(look_indexs_cycle)]
            default = None <=> [range(n_groups)]
            e.g.
                look_indexs_cycle = [[0,1],[2,3],[0,1,2,3]]
                means layer 1 looks at groups 0,1; layer 2 looks at groups 2,3;
                layer 3 looks at every group, and layer 4 cycles back to layer 1
        random_state: int [default=None]
        data_save_rounds: int [default=0]
        data_save_dir: str [default=None]
            every data_save_rounds layers the intermediate data is saved in
            data_save_dir; data_save_rounds = 0 disables intermediate saving
        """
        self.ca_config = ca_config
        self.early_stopping_rounds = self.get_value("early_stopping_rounds", None, int, required=True)
        self.max_layers = self.get_value("max_layers", 0, int)
        self.n_classes = self.get_value("n_classes", None, int, required=True)
        self.est_configs = self.get_value("estimators", None, list, required=True)
        self.look_indexs_cycle = self.get_value("look_indexs_cycle", None, list)
        self.random_state = self.get_value("random_state", None, int)
        # basestring: this package targets Python 2.7
        self.data_save_dir = self.get_value("data_save_dir", None, basestring)
        self.data_save_rounds = self.get_value("data_save_rounds", 0, int)
        if self.data_save_rounds > 0:
            assert self.data_save_dir is not None, "data_save_dir should not be null when data_save_rounds>0"
        self.eval_metrics = [("predict", accuracy_pb)]
        #LOGGER.info("\n" + json.dumps(ca_config, sort_keys=True, indent=4, separators=(',', ':')))

    @property
    def n_estimators_1(self):
        """Number of estimators in one cascade layer."""
        return len(self.est_configs)

    def get_value(self, key, default_value, value_types, required=False):
        """Read ``key`` from the cascade config through ``get_config_value``."""
        return get_config_value(self.ca_config, key, default_value, value_types,
                required=required, config_name="cascade")

    def _init_estimators(self, li, ei):
        """Build the k-fold estimator number ``ei`` of layer ``li``."""
        # Work on a copy so popping keys does not alter the shared config.
        est_args = self.est_configs[ei].copy()
        est_name = "layer_{} - estimator_{} - {}_folds".format(li, ei, est_args["n_folds"])
        n_folds = int(est_args.pop("n_folds"))
        est_type = est_args.pop("type")
        # Derive a per-estimator seed from the global seed and the estimator name.
        # NOTE(review): hash() of a str is only stable across runs on Python 2
        # (or with PYTHONHASHSEED fixed) - confirm if reproducibility matters.
        if self.random_state is not None:
            random_state = (self.random_state + hash("[estimator] {}".format(est_name))) % 1000000007
        else:
            random_state = None
        return get_estimator_kfold(est_name, n_folds, est_type, est_args, random_state=random_state)

    def fit_transform(self, X_groups_train, y_train, X_groups_test, y_test, stop_by_test=False):
        """
        Grow the cascade until max_layers is reached or the accuracy has not
        improved for early_stopping_rounds layers.

        X_groups_train / X_groups_test: one array or a list of arrays (one per
            feature group); each is flattened to 2-D and stacked column-wise
        stop_by_test: (bool)
            When X_test, y_test is validation data that is used to determine
            the opt_layer_id, use this option

        Returns (layer_id, X_train, y_train, X_test, y_test) where the X's are
        the inputs fed to the reported layer. Returns None if interrupted by
        KeyboardInterrupt.
        """
        if not isinstance(X_groups_train, list):
            X_groups_train = [X_groups_train]
        if not isinstance(X_groups_test, list):
            X_groups_test = [X_groups_test]
        LOGGER.info("X_groups_train.shape={},y_train.shape={},X__groups_test.shape={},y_test.shape={}".format(
            [xr.shape for xr in X_groups_train], y_train.shape, [xt.shape for xt in X_groups_test], y_test.shape))
        n_groups = len(X_groups_train)
        # check look_indexs_cycle
        if self.look_indexs_cycle is None:
            look_indexs_cycle = [list(range(n_groups))]
        else:
            look_indexs_cycle = self.look_indexs_cycle
            for look_indexs in look_indexs_cycle:
                # Test emptiness first: np.max/np.min raise on an empty list.
                if len(look_indexs) == 0 or np.max(look_indexs) >= n_groups or np.min(look_indexs) < 0:
                    raise ValueError("look_indexs unlegal!!! look_indexs={}".format(look_indexs))
        # init groups: flatten every group and record its column span in X_train
        group_starts, group_ends, group_dims = [], [], []
        n_trains = X_groups_train[0].shape[0]
        n_tests = X_groups_test[0].shape[0]
        X_train = np.zeros((n_trains, 0), dtype=X_groups_train[0].dtype)
        X_test = np.zeros((n_tests, 0), dtype=X_groups_test[0].dtype)
        for i, X_group in enumerate(X_groups_train):
            assert(X_group.shape[0] == n_trains)
            X_group = X_group.reshape(n_trains, -1)
            group_dims.append(X_group.shape[1])
            # A group starts where the previous one ended (the first at column 0).
            group_starts.append(0 if i == 0 else group_ends[i - 1])
            group_ends.append(group_starts[i] + group_dims[i])
            X_train = np.hstack((X_train, X_group))
        LOGGER.info("group_dims={}".format(group_dims))
        for i, X_group in enumerate(X_groups_test):
            assert(X_group.shape[0] == n_tests)
            X_group = X_group.reshape(n_tests, -1)
            assert(X_group.shape[1] == group_dims[i])
            X_test = np.hstack((X_test, X_group))
        LOGGER.info("X_train.shape={},X_test.shape={}".format(X_train.shape, X_test.shape))

        n_classes = self.n_classes
        assert n_classes == len(np.unique(y_train)), "n_classes({}) != len(unique(y)) {}".format(n_classes, np.unique(y_train))
        train_acc_list = []
        test_acc_list = []
        # X_train, y_train, X_test, y_test
        opt_datas = [None, None, None, None]
        try:
            # class probabilities produced by every estimator of the latest layer
            X_proba_train = np.zeros((X_train.shape[0], n_classes * self.n_estimators_1), dtype=np.float32)
            X_proba_test = np.zeros((X_test.shape[0], n_classes * self.n_estimators_1), dtype=np.float32)
            X_cur_train, X_cur_test = None, None
            layer_id = 0
            while 1:
                if self.max_layers > 0 and layer_id >= self.max_layers:
                    break
                # Copy previous layer's probabilities into current X_cur
                if layer_id == 0:
                    # the first layer has no probability features yet
                    X_cur_train = np.zeros((n_trains, 0), dtype=np.float32)
                    X_cur_test = np.zeros((n_tests, 0), dtype=np.float32)
                else:
                    X_cur_train = X_proba_train.copy()
                    X_cur_test = X_proba_test.copy()
                # Stack the groups that the current layer looks at onto X_cur
                look_indexs = look_indexs_cycle[layer_id % len(look_indexs_cycle)]
                for i in look_indexs:
                    X_cur_train = np.hstack((X_cur_train, X_train[:, group_starts[i]:group_ends[i]]))
                    X_cur_test = np.hstack((X_cur_test, X_test[:, group_starts[i]:group_ends[i]]))
                LOGGER.info("[layer={}] look_indexs={}, X_cur_train.shape={}, X_cur_test.shape={}".format(
                    layer_id, look_indexs, X_cur_train.shape, X_cur_test.shape))
                # Fit on X_cur, predict to update X_proba
                y_train_proba_li = np.zeros((y_train.shape[0], n_classes))
                y_test_proba_li = np.zeros((y_test.shape[0], n_classes))
                for ei in range(self.n_estimators_1):
                    est = self._init_estimators(layer_id, ei)
                    # fit_transform
                    y_probas = est.fit_transform(X_cur_train, y_train, y_train,
                            test_sets=[("test", X_cur_test, y_test)], eval_metrics=self.eval_metrics,
                            keep_model_in_mem=False)
                    col_lo, col_hi = ei * n_classes, ei * n_classes + n_classes
                    # train
                    X_proba_train[:, col_lo:col_hi] = y_probas[0]
                    y_train_proba_li += y_probas[0]
                    # test
                    X_proba_test[:, col_lo:col_hi] = y_probas[1]
                    y_test_proba_li += y_probas[1]
                # layer prediction = average of the estimators' probabilities
                y_train_proba_li /= len(self.est_configs)
                y_test_proba_li /= len(self.est_configs)
                train_avg_acc = calc_accuracy(y_train, np.argmax(y_train_proba_li, axis=1), 'layer_{} - train.classifier_average'.format(layer_id))
                test_avg_acc = calc_accuracy(y_test, np.argmax(y_test_proba_li, axis=1), 'layer_{} - test.classifier_average'.format(layer_id))
                train_acc_list.append(train_avg_acc)
                test_acc_list.append(test_avg_acc)

                opt_layer_id = get_opt_layer_id(test_acc_list if stop_by_test else train_acc_list)
                # set opt_datas
                if opt_layer_id == layer_id:
                    opt_datas = [X_cur_train, y_train, X_cur_test, y_test]
                # early stop
                if self.early_stopping_rounds > 0 and layer_id - opt_layer_id >= self.early_stopping_rounds:
                    # log and save final result (opt layer)
                    LOGGER.info("[Result][Optimal Level Detected] opt_layer_id={}, accuracy_train={:.2f}%, accuracy_test={:.2f}%".format(
                        opt_layer_id, train_acc_list[opt_layer_id], test_acc_list[opt_layer_id]))
                    if self.data_save_dir is not None:
                        self.save_data(opt_layer_id, *opt_datas)
                    return opt_layer_id, opt_datas[0], opt_datas[1], opt_datas[2], opt_datas[3]
                # save opt data if needed
                if self.data_save_rounds > 0 and (layer_id + 1) % self.data_save_rounds == 0:
                    self.save_data(layer_id, *opt_datas)
                # inc layer_id
                layer_id += 1
            opt_datas = [X_cur_train, y_train, X_cur_test, y_test]
            # log and save final result (last layer)
            LOGGER.info("[Result][Reach Max Layer] max_layer_num={}, accuracy_train={:.2f}%, accuracy_test={:.2f}%".format(
                self.max_layers, train_acc_list[-1], test_acc_list[-1]))
            if self.data_save_dir is not None:
                self.save_data(self.max_layers - 1, *opt_datas)
            return self.max_layers, opt_datas[0], opt_datas[1], opt_datas[2], opt_datas[3]
        except KeyboardInterrupt:
            # Best effort: a Ctrl-C stops training quietly; the caller gets None.
            pass

    def save_data(self, layer_id, X_train, y_train, X_test, y_test):
        """Pickle the train and test data of ``layer_id`` into ``data_save_dir``."""
        for pi, phase in enumerate(["train", "test"]):
            data_path = osp.join(self.data_save_dir, "layer_{}-{}.pkl".format(layer_id, phase))
            check_dir(data_path)
            data = {"X": X_train, "y": y_train} if pi == 0 else {"X": X_test, "y": y_test}
            LOGGER.info("Saving Data in {} ... X.shape={}, y.shape={}".format(data_path, data["X"].shape, data["y"].shape))
            with open(data_path, "wb") as f:
                pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)
import os, os.path as osp
import numpy as np

from .utils.log_utils import get_logger
from .utils.cache_utils import name2path

LOGGER = get_logger("gcforest.data_cache")


def check_dir(path):
    """Ensure the directory that would contain ``path`` exists."""
    parent_dir = osp.abspath(osp.join(path, osp.pardir))
    if osp.exists(parent_dir):
        return
    os.makedirs(parent_dir)


def data_disk_path(cache_dir, phase, data_name):
    """Return ``<cache_dir>/<phase>/<name2path(data_name)>.npy``."""
    file_name = name2path(data_name) + ".npy"
    return osp.join(cache_dir, phase, file_name)


class DataCache(object):
    """Per-phase ("train"/"test") store of named arrays, in RAM and/or on disk."""

    def __init__(self, config):
        self.config = config
        self.cache_dir = config.get("cache_dir", None)
        # Fill in the two policies when absent: keep in memory, don't write to disk.
        for option, fallback in (("keep_in_mem", 1), ("cache_in_disk", 0)):
            if self.config.get(option) is None:
                self.config[option] = {"default": fallback}
        self.datas = {"train": {}, "test": {}}

    def keep_in_mem(self, phase, data_name):
        """
        Tell whether the data for (phase, data_name) should be kept in RAM.
        config["keep_in_mem"][data_name] wins when present, otherwise
        config["keep_in_mem"]["default"] is used. ``phase`` is not consulted.
        """
        policy = self.config["keep_in_mem"]
        fallback = policy["default"]
        return policy.get(data_name, fallback)

    def cache_in_disk(self, phase, data_name):
        """
        Tell whether the data for (phase, data_name) should be written to disk.
        config["cache_in_disk"][data_name] wins when present, otherwise
        config["cache_in_disk"]["default"] is used. ``phase`` is not consulted.
        """
        policy = self.config["cache_in_disk"]
        fallback = policy["default"]
        return policy.get(data_name, fallback)

    def is_exist(self, phase, data_name):
        """
        Check whether data_name is available in memory or cached on disk.

        Returns True when held in memory, False when it is not in memory and
        no cache_dir is set, the file path when the cache file exists, and
        None when it does not.
        """
        if self.datas[phase].get(data_name, None) is not None:
            return True
        if self.cache_dir is None:
            return False
        disk_path = data_disk_path(self.cache_dir, phase, data_name)
        return disk_path if osp.exists(disk_path) else None

    def gets(self, phase, data_names, ignore_no_exist=False):
        """Return the data of every name in ``data_names`` (a list), in order."""
        assert isinstance(data_names, list)
        return [self.get(phase, one_name, ignore_no_exist=ignore_no_exist)
                for one_name in data_names]

    def get(self, phase, data_name, ignore_no_exist=False):
        """
        Fetch the data registered under data_name.

        Arguments
        ---------
        phase (str): train or test
        data_name (str): name for tops/bottoms
        ignore_no_exist (bool): if True, return None when no data is found,
            otherwise raise ValueError
        """
        assert isinstance(data_name, basestring), "data_name={}, type(data_name)={}".format(data_name, type(data_name))
        # memory first
        in_memory = self.datas[phase].get(data_name, None)
        if in_memory is not None:
            return in_memory
        # then disk
        if self.cache_dir is None:
            if not ignore_no_exist:
                raise ValueError("Cache base unset, can't load data ({}->{}) from disk".format(phase, data_name))
            return None
        disk_path = data_disk_path(self.cache_dir, phase, data_name)
        if osp.exists(disk_path):
            return np.load(disk_path)
        if not ignore_no_exist:
            raise ValueError("Data path not exist, can't load data ({}->{}) from disk: {}".format(phase, data_name, disk_path))
        return None

    def updates(self, phase, data_names, datas):
        """Call ``update`` for each name with the array at the same index."""
        assert isinstance(data_names, list)
        for idx, one_name in enumerate(data_names):
            self.update(phase, one_name, datas[idx])

    def update(self, phase, data_name, data):
        """
        Store ``data`` for (phase, data_name) in memory and/or on disk,
        according to the keep_in_mem / cache_in_disk policies.
        """
        assert isinstance(data, np.ndarray), "data(type={}) is not a np.ndarray!!!".format(type(data))
        if self.keep_in_mem(phase, data_name):
            self.datas[phase][data_name] = data
        if not self.cache_in_disk(phase, data_name):
            return
        if self.cache_dir is None:
            raise ValueError("Cache base unset, can't Save data ({}->{}) to disk".format(phase, data_name))
        disk_path = data_disk_path(self.cache_dir, phase, data_name)
        LOGGER.info("Updating data ({}->{}, shape={}) in disk: {}".format(phase, data_name, data.shape, disk_path))
        check_dir(disk_path)
        np.save(disk_path, data)
from .cifar10 import CIFAR10
from .ds_pickle import DSPickle
from .ds_pickle2 import DSPickle2
from .gtzan import GTZAN
from .imdb import IMDB
from .mnist import MNIST
from .olivetti_face import OlivettiFace
from .uci_adult import UCIAdult
from .uci_letter import UCILetter
from .uci_semg import UCISEMG
from .uci_yeast import UCIYeast

# Maps the "type" value of a dataset config to the class implementing it.
_DS_CLASSES = {
    'cifar10': CIFAR10,
    'ds_pickle': DSPickle,
    'ds_pickle2': DSPickle2,
    'gtzan': GTZAN,
    'imdb': IMDB,
    'mnist': MNIST,
    'olivetti_face': OlivettiFace,
    'uci_adult': UCIAdult,
    'uci_letter': UCILetter,
    'uci_semg': UCISEMG,
    'uci_yeast': UCIYeast,
}


def get_ds_class(type_name):
    """Return the dataset class registered for ``type_name``, or None if unknown."""
    return _DS_CLASSES.get(type_name)


def get_dataset(ds_config):
    """Instantiate the dataset described by ``ds_config``.

    ``ds_config["type"]`` selects the class; every other key is passed to the
    class constructor as a keyword argument. The caller's dict is left
    untouched so the same config can be used again.

    Raises KeyError when "type" is missing and ValueError when it is unknown.
    """
    ds_kwargs = dict(ds_config)
    type_name = ds_kwargs.pop("type")
    ds_class = get_ds_class(type_name)
    if ds_class is None:
        raise ValueError('Unkonw Dataset Type: ', type_name)
    return ds_class(**ds_kwargs)
import numpy as np
from .ds_base import ds_base
from keras.datasets import cifar10

cls_names = ('airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Shapes noted by the original author:
# X_train.shape: (50000, 3, 32, 32)
# X_test.shape: (10000, 3, 32, 32)
# y: 10 labels
class CIFAR10(ds_base):
    """CIFAR-10 loaded through ``keras.datasets.cifar10``.

    ``data_set`` selects the split: 'train', 'test', 'all', or the
    'train-small' / 'test-small' variants that keep the first 1000 samples.
    """

    def __init__(self, **kwargs):
        super(CIFAR10, self).__init__(**kwargs)
        self.cls_names = cls_names
        (X_train, y_train), (X_test, y_test) = cifar10.load_data()
        # flatten the labels to 1-D
        y_train = y_train.reshape((y_train.shape[0]))
        y_test = y_test.reshape((y_test.shape[0]))
        if self.data_set == 'train':
            X = X_train
            y = y_train
        elif self.data_set == 'train-small':
            X = X_train[:1000]
            y = y_train[:1000]
        elif self.data_set == 'test':
            X = X_test
            y = y_test
        elif self.data_set == 'test-small':
            X = X_test[:1000]
            y = y_test[:1000]
        elif self.data_set == 'all':
            X = np.vstack((X_train, X_test))
            # The labels are 1-D and the splits differ in length, so they must
            # be joined end to end (np.vstack would raise here).
            y = np.concatenate((y_train, y_test))
        else:
            raise ValueError('CIFAR10 Unsupported data_set: ', self.data_set)
        # move a trailing channel axis to position 1 (channels-first)
        if X.shape[-1] == 3:
            X = X.transpose((0, 3, 1, 2))
        # normalization
        if self.norm:
            X = X.astype(np.float32) / 255
        X = self.init_layout_X(X)
        y = self.init_layout_y(y)
        self.X = X
        self.y = y
import os.path as osp
import numpy as np


class ds_base(object):
    """Common base of the dataset classes: split selection and X / y layout.

    Settings come either from the keyword arguments or, when ``conf`` is
    given, from that dict (``conf["data_set"]`` is then required).
    Subclasses are expected to set ``self.X`` and ``self.y``.
    """

    def __init__(self, data_set="train", norm=False, layout_x="tensor", layout_y="label", conf=None):
        self.conf = conf
        if conf is not None:
            self.data_set = conf["data_set"]
            self.norm = int(conf.get("norm", 0))
            self.layout_x = conf.get("layout_x", "tensor")
            self.layout_y = conf.get("layout_y", "label")
        else:
            self.data_set = data_set
            self.norm = norm
            self.layout_x = layout_x
            self.layout_y = layout_y

    @property
    def n_classes(self):
        """``self.n_classes_`` when set, else the number of distinct values in ``self.y``."""
        if hasattr(self, "n_classes_"):
            return self.n_classes_
        return len(np.unique(self.y))

    def init_layout_X(self, X):
        """
        Convert X (given as a 4-D tensor) to the layout named by ``layout_x``:
        "tensor" keeps it as is, "vector" flattens every sample, "sequence"
        requires a last axis of size 1, drops it and swaps the remaining two
        non-sample axes. Raises ValueError for any other layout.
        """
        if self.layout_x == "tensor":
            pass
        elif self.layout_x == "vector":
            X = X.reshape((X.shape[0], -1))
        elif self.layout_x == "sequence":
            assert X.shape[3] == 1
            X = X[:, :, :, 0].transpose((0, 2, 1))
        else:
            raise ValueError("DataSet doesn't supported layout_x: ", self.layout_x)
        return X

    def init_layout_y(self, y, X=None):
        """
        Convert y to the layout named by ``layout_y``: "label" keeps it as is,
        "bin" one-hot encodes it with keras, "autoencoder" returns X instead.
        Raises ValueError for any other layout.
        """
        if self.layout_y == "label":
            pass
        elif self.layout_y == "bin":
            # imported lazily so keras is only needed for this layout
            from keras.utils import np_utils
            y = np_utils.to_categorical(y)
        elif self.layout_y == "autoencoder":
            y = X
        else:
            raise ValueError("Unsupported layout_y: ", self.layout_y)
        return y

    def get_data_by_imageset(self, X_train, y_train, X_test, y_test):
        """
        Return the (X, y) pair selected by ``data_set``: "train", "test", or
        "all" (train followed by test, joined along the sample axis).
        Raises ValueError for any other value.
        """
        if self.data_set == "train":
            X = X_train
            y = y_train
        elif self.data_set == "test":
            X = X_test
            y = y_test
        elif self.data_set == "all":
            # Join along axis 0. np.vstack is wrong for 1-D inputs: it stacks
            # them as rows (or raises when the lengths differ).
            X = np.concatenate((X_train, X_test), axis=0)
            y = np.concatenate((y_train, y_test), axis=0)
        else:
            raise ValueError("Unsupported data_set: ", self.data_set)
        return X, y


def get_dataset_base():
    """Return the absolute path of the "datasets" directory four levels above this file."""
    return osp.abspath(osp.join(__file__, osp.pardir, osp.pardir, osp.pardir, osp.pardir, "datasets"))


def get_dataset_cache_base():
    """Return the absolute path of the "datasets-cache" directory four levels above this file."""
    return osp.abspath(osp.join(__file__, osp.pardir, osp.pardir, osp.pardir, osp.pardir, "datasets-cache"))
from __future__ import print_function
import pickle
import os, os.path as osp
from .ds_base import ds_base

# Using cPickle to save and load dataset


def save_dataset(data_path, X, y):
    """Pickle ``{'X': X, 'y': y}`` to ``data_path``, creating its directory if needed."""
    print('Data Saving in {} (X.shape={},y.shape={})'.format(
        data_path, X.shape, y.shape))
    target_dir = osp.abspath(osp.join(data_path, osp.pardir))
    if not osp.exists(target_dir):
        os.makedirs(target_dir)
    with open(data_path, 'wb') as out_file:
        pickle.dump({'X': X, 'y': y}, out_file, pickle.HIGHEST_PROTOCOL)


def load_dataset(data_path):
    """Load the ``(X, y)`` pair from a pickle file with 'X' and 'y' entries."""
    with open(data_path, 'rb') as in_file:
        payload = pickle.load(in_file)
    X, y = payload['X'], payload['y']
    print('Data Loaded from {} (X.shape={}, y.shape={})'.format(data_path, X.shape, y.shape))
    return X, y


class DSPickle(ds_base):
    """Dataset read from a pickle file with 'X' and 'y' entries."""

    def __init__(self, data_path, **kwargs):
        super(DSPickle, self).__init__(**kwargs)
        self.data_path = data_path
        raw_X, raw_y = load_dataset(data_path)
        # apply the configured layouts before exposing the data
        self.X = self.init_layout_X(raw_X)
        self.y = self.init_layout_y(raw_y)
from __future__ import print_function
import pickle
import os, os.path as osp


class DSPickle2(object):
    """Dataset read from one pickled dict holding several feature arrays.

    ``self.X`` is the list of ``datas[key]`` for every key in ``X_keys`` (in
    that order); ``self.y`` is ``datas["y"]``.
    """

    def __init__(self, data_path, X_keys):
        self.data_path = data_path
        print('Loading data from {}'.format(data_path))
        # Pickle data is binary: the file must be opened in 'rb' mode.
        # NOTE: only load pickle files from a trusted source.
        with open(data_path, 'rb') as f:
            datas = pickle.load(f)
        self.X = [datas[X_key] for X_key in X_keys]
        self.y = datas["y"]
        print('Data Loaded (X.shape={}, y.shape={})'.format([x1.shape for x1 in self.X], self.y.shape))
import numpy as np
import os, os.path as osp
import sys
from .ds_base import ds_base, get_dataset_base, get_dataset_cache_base

DEFAULT_DATA_BASE = osp.abspath( osp.join(get_dataset_base(),'gtzan','genres') )
DEFAULT_IMAGEST_BASE = osp.abspath( osp.join(get_dataset_base(),'gtzan','splits') )
DEFAULT_CACHE_BASE = osp.abspath( osp.join(get_dataset_cache_base(),'gtzan') )
DEFAULT_GENRE_LIST = (
    'blues',
    'classical',
    'country',
    'disco',
    'hiphop',
    'jazz',
    'metal',
    'pop',
    'reggae',
    'rock',
    )


def parse_anno_file(anno_path):
    """Parse a split file whose rows are ``<relative path> <int label>``.

    Returns the list of relative paths and the labels as an int16 array.
    """
    X = []
    y = []
    with open(anno_path, 'r') as f:
        for row in f:
            cols = row.strip().split(' ')
            X.append(cols[0])
            y.append(int(cols[1]))
    y = np.asarray(y, dtype=np.int16)
    return X, y


def read_data(anno_path, mode, genre_base=None):
    """Compute 'fft' or 'ceps' features for every audio file of a split file.

    ``genre_base`` is the directory the relative paths are joined to
    (DEFAULT_DATA_BASE when omitted). Raises ValueError for any other mode.

    NOTE(review): Parallel, delayed, get_fft_feature and get_ceps_feature are
    not imported in this module, so calling this function raises NameError as
    the file stands - confirm where they are meant to come from.
    """
    genre_base = genre_base or DEFAULT_DATA_BASE
    au_path_list = []
    y = []
    with open(anno_path) as f:
        for row in f:
            cols = row.strip().split(' ')
            au_path = osp.join(genre_base, cols[0])
            au_path_list.append(au_path)
            y.append(int(cols[1]))
    if mode == 'fft':
        X = Parallel(n_jobs=-1, backend='threading')(
                delayed(get_fft_feature)(au_path, 1000)
                for au_path in au_path_list)
    elif mode == 'ceps':
        X = Parallel(n_jobs=-1, backend='threading')(
                delayed(get_ceps_feature)(au_path)
                for au_path in au_path_list)
    else:
        raise ValueError('Unkown mode: ', mode)
    X = np.asarray(X)
    y = np.asarray(y)
    return X, y


class GTZAN(ds_base):
    """GTZAN genre dataset read from the split files under DEFAULT_IMAGEST_BASE.

    ``cache`` selects what X holds: None keeps the relative audio paths, 'raw'
    loads the waveforms with librosa, any other value loads per-file ``.npy``
    arrays from ``<cache_base>/<cache>/``; for 'mfcc' these are additionally
    packed into one zero-padded 4-D array. ``conf["cache"]`` overrides the
    argument when a conf dict is given.
    """

    def __init__(self, cache=None, **kwargs):
        super(GTZAN, self).__init__(**kwargs)
        if kwargs.get('conf') is not None:
            conf = kwargs['conf']
            cache = conf.get('cache', None)
        data_set_path = osp.join(DEFAULT_IMAGEST_BASE, self.data_set)
        self.data_set_path = data_set_path
        self.cache = cache
        X, y = parse_anno_file(data_set_path)
        if cache == 'raw':
            import librosa
            from tqdm import trange
            # every clip is cut or zero-padded to 661500 samples
            X_new = np.zeros((len(X), 1, 661500, 1))
            for i in trange(len(X)):
                x, _ = librosa.load(osp.join(DEFAULT_DATA_BASE, X[i]))
                x_len = min(661500, len(x))
                X_new[i, :, :x_len, 0] = x[:x_len]
            X = X_new
        elif cache is not None:
            X = self.load_cache_X(X, cache)
            if cache == 'mfcc':
                # every feature matrix is cut or zero-padded to 1280 columns
                X_new = np.zeros((len(X), X[0].shape[0], 1280, 1))
                for i, x in enumerate(X):
                    x_len = min(x.shape[1], 1280)
                    X_new[i, :, :x_len, 0] = x[:, :x_len]
                # Assign only where X_new was built, so other cache values
                # (and cache=None) no longer hit an undefined X_new.
                X = X_new

        # layout_X
        if self.layout_x == 'rel_path':
            self.X = X
        else:
            self.X = self.init_layout_X(X)
        # layout_y
        self.y = self.init_layout_y(y)

    def load_cache_X(self, rel_paths, cache_name):
        """Load ``<cache_base>/<cache_name>/<rel_path without extension>.npy`` for each path."""
        X = []
        for rel_path in rel_paths:
            cache_path = osp.join(self.cache_base, cache_name, osp.splitext(rel_path)[0] + '.npy')
            X.append(np.load(cache_path))
        return X

    @property
    def cache_base(self):
        """Directory holding the cached feature files."""
        return DEFAULT_CACHE_BASE

    @property
    def data_base(self):
        """Directory holding the audio files."""
        return DEFAULT_DATA_BASE
class IMDB(ds_base):
    """
    IMDB dataset wrapper built on `keras.datasets.imdb`.

    The selected samples are converted to one of three feature layouts
    ('origin', 'tfidf', 'tfidf_seq') and stored in `self.X` / `self.y`.
    """
    def __init__(self, feature='tfidf', **kwargs):
        """
        feature (str):
            'origin'    : word-id sequences padded by keras to length 400
            'tfidf'     : tf-idf vector over the 5000 word ids kept by the loader
            'tfidf_seq' : padded word-id sequence where each id is replaced by
                          the tf-idf value of that id in the same sample
            If self.conf is not None, the value is taken from conf['feature']
            (default 'tfidf') and this argument is ignored.
        kwargs:
            forwarded to ds_base.__init__

        Raises ValueError for any other `feature` value.
        """
        super(IMDB, self).__init__(**kwargs)
        # the config entry, when a config is present, overrides the argument
        if self.conf is not None:
            feature = self.conf.get('feature', 'tfidf')
        if feature.startswith('tfidf'):
            # both tf-idf variants restrict the loader to max_features words
            max_features = 5000
            (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
        else:
            (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=None,
                    skip_top=0, maxlen=None, seed=113, start_char=1, oov_char=2, index_from=3)
        # pick the samples matching self.data_set (selection logic lives in ds_base)
        X, y = self.get_data_by_imageset(X_train, y_train, X_test, y_test)
        print('data_set={}, Average sequence length: {}'.format(self.data_set, np.mean(list(map(len, X)))))

        #feature
        if feature == 'origin':
            maxlen = 400
            X = sequence.pad_sequences(X, maxlen=maxlen)
        elif feature == 'tfidf':
            from sklearn.feature_extraction.text import TfidfTransformer
            transformer = TfidfTransformer(smooth_idf=False)
            #transformer = TfidfTransformer(smooth_idf=True)
            # per-sample word-id counts; the idf statistics are always fitted on the
            # training split and then applied to the selected samples
            X_train_bin = np.zeros((len(X_train), max_features), dtype=np.int16)
            X_bin = np.zeros((len(X), max_features), dtype=np.int16)
            for i, X_i in enumerate(X_train):
                X_train_bin[i, :] = np.bincount(X_i, minlength=max_features)
            for i, X_i in enumerate(X):
                X_bin[i, :] = np.bincount(X_i, minlength=max_features)
            transformer.fit(X_train_bin)
            X = transformer.transform(X_bin)
            X = np.asarray(X.todense())
        elif feature == 'tfidf_seq':
            from sklearn.feature_extraction.text import TfidfTransformer
            transformer = TfidfTransformer(smooth_idf=False)
            maxlen = 400
            N = len(X)
            X_bin = np.zeros((N, max_features), dtype=np.int16)
            for i, X_i in enumerate(X):
                # bincount without minlength: only the first len(X_bin_i) columns are filled
                X_bin_i = np.bincount(X_i)
                X_bin[i, :len(X_bin_i)] = X_bin_i
            # unlike the 'tfidf' branch, idf is fitted on the selected samples themselves
            tfidf = transformer.fit_transform(X_bin)
            tfidf = np.asarray(tfidf.todense())
            X_id = sequence.pad_sequences(X, maxlen=maxlen)
            X = np.zeros(X_id.shape, dtype=np.float32)
            for i in range(N):
                # look up, for every position of the padded sequence, the tf-idf of its word id
                X[i, :] = tfidf[i][X_id[i]]
        else:
            raise ValueError('Unkown feature: ', feature)

        # insert two singleton axes: (n, d) -> (n, 1, d, 1)
        X = X[:,np.newaxis,:,np.newaxis]
        self.X = self.init_layout_X(X)
        self.y = self.init_layout_y(y)
class MNIST(ds_base):
    """
    MNIST dataset wrapper built on `keras.datasets.mnist`.

    Supported `data_set` values (read from the ds_base attribute `self.data_set`):
        'train'       : full training split
        'train-small' : first 2000 training samples
        'test'        : full test split
        'test-small'  : first 1000 test samples
        'all'         : training split followed by test split
    """
    def __init__(self, **kwargs):
        """
        kwargs:
            forwarded to ds_base.__init__

        Raises ValueError when self.data_set is not one of the supported values.
        """
        super(MNIST, self).__init__(**kwargs)
        (X_train, y_train), (X_test, y_test) = mnist.load_data()
        if self.data_set == 'train':
            X = X_train
            y = y_train
        elif self.data_set == 'train-small':
            X = X_train[:2000]
            y = y_train[:2000]
        elif self.data_set == 'test':
            X = X_test
            y = y_test
        elif self.data_set == 'test-small':
            X = X_test[:1000]
            y = y_test[:1000]
        elif self.data_set == 'all':
            X = np.vstack((X_train, X_test))
            # labels are 1-D: np.vstack would treat each array as a single row and
            # fail because the two splits differ in length, so join them end to end
            y = np.concatenate((y_train, y_test))
        else:
            raise ValueError('MNIST Unsupported data_set: ', self.data_set)

        # normalization
        if self.norm:
            X = X.astype(np.float32) / 255
        # add a singleton axis after the sample axis
        X = X[:,np.newaxis,:,:]
        X = self.init_layout_X(X)
        y = self.init_layout_y(y)
        self.X = X
        self.y = y
def load_data(train_num, train_repeat):
    """
    Fetch the Olivetti faces and split them into a stratified train / test pair.

    train_num (int or float):
        The test fraction handed to train_test_split is (10 - train_num) / 10
    train_repeat (int):
        When > 1, every training sample and label is repeated this many times

    Returns X_train, y_train, X_test, y_test
    """
    held_out_fraction = (10. - train_num) / 10
    faces = fetch_olivetti_faces()
    images = faces.images
    targets = faces.target
    X_train, X_test, y_train, y_test = train_test_split(
        images, targets, test_size=held_out_fraction, random_state=3, stratify=targets)
    if train_repeat > 1:
        # duplicate each training sample (and its label) in place
        X_train = np.repeat(X_train, train_repeat, axis=0)
        y_train = np.repeat(y_train, train_repeat)
    return X_train, y_train, X_test, y_test
class FeatureParser(object):
    """
    Parser for one feature column, configured from a one-line description.

    The description "C" marks a numeric feature; anything else is read as a
    comma-separated list of category names.
    """
    def __init__(self, desc):
        spec = desc.strip()
        self.f_type = "number" if spec == "C" else "categorical"
        if self.f_type == "categorical":
            # id 0 is reserved for the missing-value marker "?"
            labels = ["?"] + [token.strip() for token in spec.split(",")]
            self.name2id = {label: idx for idx, label in enumerate(labels)}

    def get_float(self, f_data):
        """
        Return the value as a float: the number itself, or the category id
        """
        token = f_data.strip()
        if self.f_type != "number":
            return float(self.name2id[token])
        return float(token)

    def get_data(self, f_data):
        """
        Return the value as a float (numeric) or a one-hot float32 vector (categorical)
        """
        token = f_data.strip()
        if self.f_type == "number":
            return float(token)
        onehot = np.zeros(len(self.name2id), dtype=np.float32)
        onehot[self.name2id[token]] = 1
        return onehot

    def get_fdim(self):
        """
        get feature dimension
        """
        return 1 if self.f_type == "number" else len(self.name2id)
class UCIAdult(ds_base):
    """
    UCI Adult dataset wrapper; samples come from the module-level load_data().
    """
    def __init__(self, cate_as_onehot=0, **kwargs):
        """
        cate_as_onehot: whether features with discrete values are represented as one-hot vectors.
            When self.conf is not None, conf["cate_as_onehot"] (default 0) is used instead.
        """
        super(UCIAdult, self).__init__(**kwargs)
        if self.conf is not None:
            cate_as_onehot = int(self.conf.get("cate_as_onehot", 0))
        features, labels = load_data(self.data_set, cate_as_onehot)

        # (n, d) -> (n, 1, d, 1)
        features = features[:, np.newaxis, :, np.newaxis]
        # both layouts are computed before either attribute is assigned
        self.X, self.y = self.init_layout_X(features), self.init_layout_y(labels)
def load_data():
    """
    Read letter-recognition.data and split it into train / test parts.

    Each row is comma separated: the first column is an upper-case letter
    (mapped to 0 for 'A', 1 for 'B', ...), followed by 16 numeric features.
    The first 16000 rows form the training part, the remaining rows the test part.

    Returns X_train, y_train, X_test, y_test
    """
    data_path = osp.join(get_dataset_base(), "uci_letter", "letter-recognition.data")
    with open(data_path) as f:
        records = [line.strip().split(',') for line in f.readlines()]
    n_datas = len(records)
    X = np.zeros((n_datas, 16), dtype=np.float32)
    y = np.zeros(n_datas, dtype=np.int32)
    for idx, record in enumerate(records):
        X[idx, :] = [float(value) for value in record[1:]]
        y[idx] = ord(record[0]) - ord('A')
    n_train = 16000
    return X[:n_train], y[:n_train], X[n_train:], y[n_train:]
def load_mat(mat_path):
    """
    Load one .mat file and stack the arrays of every movement listed in move2label.

    Movements are visited in sorted key order; every row of a movement's
    array receives that movement's label.

    Returns (X, y), or (None, None) when move2label is empty
    """
    mat = sio.loadmat(mat_path)
    signal_parts = []
    label_parts = []
    for move in sorted(move2label.keys()):
        signals = mat[move]
        signal_parts.append(signals)
        label_parts.append(np.full(signals.shape[0], move2label[move], dtype=np.int32))
    if not signal_parts:
        return None, None
    if len(signal_parts) == 1:
        # a single movement is returned as loaded, without stacking
        return signal_parts[0], label_parts[0]
    return np.vstack(signal_parts), np.concatenate(label_parts)
class UCISEMG(ds_base):
    """
    UCI sEMG dataset wrapper; samples come from the module-level load_data().
    """
    def __init__(self, **kwargs):
        super(UCISEMG, self).__init__(**kwargs)
        train_part, test_part = load_data()
        samples, labels = self.get_data_by_imageset(
            train_part[0], train_part[1], test_part[0], test_part[1])

        # (n, d) -> (n, 1, d, 1)
        samples = samples[:, np.newaxis, :, np.newaxis]
        if self.layout_x != 'lstm':
            samples = self.init_layout_X(samples)
        else:
            # 'lstm' layout: regroup into (n, -1, 6), then swap the last two axes
            n_samples = samples.shape[0]
            samples = samples.reshape((n_samples, -1, 6)).transpose((0, 2, 1))
        labels = self.init_layout_y(labels)
        self.X = samples
        self.y = labels
class UCIYeast(ds_base):
    """
    UCI Yeast dataset wrapper; samples come from the module-level load_data().

    Supported `data_set` values: "train", "test" and "all"
    (training part followed by test part).
    """
    def __init__(self, **kwargs):
        """
        kwargs:
            forwarded to ds_base.__init__

        Raises ValueError when self.data_set is not one of the supported values.
        """
        super(UCIYeast, self).__init__(**kwargs)
        (X_train, y_train), (X_test, y_test) = load_data()
        if self.data_set == "train":
            X = X_train
            y = y_train
        elif self.data_set == "test":
            X = X_test
            y = y_test
        elif self.data_set == "all":
            X = np.vstack((X_train, X_test))
            # labels are 1-D: np.vstack would treat each array as a single row and
            # fail because the two parts differ in length, so join them end to end
            y = np.concatenate((y_train, y_test))
        else:
            raise ValueError("YEAST Unsupported data_set: ", self.data_set)

        # (n, d) -> (n, 1, d, 1)
        X = X[:,np.newaxis,:,np.newaxis]
        X = self.init_layout_X(X)
        y = self.init_layout_y(y)
        self.X = X
        self.y = y
def get_estimator_class(est_type):
    """
    Map an estimator type name to its wrapper class.

    est_type (str):
        "ExtraTreesClassifier" or "RandomForestClassifier"

    Raises ValueError for any other value.
    """
    # NOTE: "XGBClassifier" (GCXGBClassifier) is currently disabled
    registry = (
        ("ExtraTreesClassifier", GCExtraTreesClassifier),
        ("RandomForestClassifier", GCRandomForestClassifier),
    )
    for type_name, wrapper_class in registry:
        if est_type == type_name:
            return wrapper_class
    raise ValueError('Unkown Estimator Type, est_type={}'.format(est_type))
class BaseClassifierWrapper(object):
    """
    Wrapper around an estimator class that adds optional on-disk model caching
    and batched probability prediction.

    Subclasses must implement _load_model_from_disk / _save_model_to_disk to
    use the cache, and may override _init_estimator, _fit, _predict_proba and
    _default_predict_batch_size.
    """
    def __init__(self, name, est_class, est_args):
        """
        name (str):
            Used for debug and as the filename this model may be saved in the disk
        est_class (class):
            Class of the wrapped estimator
        est_args (dict):
            Keyword arguments used to construct the estimator
        """
        self.name = name
        self.est_class = est_class
        self.est_args = est_args
        self.cache_suffix = ".pkl"
        self.est = None

    def _init_estimator(self):
        """
        You can re-implement this function when inheriting from this class
        """
        est = self.est_class(**self.est_args)
        return est

    def fit(self, X, y, cache_dir=None):
        """
        cache_dir (str):
            if None, the fitted estimator is kept in memory (self.est)
            otherwise, if a cached model already exists in cache_dir, fitting is skipped;
            else the estimator is fitted, saved to the cache and not kept in memory
        """
        LOGGER.debug("X_train.shape={}, y_train.shape={}".format(X.shape, y.shape))
        cache_path = self._cache_path(cache_dir)
        # cache
        if self._is_cache_exists(cache_path):
            LOGGER.info("Find estimator from {} . skip process".format(cache_path))
            return
        est = self._init_estimator()
        self._fit(est, X, y)
        if cache_path is not None:
            # saved in disk
            LOGGER.info("Save estimator to {} ...".format(cache_path))
            check_dir(cache_path)
            # save the estimator that was just fitted; self.est is not set on this path
            self._save_model_to_disk(est, cache_path)
            self.est = None
        else:
            # keep in memory
            self.est = est

    def predict_proba(self, X, cache_dir=None, batch_size=None):
        """
        cache_dir (str):
            if not None, the estimator is loaded from the cache; otherwise self.est is used
        batch_size (int):
            if None or 0, _default_predict_batch_size decides; a value > 0 predicts in batches

        Returns the class probabilities produced by the estimator.
        """
        LOGGER.debug("X.shape={}".format(X.shape))
        cache_path = self._cache_path(cache_dir)
        # cache
        if cache_path is not None:
            LOGGER.info("Load estimator from {} ...".format(cache_path))
            est = self._load_model_from_disk(cache_path)
            LOGGER.info("done ...")
        else:
            est = self.est
        batch_size = batch_size or self._default_predict_batch_size(est, X)
        if batch_size > 0:
            y_proba = self._batch_predict_proba(est, X, batch_size)
        else:
            y_proba = self._predict_proba(est, X)
        LOGGER.debug("y_proba.shape={}".format(y_proba.shape))
        return y_proba

    def _cache_path(self, cache_dir):
        """
        Return the model file path inside cache_dir, or None when caching is disabled
        """
        if cache_dir is None:
            return None
        return osp.join(cache_dir, name2path(self.name) + self.cache_suffix)

    def _is_cache_exists(self, cache_path):
        return cache_path is not None and osp.exists(cache_path)

    def _batch_predict_proba(self, est, X, batch_size):
        """
        Predict X in consecutive slices of batch_size rows and assemble the result
        into one float32 array. Returns None when X has no rows.
        """
        LOGGER.debug("X.shape={}, batch_size={}".format(X.shape, batch_size))
        # silence the estimator while looping; restored after the loop
        if hasattr(est, "verbose"):
            verbose_backup = est.verbose
            est.verbose = 0
        n_datas = X.shape[0]
        y_pred_proba = None
        for j in range(0, n_datas, batch_size):
            LOGGER.info("[progress][batch_size={}] ({}/{})".format(batch_size, j, n_datas))
            y_cur = self._predict_proba(est, X[j:j+batch_size])
            if j == 0:
                # the number of classes is only known after the first batch
                n_classes = y_cur.shape[1]
                y_pred_proba = np.empty((n_datas, n_classes), dtype=np.float32)
            y_pred_proba[j:j+batch_size,:] = y_cur
        if hasattr(est, "verbose"):
            est.verbose = verbose_backup
        return y_pred_proba

    def _load_model_from_disk(self, cache_path):
        raise NotImplementedError()

    def _save_model_to_disk(self, est, cache_path):
        raise NotImplementedError()

    def _default_predict_batch_size(self, est, X):
        """
        You can re-implement this function when inheriting from this class

        Return
        ------
        predict_batch_size (int): default=0
            if = 0, predict_proba without batches
            if > 0, then predict_proba in batches of this size
        """
        return 0

    def _fit(self, est, X, y):
        est.fit(X, y)

    def _predict_proba(self, est, X):
        return est.predict_proba(X)
def xgb_train(train_config, X_train, y_train, X_test, y_test):
    """
    Train an xgboost booster and log its accuracy on the test data.

    train_config (dict):
        "param"     : parameters passed to xgb.train
        "num_round" : number of boosting rounds (converted with int())

    Returns None; training interrupted with Ctrl-C is logged and abandoned.
    """
    import xgboost as xgb
    LOGGER.info("X_train.shape={}, y_train.shape={}, X_test.shape={}, y_test.shape={}".format(
        X_train.shape, y_train.shape, X_test.shape, y_test.shape))
    booster_params = train_config["param"]
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dtest = xgb.DMatrix(X_test, label=y_test)
    n_rounds = int(train_config["num_round"])
    evals = [(dtrain, 'train'), (dtest, 'test')]
    try:
        booster = xgb.train(booster_params, dtrain, n_rounds, evals)
    except KeyboardInterrupt:
        LOGGER.info("Canceld by user's Ctrl-C action")
        return
    # the predicted class is the column with the highest score
    predicted = np.argmax(booster.predict(dtest), axis=1)
    n_correct = np.sum(predicted == y_test)
    acc = 100. * n_correct / len(y_test)
    LOGGER.info("accuracy={}%".format(acc))
class KFoldWrapper(object):
    """
    K-Fold Wrapper

    Fits one estimator per fold and returns out-of-fold probabilities for the
    training data plus fold-averaged probabilities for optional test sets.
    """
    def __init__(self, name, n_folds, est_class, est_args, random_state=None):
        """
        Parameters
        ----------
        name (str):
            Name of this wrapper; the estimator of fold k is named "<name>/<k>"
        n_folds (int):
            Number of folds.
            If n_folds=1, means no K-Fold
        est_class (class):
            Class of estimator
        est_args (dict):
            Arguments of estimator
        random_state (int):
            random_state used for KFolds split and Estimator
        """
        self.name = name
        self.n_folds = n_folds
        self.est_class = est_class
        self.est_args = est_args
        self.random_state = random_state
        self.estimator1d = [None for k in range(self.n_folds)]

    def _init_estimator(self, k):
        """
        Build the estimator of fold k from a copy of est_args plus random_state
        """
        est_args = self.est_args.copy()
        est_name = "{}/{}".format(self.name, k)
        est_args["random_state"] = self.random_state
        return self.est_class(est_name, est_args)

    def fit_transform(self, X, y, y_stratify, cache_dir=None, test_sets=None, eval_metrics=None, keep_model_in_mem=True):
        """
        X (ndarray):
            n x k or n1 x n2 x k
            to support windows_layer, X could have dim >2
        y (ndarray):
            n or n1 x n2
        y_stratify (list):
            used for StratifiedKFold or None means no stratify
        cache_dir (str): optional
            model cache directory; a sub directory derived from self.name is used
        test_sets (list): optional
            A list of (prefix, X_test, y_test) pairs.
            predict_proba for X_test will be returned
            use with keep_model_in_mem=False to save mem usage
            y_test could be None, otherwise use eval_metrics for debugging
        eval_metrics (list): optional
            A list of (str, callable functions)
        keep_model_in_mem (bool):
            if True, the fitted estimator of every fold is kept in self.estimator1d

        Return
        ------
        y_probas (list of ndarray):
            y_probas[0] holds the out-of-fold probabilities for X,
            y_probas[1 + i] the fold-averaged probabilities for test_sets[i]
        """
        if cache_dir is not None:
            cache_dir = osp.join(cache_dir, name2path(self.name))
        assert 2 <= len(X.shape) <= 3, "X.shape should be n x k or n x n2 x k"
        assert len(X.shape) == len(y.shape) + 1
        # y_stratify is optional, so its length can only be checked when it is given
        assert y_stratify is None or X.shape[0] == len(y_stratify)
        test_sets = test_sets if test_sets is not None else []
        eval_metrics = eval_metrics if eval_metrics is not None else []
        # K-Fold split
        n_stratify = X.shape[0]
        sample_idx = np.arange(n_stratify)
        if self.n_folds == 1:
            # no K-Fold: train and validate on all samples
            cv = [(sample_idx, sample_idx)]
        else:
            if y_stratify is None:
                skf = KFold(n_splits=self.n_folds, shuffle=True, random_state=self.random_state)
                cv = [(t, v) for (t, v) in skf.split(sample_idx)]
            else:
                skf = StratifiedKFold(n_splits=self.n_folds, shuffle=True, random_state=self.random_state)
                cv = [(t, v) for (t, v) in skf.split(sample_idx, y_stratify)]
        # Fit
        y_probas = []
        n_dims = X.shape[-1]
        for k in range(self.n_folds):
            est = self._init_estimator(k)
            train_idx, val_idx = cv[k]
            # fit on k-fold train
            est.fit(X[train_idx].reshape((-1, n_dims)), y[train_idx].reshape(-1), cache_dir=cache_dir)

            # predict on k-fold validation
            y_proba = est.predict_proba(X[val_idx].reshape((-1, n_dims)), cache_dir=cache_dir)
            if len(X.shape) == 3:
                y_proba = y_proba.reshape((len(val_idx), -1, y_proba.shape[-1]))
            self.log_eval_metrics(self.name, y[val_idx], y_proba, eval_metrics, "train_{}".format(k))

            # merging result
            if k == 0:
                # the output shape is only known after the first prediction
                if len(X.shape) == 2:
                    y_proba_cv = np.zeros((n_stratify, y_proba.shape[1]), dtype=np.float32)
                else:
                    y_proba_cv = np.zeros((n_stratify, y_proba.shape[1], y_proba.shape[2]), dtype=np.float32)
                y_probas.append(y_proba_cv)
            y_probas[0][val_idx, :] += y_proba
            if keep_model_in_mem:
                self.estimator1d[k] = est

            # test
            for vi, (prefix, X_test, y_test) in enumerate(test_sets):
                y_proba = est.predict_proba(X_test.reshape((-1, n_dims)), cache_dir=cache_dir)
                if len(X.shape) == 3:
                    y_proba = y_proba.reshape((X_test.shape[0], X_test.shape[1], y_proba.shape[-1]))
                if k == 0:
                    y_probas.append(y_proba)
                else:
                    y_probas[vi + 1] += y_proba
        # test predictions were summed over folds; turn the sums into averages
        for y_proba in y_probas[1:]:
            y_proba /= self.n_folds
        # log
        self.log_eval_metrics(self.name, y, y_probas[0], eval_metrics, "train")
        for vi, (test_name, X_test, y_test) in enumerate(test_sets):
            if y_test is not None:
                self.log_eval_metrics(self.name, y_test, y_probas[vi + 1], eval_metrics, test_name)
        return y_probas

    def log_eval_metrics(self, est_name, y_true, y_proba, eval_metrics, y_name):
        """
        y_true (ndarray): n or n1 x n2
        y_proba (ndarray): n x n_classes or n1 x n2 x n_classes
        eval_metrics (list): (name, callable) pairs; None disables logging
        """
        if eval_metrics is None:
            return
        for (eval_name, eval_metric) in eval_metrics:
            accuracy = eval_metric(y_true, y_proba)
            LOGGER.info("Accuracy({}.{}.{})={:.2f}%".format(est_name, y_name, eval_name, accuracy * 100.))

    def predict_proba(self):
        """
        Placeholder: not implemented, always returns None.
        The disabled draft below is kept for reference.
        """
        ## check top cache
        #top = self.data_cache.get(phase, top_name, ignore_no_exist=True)
        #if top is not None:
        #    LOGGER.info("[data] top cache exists, skip process. tops[ti].shape={}".format(top.shape))
        #    return
        ## init X
        #if X is None:
        #    bottoms = self.data_cache.gets(phase, self.bottom_names[:-1])
        #    LOGGER.info('[data] name={}, bottoms.shape={}'.format(self.name, repr_blobs_shape(bottoms)))
        #    X = np.concatenate(bottoms, axis=1)
        #    n, c, h, w = X.shape
        #    X, nh, nw = get_windows(X, self.win_x, self.win_y, self.stride_x, self.stride_y, self.pad_x, self.pad_y)

        ## predict
        #estimator1d = self.estimator2d[ti]
        #y_proba_cv = None
        #for k, est in enumerate(estimator1d):
        #    model_cache_path = self.model_cache_path("{}-cv_{}_{}".format(top_name, k, self.n_folds))
        #    y_proba = est.predict_proba(X, model_cache_path)
        #    if y_proba_cv is None:
        #        y_proba_cv = y_proba
        #    else:
        #        y_proba_cv += y_proba
        #y_proba_cv /= self.n_folds
        #y_proba_cv = y_proba_cv.reshape((n, nh, nw, self.n_classes)).transpose((0, 3, 1, 2))
        #return y_proba_cv
        pass
def forest_predict_batch_size(clf, X):
    """
    Choose a prediction batch size for a forest from the free system memory.

    clf: fitted forest exposing n_classes_ and n_estimators
    X (ndarray): data to predict; only X.shape[0] is used

    Returns 0 when the computed batch size covers all rows of X,
    otherwise the batch size (at least 10).
    """
    import psutil
    free_memory = psutil.virtual_memory().free
    if free_memory < 2e9:
        free_memory = int(2e9)
    # NOTE(review): max() makes 8e10 a lower bound on the budget, so the free
    # memory only matters above 160GB - confirm whether min() was intended
    max_mem_size = max(int(free_memory * 0.5), int(8e10))
    # memory estimate for one sample: 16 bytes per (class, tree) entry
    mem_size_1 = clf.n_classes_ * clf.n_estimators * 16
    # ceil division; "//" keeps the result an int under true division too
    batch_size = (max_mem_size - 1) // mem_size_1 + 1
    if batch_size < 10:
        batch_size = 10
    if batch_size >= X.shape[0]:
        return 0
    return batch_size


class SKlearnBaseClassifier(BaseClassifierWrapper):
    """
    Classifier wrapper that stores its model on disk with joblib.
    """
    def _load_model_from_disk(self, cache_path):
        return joblib.load(cache_path)

    def _save_model_to_disk(self, clf, cache_path):
        joblib.dump(clf, cache_path)


class GCExtraTreesClassifier(SKlearnBaseClassifier):
    """
    Wrapper around sklearn.ensemble.ExtraTreesClassifier.
    """
    def __init__(self, name, kwargs):
        # imported lazily so sklearn.ensemble is only loaded when needed
        from sklearn.ensemble import ExtraTreesClassifier
        super(GCExtraTreesClassifier, self).__init__(name, ExtraTreesClassifier, kwargs)

    def _default_predict_batch_size(self, clf, X):
        return forest_predict_batch_size(clf, X)


class GCRandomForestClassifier(SKlearnBaseClassifier):
    """
    Wrapper around sklearn.ensemble.RandomForestClassifier.
    """
    def __init__(self, name, kwargs):
        # imported lazily so sklearn.ensemble is only loaded when needed
        from sklearn.ensemble import RandomForestClassifier
        super(GCRandomForestClassifier, self).__init__(name, RandomForestClassifier, kwargs)

    def _default_predict_batch_size(self, clf, X):
        return forest_predict_batch_size(clf, X)
def concat_datas(datas):
    """
    Flatten every array in a list to 2D (n x -1) and concatenate them along axis 1.

    Anything that is not a list is returned unchanged.
    The list passed in is left untouched.
    """
    if not isinstance(datas, list):
        return datas
    flat = [data.reshape((data.shape[0], -1)) for data in datas]
    return np.concatenate(flat, axis=1)

def data_norm(X_train, X_test):
    """
    Normalize X_train and X_test IN PLACE with the per-feature mean / std of X_train.

    Returns (X_mean, X_std).
    """
    X_mean = np.mean(X_train, axis=0)
    X_std = np.std(X_train, axis=0)
    X_train -= X_mean
    X_train /= X_std
    X_test -= X_mean
    X_test /= X_std
    return X_mean, X_std

def append_origin(X, X_origin):
    """
    Flatten X and X_origin to 2D and stack them side by side (X first).
    """
    return np.hstack((X.reshape((X.shape[0], -1)), X_origin.reshape((X_origin.shape[0], -1))))

def prec_ets(n_trees, X_train, y_train, X_test, y_test, random_state=None):
    """
    ExtraTrees

    Fit on the train data, log the accuracy on the test data.
    Returns (clf, y_pred).
    """
    from sklearn.ensemble import ExtraTreesClassifier
    # sparse matrices are passed through as they are
    if not issparse(X_train):
        X_train = X_train.reshape((X_train.shape[0], -1))
    if not issparse(X_test):
        X_test = X_test.reshape((X_test.shape[0], -1))
    LOGGER.info('start predict: n_trees={},X_train.shape={},y_train.shape={},X_test.shape={},y_test.shape={}'.format(
        n_trees, X_train.shape, y_train.shape, X_test.shape, y_test.shape))
    clf = ExtraTreesClassifier(n_estimators=n_trees, max_depth=None, n_jobs=-1, verbose=1, random_state=random_state)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    prec = float(np.sum(y_pred == y_test)) / len(y_test)
    LOGGER.info('prec_ets{}={:.6f}%'.format(n_trees, prec*100.0))
    return clf, y_pred

def prec_rf(n_trees, X_train, y_train, X_test, y_test):
    """
    RandomForest

    Fit on the train data, log the accuracy on the test data.
    Returns (clf, y_pred).
    """
    from sklearn.ensemble import RandomForestClassifier
    # sparse matrices are passed through as they are
    if not issparse(X_train):
        X_train = X_train.reshape((X_train.shape[0], -1))
    if not issparse(X_test):
        X_test = X_test.reshape((X_test.shape[0], -1))
    LOGGER.info('start predict: n_trees={},X_train.shape={},y_train.shape={},X_test.shape={},y_test.shape={}'.format(
        n_trees, X_train.shape, y_train.shape, X_test.shape, y_test.shape))
    clf = RandomForestClassifier(n_estimators=n_trees, max_depth=None, n_jobs=-1, verbose=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    prec = float(np.sum(y_pred == y_test)) / len(y_test)
    LOGGER.info('prec_rf{}={:.6f}%'.format(n_trees, prec*100.0))
    return clf, y_pred

def xgb_eval_accuracy(y_pred_proba, y_true):
    """
    Evaluation function returning ('accuracy', -accuracy).

    y_pred_proba (ndarray): n x n_classes
    y_true (DMatrix)
    """
    y_pred = np.argmax(y_pred_proba, axis=1)
    y_true = y_true.get_label()
    acc = float(np.sum(y_pred == y_true)) / len(y_pred)
    return 'accuracy', -acc

def prec_xgb(n_trees, max_depth, X_train, y_train, X_test, y_test, learning_rate=0.1):
    """
    XGBoost

    Fit on the train data (the test data is used as eval_set), log the accuracy
    on the test data.
    Returns (clf, y_pred).
    """
    import xgboost as xgb
    X_train = X_train.reshape((X_train.shape[0], -1))
    X_test = X_test.reshape((X_test.shape[0], -1))
    LOGGER.info('start predict: n_trees={},X_train.shape={},y_train.shape={},X_test.shape={},y_test.shape={}'.format(
        n_trees, X_train.shape, y_train.shape, X_test.shape, y_test.shape))
    clf = xgb.XGBClassifier(n_estimators=n_trees, max_depth=max_depth, objective='multi:softprob',
            seed=0, silent=True, nthread=-1, learning_rate=learning_rate)
    eval_set = [(X_test, y_test)]
    clf.fit(X_train, y_train, eval_set=eval_set, eval_metric="merror")
    y_pred = clf.predict(X_test)
    prec = float(np.sum(y_pred == y_test)) / len(y_test)
    LOGGER.info('prec_xgb_{}={:.6f}%'.format(n_trees, prec*100.0))
    return clf, y_pred

def prec_log(X_train, y_train, X_test, y_test):
    """
    LogisticRegression

    Fit on the train data, log the accuracy on the test data.
    Returns (clf, y_pred).
    """
    from sklearn.linear_model import LogisticRegression
    # sparse matrices are passed through as they are; dense data is flattened once
    if not issparse(X_train):
        X_train = X_train.reshape((X_train.shape[0], -1))
    if not issparse(X_test):
        X_test = X_test.reshape((X_test.shape[0], -1))
    LOGGER.info('start predict: X_train.shape={},y_train.shape={},X_test.shape={},y_test.shape={}'.format(
        X_train.shape, y_train.shape, X_test.shape, y_test.shape))
    clf = LogisticRegression(solver='sag', n_jobs=-1, verbose=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    prec = float(np.sum(y_pred == y_test)) / len(y_test)
    LOGGER.info('prec_log={:.6f}%'.format(prec*100.0))
    return clf, y_pred

def plot_forest_all_proba(y_proba_all, y_gt):
    """
    Show a (num_tree x N) matrix holding, for each entry of y_proba_all, the
    probability it assigns to the ground-truth class of each sample.

    y_proba_all (list of ndarray): each N x n_classes
    y_gt (ndarray): N ground-truth class indices
    """
    from matplotlib import pylab
    N = len(y_gt)
    num_tree = len(y_proba_all)
    pylab.clf()
    mat = np.zeros((num_tree, N))
    LOGGER.info('mat.shape={}'.format(mat.shape))
    for i in range(num_tree):
        mat[i,:] = y_proba_all[i][(range(N), y_gt)]
    pylab.matshow(mat, fignum=False, cmap='Blues', vmin=0, vmax=1.0)
    pylab.grid(False)
    pylab.show()

def plot_confusion_matrix(cm, label_list, title='Confusion matrix', cmap=None):
    """
    Plot the row-normalized confusion matrix, save it to 'test.jpg' and show it.

    cm (array-like): n_classes x n_classes counts, rows are true classes
    label_list (list): tick labels
    title (str): figure title
    cmap: matplotlib colormap; None means 'Blues'
    """
    from matplotlib import pylab
    cm = np.asarray(cm, dtype=np.float32)
    row_sums = cm.sum(axis=1, keepdims=True)
    # a class without samples keeps an all-zero row instead of dividing by zero
    row_sums[row_sums == 0] = 1
    # division builds a new array, the caller's matrix is not modified
    cm = cm / row_sums
    pylab.clf()
    pylab.matshow(cm, fignum=False, cmap='Blues' if cmap is None else cmap, vmin=0, vmax=1.0)
    ax = pylab.axes()
    ax.set_xticks(range(len(label_list)))
    ax.set_xticklabels(label_list, rotation='vertical')
    ax.xaxis.set_ticks_position('bottom')
    ax.set_yticks(range(len(label_list)))
    ax.set_yticklabels(label_list)
    pylab.title(title)
    pylab.colorbar()
    pylab.grid(False)
    pylab.xlabel('Predicted class')
    pylab.ylabel('True class')
    pylab.grid(False)
    pylab.savefig('test.jpg')
    pylab.show()
class FGNet(object):
    """
    GCForest : FineGrained Components
    """
    def __init__(self, net_config, data_cache):
        """
        net_config (dict): network config; uses the keys "inputs", "outputs",
            "layers" and "model_cache"
        data_cache: cache object shared by all layers
        """
        self.data_cache = data_cache
        # self.inputs must be set first: check_net_config reads it
        self.inputs = net_config.get("inputs", [])
        self.check_net_config(net_config)
        self.outputs = net_config.get("outputs", [])

        # layers
        self.layers = []
        self.name2layer = {}
        model_disk_base = net_config.get("model_cache", {}).get("disk_base", None)
        for layer_config in net_config["layers"]:
            layer = get_layer(layer_config, self.data_cache)
            layer.model_disk_base = model_disk_base
            self.layers.append(layer)
            self.name2layer[layer.name] = layer

    def fit_transform(self, X_train, y_train, X_test, y_test, train_config):
        """
        Store the train / test data in the data cache and run fit_transform on
        every layer in order.

        X_test, y_test could be None
        """
        LOGGER.info("X_train.shape={}, y_train.shape={}, X_test.shape={}, y_test.shape={}".format(
            X_train.shape, y_train.shape, None if X_test is None else X_test.shape, None if y_test is None else y_test.shape))
        self.update_xy("train", X_train, y_train)
        self.update_xy("test", X_test, y_test)
        for layer in self.layers:
            layer.fit_transform(train_config)

    def score(self):
        """
        Call score() on every layer in order.
        """
        for layer in self.layers:
            layer.score()

    def update_xy(self, phase, X, y):
        """
        Store X and y of the given phase in the data cache under "X" and "y".
        """
        self.data_cache.update(phase, "X", X)
        self.data_cache.update(phase, "y", y)

    def get_outputs(self, phase):
        """
        Return the data of all configured outputs for the given phase.
        """
        outputs = self.data_cache.gets(phase, self.outputs)
        return outputs

    def save_outputs(self, phase, save_y=True, save_path=None):
        """
        Pickle a dict {data_name: data} of the outputs (and "y" if save_y) of a phase.

        save_path (str): optional
            Defaults to <data_cache.cache_dir>/<phase>/outputs.pkl.
            If it is None and there is no cache_dir, an error is logged and
            nothing is saved.
        """
        if save_path is None:
            if self.data_cache.cache_dir is None:
                LOGGER.error("save path is None and data_cache.cache_dir is None!!! don't know where to save")
                return
            save_path = osp.join(self.data_cache.cache_dir, phase, "outputs.pkl")
        import pickle
        info = ""
        data_names = [name for name in self.outputs]
        if save_y:
            data_names.append("y")
        datas = {}
        for data_name in data_names:
            datas[data_name] = self.data_cache.get(phase, data_name)
            info = "{},{}->{}".format(info, data_name, datas[data_name].shape)
        LOGGER.info("outputs.shape={}".format(info))
        LOGGER.info("Saving Outputs in {} ".format(save_path))
        with open(save_path, "wb") as f:
            pickle.dump(datas, f, pickle.HIGHEST_PROTOCOL)

    def check_net_config(self, net_config):
        """
        check net_config

        Raises ValueError when
            - two layers have the same name
            - a bottom is neither "X", "y", an input nor a top of an earlier layer
            - two layers produce the same top
            - an output is not produced by anything
            - a layer has no tops
        Suspicious (but legal) layer / top names are only logged as warnings.
        """

        top2layer = {}
        name2layer = {}
        for li, layer_config in enumerate(net_config["layers"]):
            layer_name = layer_config["name"]
            if layer_name in name2layer:
                raise ValueError("layer name duplicate. layer_name={}, config1={}, config2={}".format(
                    layer_name, name2layer[layer_name], layer_config))
            name2layer[layer_name] = layer_config

            # bottoms must already exist when the layer is reached
            for bottom in layer_config["bottoms"]:
                if bottom != "X" and bottom != "y" and bottom not in self.inputs and bottom not in top2layer:
                    raise ValueError("li={}, layer_config={}, bottom({}) doesn't be produced by other layers".format(
                        li, layer_config, bottom))
            for top in layer_config["tops"]:
                if top in top2layer:
                    raise ValueError("top duplicate. layer({}) and layer({}) have same top blob: {}".format(
                        top2layer[top], layer_config["name"], top))
                top2layer[top] = layer_config["name"]

        outputs = net_config.get("outputs", [])
        if len(outputs) == 0:
            LOGGER.warn("outputs list is empty!!!")
        for output in outputs:
            if output == "X" or output == "y" or output in self.inputs or output in top2layer:
                continue
            raise ValueError("output data name not exist: output={}".format(output))

        for layer_config in net_config["layers"]:
            tops = layer_config["tops"]
            if len(tops) == 0:
                # report the broken config instead of failing with an IndexError below
                raise ValueError("layer has no tops: layer_config={}".format(layer_config))
            if len(tops) > 1:
                for top_name in tops:
                    if not top_name.startswith(layer_config["name"]):
                        LOGGER.warn("top_name is suggested to startswith layer_name: layer_config={}".format(layer_config))
            else:
                top = tops[0]
                if top != layer_config["name"]:
                    LOGGER.warn("layer_name != top_name, You should check to make sure this is what you want!!! layer_config={}".format(layer_config))

def strip(s):
    """
    Strip whitespace from s; None and empty results are returned as None.
    """
    if s is None: return None
    s = s.strip()
    if len(s) == 0:
        return None
    return s

class FGTrainConfig(object):
    """
    Train config for FGNet, read from a dict.
    """
    def __init__(self, train_config):
        """
        train_config (dict): uses the keys "keep_model_in_mem", "random_state",
            "model_cache_dir" and "data_cache" (required)
        """
        self.keep_model_in_mem = train_config.get("keep_model_in_mem", 0)
        self.random_state = train_config.get("random_state", 0)
        self.model_cache_dir = strip(train_config.get("model_cache_dir", None))
        self.data_cache = DataCache(train_config["data_cache"])

        # unless configured otherwise, X and y are kept in memory and not written to disk
        for data_name in ("X", "y"):
            if data_name not in self.data_cache.config["keep_in_mem"]:
                self.data_cache.config["keep_in_mem"][data_name] = 1
            if data_name not in self.data_cache.config["cache_in_disk"]:
                self.data_cache.config["cache_in_disk"][data_name] = 0
def get_layer_class(layer_type):
    """
    Map a layer type name to its class.

    layer_type (str): "FGWinLayer", "FGConcatLayer" or "FGPoolLayer"

    Raises ValueError for any other name.
    """
    if layer_type == "FGWinLayer":
        return FGWinLayer
    if layer_type == "FGConcatLayer":
        return FGConcatLayer
    if layer_type == "FGPoolLayer":
        return FGPoolLayer
    # a single formatted message, so str(error) reads as text and not as a tuple
    raise ValueError("Unknown Layer Type: {}".format(layer_type))

def get_layer(layer_config, data_cache):
    """
    Create the layer described by layer_config.

    layer_config (dict): config for layer; "type" selects the class and is
        removed from the copy handed to the layer, the dict passed in is not changed
    data_cache (gcforest.DataCache): DataCache
    """
    layer_config = layer_config.copy()
    layer_class = get_layer_class(layer_config.pop("type"))
    layer = layer_class(layer_config, data_cache)
    return layer
class BaseLayer(object):
    """
    Base class of all layers: keeps the layer config, the names of the
    bottoms / tops and the shared data cache.
    """
    def __init__(self, layer_config, data_cache):
        """
        layer_config (dict): must contain "name", "bottoms" and "tops"
        data_cache: cache object used to read bottoms and store tops
        """
        self.layer_config = layer_config
        self.name = layer_config["name"]
        self.bottom_names = layer_config["bottoms"]
        self.top_names = layer_config["tops"]
        self.data_cache = data_cache

    def get_value(self, key, default_value, value_types, required=False, config=None):
        """
        Read a value through get_config_value.

        config (dict): optional
            Looked up instead of the layer config; a falsy config (None or an
            empty dict) falls back to the layer config
        """
        return get_config_value(config or self.layer_config, key, default_value, value_types,
                required=required, config_name=self.name)

    def check_top_cache(self, phases, ti):
        """
        Check if top cache exists

        Parameters
        ---------
        phases: List of str
            e.g. ["train", "test"]
        ti: int
            top index

        Return
        ------
        exist_mask: ndarray of float, one entry per phase
            exist_mask[pi] is 1 if tops[ti] exists in the cache for phases[pi], else 0
        """
        top_name = self.top_names[ti]
        exist_mask = np.zeros(len(phases))
        for pi, phase in enumerate(phases):
            top = self.data_cache.get(phase, top_name, ignore_no_exist=True)
            exist_mask[pi] = top is not None
            if top is not None:
                LOGGER.info("[data][{},{}] top cache exists. tops[{}].shape={}".format(self.name, phase, ti, top.shape))
        return exist_mask

    def fit_transform(self, train_config):
        """
        Must be implemented by subclasses.
        """
        raise NotImplementedError()

    def score(self):
        """
        Does nothing by default.
        """
        pass
class FGConcatLayer(BaseLayer):
    """
    Concat Layer: joins all bottoms into its single top.
    """
    def __init__(self, layer_config, data_cache):
        """
        Reads the optional "axis" config value (int, default -1).
        Requires at least one bottom and exactly one top.
        """
        super(FGConcatLayer, self).__init__(layer_config, data_cache)
        self.axis = self.get_value("axis", -1, int)
        assert(len(self.bottom_names) > 0)
        assert(len(self.top_names) == 1)

    def fit_transform(self, train_config):
        # nothing to fit: the layer only transforms the train and test data
        LOGGER.info("[data][{}] bottoms={}, tops={}".format(self.name, self.bottom_names, self.top_names))
        self._transform(["train", "test"])

    def _transform(self, phases):
        """
        Concatenate the bottoms of every phase whose top is not cached yet.

        bottoms:
            for example: n x Ci x w x h
        axis == -1 flattens every bottom to n x -1 and concatenates along axis 1,
        any other axis is handed to np.concatenate as it is.
        """
        for phase in phases:
            # skip phases whose top is already cached
            already_cached = self.check_top_cache([phase], 0)[0]
            if already_cached:
                continue
            blobs = self.data_cache.gets(phase, self.bottom_names)
            LOGGER.info('[data][{},{}] bottoms.shape={}'.format(self.name, phase, repr_blobs_shape(blobs)))
            concat_axis = self.axis
            if concat_axis == -1:
                for bi, blob in enumerate(blobs):
                    n_samples = blob.shape[0]
                    blobs[bi] = blob.reshape((n_samples, -1))
                concat_axis = 1
            merged = np.concatenate(blobs, axis=concat_axis)
            LOGGER.info('[data][{},{}] tops[0].shape={}'.format(self.name, phase, merged.shape))
            self.data_cache.update(phase, self.top_names[0], merged)
class FGPoolLayer(BaseLayer):
    """
    Pooling Layer (MaxPooling, AveragePooling)
    """
    def __init__(self, layer_config, data_cache):
        """
        Config values:
            win_x, win_y (int): required, size of the pooling window
            pool_method (str): "avg" (default) or "max"
        """
        super(FGPoolLayer, self).__init__(layer_config, data_cache)
        # basestring only exists on Python 2
        try:
            string_types = basestring
        except NameError:
            string_types = str
        self.win_x = self.get_value("win_x", None, int, required=True)
        self.win_y = self.get_value("win_y", None, int, required=True)
        self.pool_method = self.get_value("pool_method", "avg", string_types)

    def fit_transform(self, train_config):
        # nothing to fit: the layer only transforms the train and test data
        LOGGER.info("[data][{}] bottoms={}, tops={}".format(self.name, self.bottom_names, self.top_names))
        self._transform(["train", "test"])

    def _transform(self, phases):
        """
        Pool bottoms[ti] into tops[ti] for every phase whose top is not cached yet.
        """
        for ti, top_name in enumerate(self.top_names):
            LOGGER.info("[progress][{}] ti={}/{}, top_name={}".format(self.name, ti, len(self.top_names), top_name))
            for phase in phases:
                # check top cache
                if self.check_top_cache([phase], ti)[0]:
                    continue
                X = self.data_cache.get(phase, self.bottom_names[ti])
                LOGGER.info('[data][{},{}] bottoms[{}].shape={}'.format(self.name, phase, ti, X.shape))
                X_pool = self._pool(X)
                LOGGER.info('[data][{},{}] tops[{}].shape={}'.format(self.name, phase, ti, X_pool.shape))
                self.data_cache.update(phase, top_name, X_pool)

    def _pool(self, X):
        """
        Pool a n x c x h x w array with non-overlapping win_y x win_x windows.

        Windows at the bottom / right border are smaller when h or w is not a
        multiple of the window size.
        Returns a float32 array of shape n x c x ceil(h / win_y) x ceil(w / win_x).
        Raises ValueError for an unknown pool_method.
        """
        if self.pool_method == 'max':
            reduce_func = np.max
        elif self.pool_method == 'avg':
            reduce_func = np.mean
        else:
            raise ValueError('Unkown Pool Method, pool_method={}'.format(self.pool_method))
        n, c, h, w = X.shape
        win_x, win_y = self.win_x, self.win_y
        # ceil division; "//" keeps the sizes ints under true division too
        nh = (h - 1) // win_y + 1
        nw = (w - 1) // win_x + 1
        X_pool = np.empty((n, c, nh, nw), dtype=np.float32)
        for di in range(nh):
            for dj in range(nw):
                si = di * win_y
                sj = dj * win_x
                # all channels of one window at once: n x c x (window pixels)
                src = X[:, :, si:si+win_y, sj:sj+win_x].reshape((n, c, -1))
                X_pool[:, :, di, dj] = reduce_func(src, axis=2)
        return X_pool
class FGWinLayer(BaseLayer):
    """
    Sliding window layer: every estimator is trained on the windows cut from
    the bottoms and produces one top.
    """
    def __init__(self, layer_config, data_cache):
        """
        estimators (list of dict):
            configs of the estimators, one per top
        win_x, win_y, stride_x, stride_y, pad_x, pad_y (int):
            configs for windows
        n_classes (int):
            number of classes

        The last bottom is used as the label, all other bottoms as data.
        """
        super(FGWinLayer, self).__init__(layer_config, data_cache)
        # estimator
        self.est_configs = self.get_value("estimators", None, list, required=True)
        # windows
        self.win_x = self.get_value("win_x", None, int, required=True)
        self.win_y = self.get_value("win_y", None, int, required=True)
        self.stride_x = self.get_value("stride_x", 1, int)
        self.stride_y = self.get_value("stride_y", 1, int)
        self.pad_x = self.get_value("pad_x", 0, int)
        self.pad_y = self.get_value("pad_y", 0, int)
        self.n_classes = self.get_value("n_classes", None, int, required=True)
        assert len(self.bottom_names) >= 2
        assert len(self.est_configs) == len(self.top_names), "Each estimator shoud produce one unique top"
        self.eval_metrics = [("predict", accuracy_pb), ("vote", accuracy_win_vote), ("avg", accuracy_win_avg)]
        self.estimator1d = [None] * len(self.est_configs)

    def _init_estimators(self, ei, random_state):
        """
        ei (int): estimator index
        random_state (int): base random state; a hash of the estimator name is added to it

        est_configs[ei] must contain "n_folds" (1 means do not use k-fold) and "type",
        the remaining keys are passed on as estimator arguments.
        """
        est_args = self.est_configs[ei].copy()
        raw_n_folds = est_args.pop("n_folds")
        est_name ="{}/{}_folds".format(self.top_names[ei], raw_n_folds)
        n_folds = int(raw_n_folds)
        est_type = est_args.pop("type")
        name_hash = hash("[estimator] {}".format(est_name))
        est_random_state = (random_state + name_hash) % 1000000007
        return get_estimator_kfold(est_name, n_folds, est_type, est_args, random_state=est_random_state)

    def _load_windows(self, phase):
        """
        Cut the windows for one phase.

        Returns (X_win, y_win, nh, nw, y) where X_win is n x n_windows x channel,
        y_win is n x n_windows and nh x nw is the window grid.
        """
        bottoms = self.data_cache.gets(phase, self.bottom_names)
        LOGGER.info('[data][{},{}] bottoms.shape={}'.format(self.name, phase, repr_blobs_shape(bottoms)))
        y = bottoms[-1]
        X = np.concatenate(bottoms[:-1], axis=1)
        X_win = get_windows(X, self.win_x, self.win_y, self.stride_x, self.stride_y, self.pad_x, self.pad_y)
        _, nh, nw, _ = X_win.shape
        X_win = X_win.reshape((X_win.shape[0], -1, X_win.shape[-1]))
        # every window gets the label of its sample
        y_win = y[:,np.newaxis].repeat(X_win.shape[1], axis=1)
        return X_win, y_win, nh, nw, y

    def fit_transform(self, train_config):
        """
        Fit one estimator per top and store its predictions for the train and
        test data as n x n_classes x nh x nw arrays in the data cache.
        """
        LOGGER.info("[data][{}], bottoms={}, tops={}".format(self.name, self.bottom_names, self.top_names))
        phases = ["train", "test"]
        X_train_win = None
        y_train_win = None
        test_sets = None

        for ti, top_name in enumerate(self.top_names):
            LOGGER.info("[progress][{}] ti={}/{}, top_name={}".format(self.name, ti, len(self.top_names), top_name))
            # check top cache
            if np.all(self.check_top_cache(phases, ti)):
                LOGGER.info("[data][{}] all top cache exists. skip progress".format(self.name))
                continue

            # windows are built only once, for the first top that needs fitting
            if X_train_win is None:
                for pi, phase in enumerate(phases):
                    X_win, y_win, nh, nw, y = self._load_windows(phase)
                    if pi != 0:
                        test_sets = [("test", X_win, y_win), ]
                        continue
                    assert self.n_classes == len(np.unique(y)), \
                            "n_classes={}, len(unique(y))={}".format(self.n_classes, len(np.unique(y)))
                    X_train_win, y_train_win = X_win, y_win

            # fit
            est = self._init_estimators(ti, train_config.random_state)
            y_probas = est.fit_transform(
                    X_train_win, y_train_win, y_train_win[:,0],
                    cache_dir=train_config.model_cache_dir,
                    test_sets=test_sets,
                    eval_metrics=self.eval_metrics,
                    keep_model_in_mem=train_config.keep_model_in_mem)

            for pi, phase in enumerate(phases):
                # n x n_windows x n_classes -> n x n_classes x nh x nw
                proba_grid = y_probas[pi].reshape((-1, nh, nw, self.n_classes))
                y_proba = proba_grid.transpose((0, 3, 1, 2))
                LOGGER.info('[data][{},{}] tops[{}].shape={}'.format(self.name, phase, ti, y_proba.shape))
                self.data_cache.update(phase, self.top_names[ti], y_proba)
            if train_config.keep_model_in_mem:
                self.estimator1d[ti] = est

    def score(self):
        """
        Log the accuracies (predict, vote, avg) of every top for train and test.
        """
        eval_metrics = [("predict", accuracy_pb), ("vote", accuracy_win_vote), ("avg", accuracy_win_avg)]
        label_name = self.bottom_names[-1]
        for top_name in self.top_names:
            for phase in ("train", "test"):
                y = self.data_cache.get(phase, label_name)
                y_proba = self.data_cache.get(phase, top_name)
                # n x n_classes x nh x nw -> n x n_windows x n_classes
                y_proba = y_proba.transpose((0,2,3,1))
                n_samples, n_channels = y_proba.shape[0], y_proba.shape[3]
                y_proba = y_proba.reshape((n_samples, -1, n_channels))
                y = y[:,np.newaxis].repeat(y_proba.shape[1], axis=1)
                for eval_name, eval_metric in eval_metrics:
                    acc = eval_metric(y, y_proba)
                    LOGGER.info("Accuracy({}.{}.{})={:.2f}%".format(top_name, phase, eval_name, acc*100))
# ---- lib/gcforest/utils/audio_utils.py (continued) ----

def get_feature_aqibsaeed_conv(X, sr, au_path=None):
    """
    http://aqibsaeed.github.io/2016-09-24-urban-sound-classification-part-2/

    Cut X into half-overlapping windows of 512 * (frames - 1) samples and compute a flattened
    log-mel spectrogram (60 mel bands) for every full-length window.

    Returns a list with one array of shape (1, k) per full window (empty when X is shorter
    than one window).

    NOTE(review): the original never created the accumulator list and returned nothing, so it
    looks unfinished; returning the collected windows is the minimal completion. `sr` and
    `au_path` are accepted but not used. `librosa.logamplitude` presumably only exists in old
    librosa releases -- confirm against the pinned version.
    """
    import librosa

    def windows(data, window_size):
        start = 0
        while start < len(data):
            yield start, start + window_size
            # floor division keeps the offset an int, so it stays usable as a slice bound
            start += window_size // 2

    bands = 60
    frames = 41
    window_size = 512 * (frames - 1)
    log_specgrams = []
    for (start, end) in windows(X, window_size):
        signal = X[start:end]
        # the trailing, shorter-than-a-window pieces are dropped
        if len(signal) == window_size:
            melspec = librosa.feature.melspectrogram(y=signal, n_mels=bands)
            logspec = librosa.logamplitude(melspec)
            logspec = logspec.T.flatten()[:, np.newaxis].T
            log_specgrams.append(logspec)
    return log_specgrams


# ---- lib/gcforest/utils/cache_utils.py ----

def name2path(name):
    """
    Replace '/' in name by '-'
    """
    return name.replace("/", "-")


# ---- lib/gcforest/utils/config_utils.py ----

def load_json(path):
    """
    Load a JSON file, supporting comment lines that start with //

    Only whole lines whose first non-blank characters are // are dropped;
    a // that follows other content on the same line is left in place.
    """
    import json
    with open(path) as f:
        kept = [row for row in f if not row.strip().startswith("//")]
    # rows keep their own line endings, so they can be glued back together directly
    return json.loads("".join(kept))


def get_config_value(config, key, default_value, value_types, required=False, config_name=None):
    """
    Look up `key` in `config`, with an optional type check.

    Parameters
    ----------
    config: dict
        Config dictionary
    key: str
        Config's key
    default_value: object
        Returned when key is absent in config (and not required)
    value_types: Type or List of Types
        if not None, the value must be an instance of at least one of these types;
        a value of None is never type-checked
    required: bool
        if the key is required in config
    config_name: str
        used for debug (prefix of the error messages)

    Raises
    ------
    ValueError
        if a required key is absent, or the value matches none of value_types
    """
    if config_name is not None:
        log_prefix = "[{}] ".format(config_name)
    else:
        log_prefix = ""
    if key not in config:
        if required:
            raise ValueError("{}config={}, key={} is absent but it's required !!!".format(log_prefix, config, key))
        return default_value
    value = config[key]
    # value type check
    if value is not None:
        if value_types is None:
            value_types = []
        elif not isinstance(value_types, list):
            value_types = [value_types]
        # any-of semantics: one matching type is enough (an empty list disables the check)
        if value_types and not isinstance(value, tuple(value_types)):
            raise ValueError("{}config={}, Value type not matched!!! key={}, value={}, value_types={}".format(
                log_prefix, config, key, value, value_types))
    return value


# ---- lib/gcforest/utils/debug_utils.py ----

def repr_blobs_shape(blobs):
    """
    Render the shapes of `blobs` as one string, e.g. "2x3,null,4".

    Each blob contributes its shape joined by 'x'; a blob that is None contributes 'null'.
    """
    return ','.join('null' if b is None else 'x'.join(map(str, b.shape)) for b in blobs)


# ---- lib/gcforest/utils/log_utils.py ----

def strftime(t = None):
    """
    Format `t` (seconds since the epoch) as local time "YYYYmmdd-HHMMSS".
    The current time is used when t is omitted (or falsy, e.g. 0).
    """
    return time.strftime("%Y%m%d-%H%M%S", time.localtime(t or time.time()))


#################
# Logging
#################
_LOG_FORMAT = "[ %(asctime)s][%(module)s.%(funcName)s] %(message)s"
logging.basicConfig(format=_LOG_FORMAT)

DEFAULT_LEVEL = logging.INFO
DEFAULT_LOGGING_DIR = osp.join("logs", "gcforest")
# shared file handler, created on first use by init_fh()
fh = None


def init_fh():
    """
    Create the shared log-file handler once.

    Does nothing when the handler already exists or when DEFAULT_LOGGING_DIR is None.
    Otherwise creates DEFAULT_LOGGING_DIR if needed and opens "<timestamp>.log" inside it.
    """
    global fh
    if fh is not None:
        return
    if DEFAULT_LOGGING_DIR is None:
        return
    if not osp.exists(DEFAULT_LOGGING_DIR):
        os.makedirs(DEFAULT_LOGGING_DIR)
    logging_path = osp.join(DEFAULT_LOGGING_DIR, strftime() + ".log")
    fh = logging.FileHandler(logging_path)
    fh.setFormatter(logging.Formatter(_LOG_FORMAT))


def update_default_level(defalut_level):
    """Set the level used by get_logger() when no explicit level is passed."""
    global DEFAULT_LEVEL
    DEFAULT_LEVEL = defalut_level


def update_default_logging_dir(default_logging_dir):
    """
    Set the directory used for the log file; None disables file logging.
    Has no effect on a file handler that init_fh() already created.
    """
    global DEFAULT_LOGGING_DIR
    DEFAULT_LOGGING_DIR = default_logging_dir


def get_logger(name="gcforest", level=None):
    """
    Return the logger `name`, set to `level` (DEFAULT_LEVEL when level is None or 0)
    and attached to the shared file handler if file logging is enabled.
    """
    level = level or DEFAULT_LEVEL
    logger = logging.getLogger(name)
    logger.setLevel(level)
    init_fh()
    if fh is not None:
        logger.addHandler(fh)
    return logger
# ---- lib/gcforest/utils/metrics.py ----
# win_vote / win_avg used below are the win_utils.py helpers (defined further down in this block).

def accuracy(y_true, y_pred):
    """Return the fraction of positions where y_true equals y_pred (1-d arrays of equal length)."""
    return 1.0 * np.sum(y_true == y_pred) / len(y_true)


def accuracy_pb(y_true, y_proba):
    """
    Accuracy of the arg-max prediction, counting every row (e.g. every window) on its own.

    Parameters
    ----------
    y_true: labels, flattened to 1-d
    y_proba: probabilities; flattened to (-1, n_classes) using its last axis
    """
    y_true = y_true.reshape(-1)
    y_pred = np.argmax(y_proba.reshape((-1, y_proba.shape[-1])), 1)
    return accuracy(y_true, y_pred)


def accuracy_win_vote(y_true, y_proba):
    """
    Per-sample accuracy where each sample's label is the majority vote of its windows.

    Parameters
    ----------
    y_true: n x n_windows
        only column 0 is read as the sample label
    y_proba: n x n_windows x n_classes
    """
    n_classes = y_proba.shape[-1]
    y_pred = win_vote(np.argmax(y_proba, axis=2), n_classes)
    return accuracy(y_true[:,0], y_pred)


def accuracy_win_avg(y_true, y_proba):
    """
    Per-sample accuracy where each sample's label is the arg-max of its window-averaged probabilities.

    Parameters
    ----------
    y_true: n x n_windows
        only column 0 is read as the sample label
    y_proba: n x n_windows x n_classes
    """
    y_pred = win_avg(y_proba)
    return accuracy(y_true[:,0], y_pred)


# ---- lib/gcforest/utils/win_utils.py ----
# Relies on the module preamble of win_utils.py for: np, Parallel, delayed (joblib) and LOGGER.

def get_windows_channel(X, X_win, des_id, nw, nh, win_x, win_y, stride_x, stride_y):
    """
    Fill one row of X_win in place.

    X: N x C x H x W
    X_win: nc x (N * nh * nw), written in place
    des_id: row of X_win to fill; it encodes (k, di, dj) in range(X.channel, win_y, win_x)
        as des_id = (k * win_y + di) * win_x + dj
    """
    # decode des_id; floor division keeps the indices ints on Python 3 as well as Python 2
    dj = des_id % win_x
    di = des_id // win_x % win_y
    k = des_id // win_x // win_y
    # value at offset (di, dj) of every window, for channel k, over all samples
    src = X[:, k, di:di+nh*stride_y:stride_y, dj:dj+nw*stride_x:stride_x].ravel()
    des = X_win[des_id, :]
    np.copyto(des, src)


def get_windows(X, win_x, win_y, stride_x=1, stride_y=1, pad_x=0, pad_y=0):
    """
    parallizing get_windows
    Arguments:
        X (ndarray): n x c x h x w
        win_x, win_y (int): window width / height
        stride_x, stride_y (int): step between windows, default=1
        pad_x, pad_y (int): number of zero columns / rows added on BOTH sides, default=0
    Return:
        X_win (ndarray): n x nh x nw x nc, dtype float32, with nc = win_y * win_x * c
    """
    assert len(X.shape) == 4
    n, c, h, w = X.shape
    if pad_y > 0:
        X = np.concatenate(( X, np.zeros((n, c, pad_y, w),dtype=X.dtype) ), axis=2)
        X = np.concatenate(( np.zeros((n, c, pad_y, w),dtype=X.dtype), X ), axis=2)
    n, c, h, w = X.shape
    if pad_x > 0:
        X = np.concatenate(( X, np.zeros((n, c, h, pad_x),dtype=X.dtype) ), axis=3)
        X = np.concatenate(( np.zeros((n, c, h, pad_x),dtype=X.dtype), X ), axis=3)
    n, c, h, w = X.shape
    nc = win_y * win_x * c
    # window counts must be ints: they size X_win and the final reshape
    nh = (h - win_y) // stride_y + 1
    nw = (w - win_x) // stride_x + 1
    X_win = np.empty(( nc, n * nh * nw), dtype=np.float32)
    LOGGER.info("get_windows_start: X.shape={}, X_win.shape={}, nw={}, nh={}, c={}, win_x={}, win_y={}, stride_x={}, stride_y={}".format(
        X.shape, X_win.shape, nw, nh, c, win_x, win_y, stride_x, stride_y))
    # each task fills its own row of X_win, so the threads never write to the same memory
    Parallel(n_jobs=-1, backend="threading", verbose=0)(
            delayed(get_windows_channel)(X, X_win, des_id, nw, nh, win_x, win_y, stride_x, stride_y)
            for des_id in range(c * win_x * win_y))
    LOGGER.info("get_windows_end")
    X_win = X_win.transpose((1, 0))
    X_win = X_win.reshape((n, nh, nw, nc))
    return X_win


def calc_accuracy(y_gt, y_pred, tag):
    """Log the percentage of positions where y_gt equals y_pred, labelled with `tag`."""
    LOGGER.info("Accuracy({})={:.2f}%".format(tag, np.sum(y_gt==y_pred)*100./len(y_gt)))


def win_vote(y_win_predict, n_classes):
    """
    Majority vote over the windows of each sample.

    y_win_predict (ndarray): n x n_window
        y_win_predict[i, j] prediction for the ith data of jth window
    n_classes (int): minimum length of the per-sample vote histogram

    Returns an int16 array of length n; ties go to the smallest class index (np.argmax).
    """
    y_pred = np.zeros(len(y_win_predict), dtype=np.int16)
    for i, y_bag in enumerate(y_win_predict):
        y_pred[i] = np.argmax(np.bincount(y_bag,minlength=n_classes))
    return y_pred


def win_avg(y_win_proba):
    """
    Arg-max of the window-averaged class probabilities of each sample.

    Parameters
    ----------
    y_win_proba: n x n_windows x n_classes
    """
    y_bag_proba = np.mean(y_win_proba, axis=1)
    y_pred = np.argmax(y_bag_proba, axis=1)
    return y_pred
--- /dev/null +++ b/models/cifar10/gcforest/fg-tree500-depth100-3folds-ca.json @@ -0,0 +1,46 @@ +//Description: A python 2.7 implementation of gcForest proposed in [1]. A demo implementation of gcForest library as well as some demo client scripts to demonstrate how to use the code. The implementation is flexible enough for modifying the model or +//fitting your own datasets. +//Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. (https://arxiv.org/abs/1702.08835v2 ) +//Requirements: This package is developed with Python 2.7, please make sure all the dependencies are installed, which are specified in requirements.txt +//ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou(zhouzh@lamda.nju.edu.cn) +//ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explain how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng. 
+ +{ +"dataset": { + "train": { + "type": "ds_pickle2", + "data_path": "/storage/root/gcforest/cifar10/fg-tree500-depth100-3folds/datas/train/outputs.pkl", + "X_keys": ["pool1/8x8/ets", "pool1/8x8/rf", "pool1/11x11/ets", "pool1/11x11/rf", "pool1/16x16/ets", "pool1/16x16/rf"] + }, + "test": { + "type": "ds_pickle2", + "data_path": "/storage/root/gcforest/cifar10/fg-tree500-depth100-3folds/datas/test/outputs.pkl", + "X_keys": ["pool1/8x8/ets", "pool1/8x8/rf", "pool1/11x11/ets", "pool1/11x11/rf", "pool1/16x16/ets", "pool1/16x16/rf"] + } +}, +"cascade": { + "random_state": 0, + "max_layers": 100, + "early_stopping_rounds": 8, + "look_indexs_cycle": [ + [0, 1], + [2, 3], + [4, 5], + [6, 7], + [0, 1, 2, 3, 4, 5, 6, 7] + ], + "n_classes": 10, + "estimators": [ + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":1000,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":1000,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":1000,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":1000,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":1000,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":1000,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":1000,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":1000,"max_depth":null,"n_jobs":-1} + ], + "data_save_dir": "/storage/root/gcforest/cifar10/fg-tree500-depth100-3folds/cascade/datas", + "data_save_rounds": 5 +} +} diff --git a/models/cifar10/gcforest/fg-tree500-depth100-3folds.json b/models/cifar10/gcforest/fg-tree500-depth100-3folds.json new file mode 100644 index 0000000..1c937e8 --- /dev/null +++ b/models/cifar10/gcforest/fg-tree500-depth100-3folds.json @@ -0,0 +1,91 @@ 
+//Description: A python 2.7 implementation of gcForest proposed in [1]. A demo implementation of gcForest library as well as some demo client scripts to demostrate how to use the code. The implementation is flexible enough for modifying the model or +//fit your own datasets. +//Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. (https://arxiv.org/abs/1702.08835v2 ) +//Requirements: This package is developed with Python 2.7, please make sure all the demendencies are installed, which is specified in requirements.txt +//ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou(zhouzh@lamda.nju.edu.cn) +//ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explains how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng. + +{ +"dataset":{ + "train": {"type": "cifar10", "data_set": "train", "layout_x": "tensor"}, + "test": {"type": "cifar10", "data_set": "test", "layout_x": "tensor"} +}, +"train":{ + "keep_model_in_mem":0, + "random_state":0, + "data_cache":{ + "cache_in_disk":{ + "default":1 + }, + "keep_in_mem":{ + "default":0 + }, + "cache_dir":"/mnt/raid/fengji/gcforest/cifar10/fg-tree500-depth100-3folds/datas" + } +}, +"net":{ +"outputs": ["pool1/8x8/ets", "pool1/8x8/rf", "pool1/11x11/ets", "pool1/11x11/rf", "pool1/16x16/ets", "pool1/16x16/rf"], +"layers":[ +// win1/8x8 + { + "type":"FGWinLayer", + "name":"win1/8x8", + "bottoms": ["X","y"], + "tops":["win1/8x8/ets", "win1/8x8/rf"], + "n_classes": 10, + "estimators": [ + {"n_folds":3, "type":"ExtraTreesClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10}, + {"n_folds":3, "type":"RandomForestClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10} + ], + "stride_x": 2, + "stride_y": 2, + "win_x":8, + "win_y":8 + }, +// win1/11x11 + { 
+ "type":"FGWinLayer", + "name":"win1/11x11", + "bottoms": ["X","y"], + "tops":["win1/11x11/ets", "win1/11x11/rf"], + "n_classes": 10, + "estimators": [ + {"n_folds":3, "type":"ExtraTreesClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10}, + {"n_folds":3, "type":"RandomForestClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10} + ], + "stride_x": 2, + "stride_y": 2, + "win_x":11, + "win_y":11 + }, +// win1/16x16 + { + "type":"FGWinLayer", + "name":"win1/16x16", + "bottoms": ["X","y"], + "tops":["win1/16x16/ets", "win1/16x16/rf"], + "n_classes": 10, + "estimators": [ + {"n_folds":3, "type":"ExtraTreesClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10}, + {"n_folds":3, "type":"RandomForestClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10} + ], + "stride_x": 2, + "stride_y": 2, + "win_x":16, + "win_y":16 + }, +// pool1 + { + "type":"FGPoolLayer", + "name":"pool1", + "bottoms": ["win1/8x8/ets", "win1/8x8/rf", "win1/11x11/ets", "win1/11x11/rf", "win1/16x16/ets", "win1/16x16/rf"], + "tops": ["pool1/8x8/ets", "pool1/8x8/rf", "pool1/11x11/ets", "pool1/11x11/rf", "pool1/16x16/ets", "pool1/16x16/rf"], + "pool_method": "avg", + "win_x":2, + "win_y":2 + } +] + +} +} + diff --git a/models/gtzan/gcforest/ca-tree500-n4x2-3folds.json b/models/gtzan/gcforest/ca-tree500-n4x2-3folds.json new file mode 100644 index 0000000..a071081 --- /dev/null +++ b/models/gtzan/gcforest/ca-tree500-n4x2-3folds.json @@ -0,0 +1,30 @@ +//Description: A python 2.7 implementation of gcForest proposed in [1]. A demo implementation of gcForest library as well as some demo client scripts to demostrate how to use the code. The implementation is flexible enough for modifying the model or +//fit your own datasets. +//Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. 
(https://arxiv.org/abs/1702.08835v2 ) +//Requirements: This package is developed with Python 2.7, please make sure all the demendencies are installed, which is specified in requirements.txt +//ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou(zhouzh@lamda.nju.edu.cn) +//ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explains how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng. + +{ +"dataset":{ + "train": {"type": "gtzan", "data_set": "genre.train", "layout_x": "vector", "cache":"mfcc"}, + "test": {"type": "gtzan", "data_set": "genre.val", "layout_x": "vector", "cache":"mfcc"} +}, +"cascade": { + //"data_save_dir": "/mnt/raid/fengji/gcforest/gtzan/ca-tree500-n4x2-3folds/", + "random_state": 0, + "early_stopping_rounds": 4, + "n_classes": 10, + "estimators": [ + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1} + ] +} +} + diff --git a/models/gtzan/gcforest/fg-tree500-depth100-3folds-ca.json b/models/gtzan/gcforest/fg-tree500-depth100-3folds-ca.json new file mode 100644 index 0000000..5da5035 --- /dev/null +++ 
b/models/gtzan/gcforest/fg-tree500-depth100-3folds-ca.json @@ -0,0 +1,42 @@ +//Description: A python 2.7 implementation of gcForest proposed in [1]. A demo implementation of gcForest library as well as some demo client scripts to demostrate how to use the code. The implementation is flexible enough for modifying the model or +//fit your own datasets. +//Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. (https://arxiv.org/abs/1702.08835v2 ) +//Requirements: This package is developed with Python 2.7, please make sure all the demendencies are installed, which is specified in requirements.txt +//ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou(zhouzh@lamda.nju.edu.cn) +//ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explains how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng. 
+ +{ +"dataset": { + "train": { + "type": "ds_pickle2", + "data_path": "/mnt/raid/fengji/gcforest/gtzan/fg-tree500-depth100-3folds/datas/train/outputs.pkl", + "X_keys": ["pool1/320/ets", "pool1/320/rf", "pool1/160/ets", "pool1/160/rf", "pool1/80/ets", "pool1/80/rf"] + }, + "test": { + "type": "ds_pickle2", + "data_path": "/mnt/raid/fengji/gcforest/gtzan/fg-tree500-depth100-3folds/datas/test/outputs.pkl", + "X_keys": ["pool1/320/ets", "pool1/320/rf", "pool1/160/ets", "pool1/160/rf", "pool1/80/ets", "pool1/80/rf"] + } +}, +"cascade": { + "random_state": 0, + "max_layers": 100, + "early_stopping_rounds": 4, + "look_indexs_cycle": [ + [0, 1], + [2, 3], + [4, 5] + ], + "n_classes": 10, + "estimators": [ + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1} + ] +} +} diff --git a/models/gtzan/gcforest/fg-tree500-depth100-3folds.json b/models/gtzan/gcforest/fg-tree500-depth100-3folds.json new file mode 100644 index 0000000..320a892 --- /dev/null +++ b/models/gtzan/gcforest/fg-tree500-depth100-3folds.json @@ -0,0 +1,86 @@ +//Description: A python 2.7 implementation of gcForest proposed in [1]. A demo implementation of gcForest library as well as some demo client scripts to demostrate how to use the code. 
The implementation is flexible enough for modifying the model or +//fit your own datasets. +//Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. (https://arxiv.org/abs/1702.08835v2 ) +//Requirements: This package is developed with Python 2.7, please make sure all the demendencies are installed, which is specified in requirements.txt +//ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou(zhouzh@lamda.nju.edu.cn) +//ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explains how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng. + +{ +"dataset":{ + "train": {"type": "gtzan", "data_set": "genre.train", "layout_x": "tensor", "cache":"mfcc"}, + "test": {"type": "gtzan", "data_set": "genre.val", "layout_x": "tensor", "cache":"mfcc"} +}, + +"train": { + "keep_model_in_mem":0, + "data_cache":{ + "cache_in_disk":{ + "default":1 + }, + "keep_in_mem":{ + "default":0 + }, + "cache_dir":"/mnt/raid/fengji/gcforest/gtzan/fg-tree500-depth100-3folds/datas" + } +}, + +"net": { +"outputs": ["pool1/320/ets", "pool1/320/rf", "pool1/160/ets", "pool1/160/rf", "pool1/80/ets", "pool1/80/rf"], +"layers":[ +// win1/320 + { + "type":"FGWinLayer", + "name":"win1/320", + "bottoms": ["X","y"], + "tops":["win1/320/ets", "win1/320/rf"], + "n_classes":10, + "estimators": [ + {"n_folds":3, "type":"ExtraTreesClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10}, + {"n_folds":3, "type":"RandomForestClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10} + ], + "win_x":1, + "win_y":320 + }, +// win1/160 + { + "type":"FGWinLayer", + "name":"win1/160", + "bottoms": ["X","y"], + "tops":["win1/160/ets", "win1/160/rf"], + "n_classes":10, + "estimators": [ + {"n_folds":3, 
"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10}, + {"n_folds":3, "type":"RandomForestClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10} + ], + "n_splits":3, + "win_x":1, + "win_y":160 + }, +// win1/80 + { + "type":"FGWinLayer", + "name":"win1/80", + "bottoms": ["X","y"], + "tops":["win1/80/ets", "win1/80/rf"], + "n_classes":10, + "estimators": [ + {"n_folds":3, "type":"ExtraTreesClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10}, + {"n_folds":3, "type":"RandomForestClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10} + ], + "win_x":1, + "win_y":80 + }, +// pool1 + { + "type":"FGPoolLayer", + "name":"pool1", + "bottoms": ["win1/320/ets", "win1/320/rf", "win1/160/ets", "win1/160/rf", "win1/80/ets", "win1/80/rf"], + "tops": ["pool1/320/ets", "pool1/320/rf", "pool1/160/ets", "pool1/160/rf", "pool1/80/ets", "pool1/80/rf"], + "pool_method": "avg", + "win_x":2, + "win_y":2 + } +] + +} +} diff --git a/models/imdb/gcforest/ca-tree500-n4x2-3folds.json b/models/imdb/gcforest/ca-tree500-n4x2-3folds.json new file mode 100644 index 0000000..f43a4a5 --- /dev/null +++ b/models/imdb/gcforest/ca-tree500-n4x2-3folds.json @@ -0,0 +1,29 @@ +//Description: A python 2.7 implementation of gcForest proposed in [1]. A demo implementation of gcForest library as well as some demo client scripts to demostrate how to use the code. The implementation is flexible enough for modifying the model or +//fit your own datasets. +//Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. (https://arxiv.org/abs/1702.08835v2 ) +//Requirements: This package is developed with Python 2.7, please make sure all the demendencies are installed, which is specified in requirements.txt +//ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. 
Zhi-Hua Zhou(zhouzh@lamda.nju.edu.cn) +//ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explains how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng. + +{ +"dataset": { + "train": {"type": "imdb", "data_set": "train", "layout_x": "vector", "feature": "tfidf"}, + "test": {"type": "imdb", "data_set": "test", "layout_x": "vector", "feature": "tfidf"} +}, +"cascade": { + //"data_save_dir": "/mnt/raid/fengji/gcforest/imdb/ca-tree500-n4x2-3folds/", + "random_state": 0, + "early_stopping_rounds": 4, + "n_classes": 2, + "estimators": [ + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1} + ] +} +} diff --git a/models/mnist/gcforest/ca-tree500-n4x2-3folds.json b/models/mnist/gcforest/ca-tree500-n4x2-3folds.json new file mode 100644 index 0000000..a6ce682 --- /dev/null +++ b/models/mnist/gcforest/ca-tree500-n4x2-3folds.json @@ -0,0 +1,30 @@ +//Description: A python 2.7 implementation of gcForest proposed in [1]. A demo implementation of gcForest library as well as some demo client scripts to demostrate how to use the code. 
The implementation is flexible enough for modifying the model or +//fit your own datasets. +//Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. (https://arxiv.org/abs/1702.08835v2 ) +//Requirements: This package is developed with Python 2.7, please make sure all the demendencies are installed, which is specified in requirements.txt +//ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou(zhouzh@lamda.nju.edu.cn) +//ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explains how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng. + +{ +"dataset":{ + "train": {"type": "mnist", "data_set": "train", "layout_x": "tensor"}, + "test": {"type": "mnist", "data_set": "test", "layout_x": "tensor"} +}, +"cascade": { + //"data_save_dir": "/mnt/raid/fengji/gcforest/mnist/ca-tree500-n4x2-3folds/", + "random_state": 0, + "early_stopping_rounds": 4, + "n_classes": 10, + "estimators": [ + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1} + ] +} +} + diff --git 
a/models/mnist/gcforest/fg-tree500-depth100-3folds-ca.json b/models/mnist/gcforest/fg-tree500-depth100-3folds-ca.json new file mode 100644 index 0000000..5e5a64a --- /dev/null +++ b/models/mnist/gcforest/fg-tree500-depth100-3folds-ca.json @@ -0,0 +1,45 @@ +//Description: A python 2.7 implementation of gcForest proposed in [1]. A demo implementation of gcForest library as well as some demo client scripts to demostrate how to use the code. The implementation is flexible enough for modifying the model or +//fit your own datasets. +//Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. (https://arxiv.org/abs/1702.08835v2 ) +//Requirements: This package is developed with Python 2.7, please make sure all the demendencies are installed, which is specified in requirements.txt +//ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou(zhouzh@lamda.nju.edu.cn) +//ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explains how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng. 
+ +{ +"dataset": { + "train": { + "type": "ds_pickle2", + "data_path": "/mnt/raid/fengji/gcforest/mnist/fg-tree300-depth0/datas/train/outputs.pkl", + "X_keys": ["pool1/7x7/ets", "pool1/7x7/rf", "pool1/10x10/ets", "pool1/10x10/rf", "pool1/13x13/ets", "pool1/13x13/rf"] + }, + "test": { + "type": "ds_pickle2", + "data_path": "/mnt/raid/fengji/gcforest/mnist/fg-tree300-depth0/datas/test/outputs.pkl", + "X_keys": ["pool1/7x7/ets", "pool1/7x7/rf", "pool1/10x10/ets", "pool1/10x10/rf", "pool1/13x13/ets", "pool1/13x13/rf"] + } +}, +"cascade": { + "random_state": 0, + "max_layers": 100, + "early_stopping_rounds": 8, + "look_indexs_cycle": [ + [0, 1], + [2, 3], + [4, 5], + [6, 7] + ], + "n_classes": 10, + "estimators": [ + {"n_folds":5,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":5,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":5,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":5,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":5,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":5,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":5,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":5,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1} + ], + "data_save_dir": "/mnt/raid/fengji/gcforest/mnist/fg-tree300-depth0-3folds/cascade/data" +} +} + diff --git a/models/mnist/gcforest/fg-tree500-depth100-3folds.json b/models/mnist/gcforest/fg-tree500-depth100-3folds.json new file mode 100644 index 0000000..5dbb7c5 --- /dev/null +++ b/models/mnist/gcforest/fg-tree500-depth100-3folds.json @@ -0,0 +1,91 @@ +//Description: A python 2.7 implementation of gcForest proposed in [1]. 
A demo implementation of gcForest library as well as some demo client scripts to demostrate how to use the code. The implementation is flexible enough for modifying the model or +//fit your own datasets. +//Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. (https://arxiv.org/abs/1702.08835v2 ) +//Requirements: This package is developed with Python 2.7, please make sure all the demendencies are installed, which is specified in requirements.txt +//ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou(zhouzh@lamda.nju.edu.cn) +//ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explains how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng. + +{ +"dataset":{ + "train": {"type": "mnist", "data_set": "train", "layout_x": "tensor"}, + "test": {"type": "mnist", "data_set": "test", "layout_x": "tensor"} +}, +"train":{ + "keep_model_in_mem":0, + //"model_cache_dir":"/mnt/raid/fengji/gcforest/mnist/fg-tree500-depth100-3folds/models", + "random_state":0, + "data_cache":{ + "cache_in_disk":{ + "default":1 + }, + "keep_in_mem":{ + "default":0 + }, + "cache_dir":"/mnt/raid/fengji/gcforest/mnist/fg-tree500-depth100-3folds/datas" + } +}, +"net":{ +"outputs": ["pool1/7x7/ets", "pool1/7x7/rf", "pool1/10x10/ets", "pool1/10x10/rf", "pool1/13x13/ets", "pool1/13x13/rf"], +"layers":[ +// win1/7x7 + { + "type":"FGWinLayer", + "name":"win1/7x7", + "bottoms": ["X","y"], + "tops":["win1/7x7/ets", "win1/7x7/rf"], + "n_classes": 10, + "estimators": [ + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10} + ], + "stride_x": 2, + "stride_y": 2, + "win_x":7, + "win_y":7 + }, +// 
win1/10x10 + { + "type":"FGWinLayer", + "name":"win1/10x10", + "bottoms": ["X","y"], + "tops":["win1/10x10/ets", "win1/10x10/rf"], + "n_classes": 10, + "estimators": [ + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10} + ], + "stride_x": 2, + "stride_y": 2, + "win_x":10, + "win_y":10 + }, +// win1/13x13 + { + "type":"FGWinLayer", + "name":"win1/13x13", + "bottoms": ["X","y"], + "tops":["win1/13x13/ets", "win1/13x13/rf"], + "n_classes": 10, + "estimators": [ + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10} + ], + "stride_x": 2, + "stride_y": 2, + "win_x":13, + "win_y":13 + }, +// pool1 + { + "type":"FGPoolLayer", + "name":"pool1", + "bottoms": ["win1/7x7/ets", "win1/7x7/rf", "win1/10x10/ets", "win1/10x10/rf", "win1/13x13/ets", "win1/13x13/rf"], + "tops": ["pool1/7x7/ets", "pool1/7x7/rf", "pool1/10x10/ets", "pool1/10x10/rf", "pool1/13x13/ets", "pool1/13x13/rf"], + "pool_method": "avg", + "win_x":2, + "win_y":2 + } +] + +} +} diff --git a/models/uci_adult/gcforest/ca-tree500-n4x2-3folds.json b/models/uci_adult/gcforest/ca-tree500-n4x2-3folds.json new file mode 100644 index 0000000..4063a78 --- /dev/null +++ b/models/uci_adult/gcforest/ca-tree500-n4x2-3folds.json @@ -0,0 +1,29 @@ +//Description: A python 2.7 implementation of gcForest proposed in [1]. A demo implementation of gcForest library as well as some demo client scripts to demostrate how to use the code. The implementation is flexible enough for modifying the model or +//fit your own datasets. +//Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. 
(https://arxiv.org/abs/1702.08835v2 ) +//Requirements: This package is developed with Python 2.7, please make sure all the demendencies are installed, which is specified in requirements.txt +//ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou(zhouzh@lamda.nju.edu.cn) +//ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explains how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng. + +{ +"dataset": { + "train": {"type": "uci_adult", "data_set": "train", "layout_x": "vector", "cate_as_onehot": 1}, + "test": {"type": "uci_adult", "data_set": "test", "layout_x": "vector", "cate_as_onehot": 1} +}, +"cascade": { + //"data_save_dir": "/mnt/raid/fengji/gcforest/uci_adult/ca-tree500-n4x2-3folds/", + "random_state": 0, + "early_stopping_rounds": 4, + "n_classes": 2, + "estimators": [ + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1} + ] +} +} diff --git a/models/uci_letter/gcforest/ca-tree500-n4x2-3folds.json b/models/uci_letter/gcforest/ca-tree500-n4x2-3folds.json new file mode 100644 index 0000000..fe27ba0 --- /dev/null +++ 
b/models/uci_letter/gcforest/ca-tree500-n4x2-3folds.json @@ -0,0 +1,29 @@ +//Description: A python 2.7 implementation of gcForest proposed in [1]. A demo implementation of gcForest library as well as some demo client scripts to demostrate how to use the code. The implementation is flexible enough for modifying the model or +//fit your own datasets. +//Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. (https://arxiv.org/abs/1702.08835v2 ) +//Requirements: This package is developed with Python 2.7, please make sure all the demendencies are installed, which is specified in requirements.txt +//ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou(zhouzh@lamda.nju.edu.cn) +//ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explains how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng. 
+ +{ +"dataset": { + "train": {"type": "uci_letter", "data_set": "train", "layout_x": "vector"}, + "test": {"type": "uci_letter", "data_set": "test", "layout_x": "vector"} +}, +"cascade": { + "data_save_dir": "/mnt/raid/fengji/gcforest/uci_letter/ca-tree500-n4x2-3folds/", + "random_state": 0, + "early_stopping_rounds": 4, + "n_classes": 26, + "estimators": [ + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1} + ] +} +} diff --git a/models/uci_semg/gcforest/ca-tree500-n4x2-3folds.json b/models/uci_semg/gcforest/ca-tree500-n4x2-3folds.json new file mode 100644 index 0000000..e245444 --- /dev/null +++ b/models/uci_semg/gcforest/ca-tree500-n4x2-3folds.json @@ -0,0 +1,30 @@ +//Description: A python 2.7 implementation of gcForest proposed in [1]. A demo implementation of gcForest library as well as some demo client scripts to demostrate how to use the code. The implementation is flexible enough for modifying the model or +//fit your own datasets. +//Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. 
(https://arxiv.org/abs/1702.08835v2 ) +//Requirements: This package is developed with Python 2.7, please make sure all the demendencies are installed, which is specified in requirements.txt +//ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou(zhouzh@lamda.nju.edu.cn) +//ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explains how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng. + +{ +"dataset":{ + "train": {"type": "uci_semg", "data_set": "train", "layout_x": "tensor"}, + "test": {"type": "uci_semg", "data_set": "test", "layout_x": "tensor"} +}, +"cascade": { + //"data_save_dir": "/mnt/raid/fengji/gcforest/uci_semg/ca-tree500-n4x2-3folds/", + "random_state": 0, + "early_stopping_rounds": 4, + "n_classes": 6, + "estimators": [ + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1} + ] +} +} + diff --git a/models/uci_semg/gcforest/fg-tree500-depth100-3folds-ca.json b/models/uci_semg/gcforest/fg-tree500-depth100-3folds-ca.json new file mode 100644 index 0000000..f1e1213 --- /dev/null +++ 
b/models/uci_semg/gcforest/fg-tree500-depth100-3folds-ca.json @@ -0,0 +1,44 @@ +//Description: A python 2.7 implementation of gcForest proposed in [1]. A demo implementation of gcForest library as well as some demo client scripts to demostrate how to use the code. The implementation is flexible enough for modifying the model or +//fit your own datasets. +//Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. (https://arxiv.org/abs/1702.08835v2 ) +//Requirements: This package is developed with Python 2.7, please make sure all the demendencies are installed, which is specified in requirements.txt +//ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou(zhouzh@lamda.nju.edu.cn) +//ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explains how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng. 
+ +{ +"dataset": { + "train": { + "type": "ds_pickle2", + "data_path": "/mnt/raid/fengji/gcforest/uci_semg/fg-tree500-depth100-3folds/datas/train/outputs.pkl", + "X_keys": ["pool1/187/ets", "pool1/187/rf", "pool1/375/ets", "pool1/375/rf", "pool1/750/ets", "pool1/750/rf"] + }, + "test": { + "type": "ds_pickle2", + "data_path": "/mnt/raid/fengji/gcforest/uci_semg/fg-tree500-depth100-3folds/datas/test/outputs.pkl", + "X_keys": ["pool1/187/ets", "pool1/187/rf", "pool1/375/ets", "pool1/375/rf", "pool1/750/ets", "pool1/750/rf"] + } +}, +"cascade": { + //"data_save_dir": "/mnt/raid/fengji/gcforest/uci_semg/fg-tree500-depth100-3folds/cascade/", + "random_state": 0, + "early_stopping_rounds": 4, + "n_classes": 6, + "look_indexs_cycle": [ + [0, 1], + [2, 3], + [4, 5] + ], + "estimators": [ + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1} + ] +} +} + + diff --git a/models/uci_semg/gcforest/fg-tree500-depth100-3folds.json b/models/uci_semg/gcforest/fg-tree500-depth100-3folds.json new file mode 100644 index 0000000..ac424e6 --- /dev/null +++ b/models/uci_semg/gcforest/fg-tree500-depth100-3folds.json @@ -0,0 +1,87 @@ +//Description: A python 2.7 implementation of gcForest proposed in [1]. 
A demo implementation of gcForest library as well as some demo client scripts to demostrate how to use the code. The implementation is flexible enough for modifying the model or +//fit your own datasets. +//Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. (https://arxiv.org/abs/1702.08835v2 ) +//Requirements: This package is developed with Python 2.7, please make sure all the demendencies are installed, which is specified in requirements.txt +//ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou(zhouzh@lamda.nju.edu.cn) +//ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explains how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng. + +{ +"dataset":{ + "train": {"type": "uci_semg", "data_set": "train", "layout_x": "tensor"}, + "test": {"type": "uci_semg", "data_set": "test", "layout_x": "tensor"} +}, + +"train": { + "keep_model_in_mem":0, + //"model_cache_dir":"/mnt/raid/fengji/gcforest/uci_semg/fg-tree500-depth100-3folds/models", + "data_cache":{ + "cache_in_disk":{ + "default":1 + }, + "keep_in_mem":{ + "default":0 + }, + "cache_dir":"/mnt/raid/fengji/gcforest/uci_semg/fg-tree500-depth100-3folds/datas" + } +}, + +"net": { +"outputs": ["pool1/187/ets", "pool1/187/rf", "pool1/375/ets", "pool1/375/rf", "pool1/750/ets", "pool1/750/rf"], +"layers":[ +// win1/187 + { + "type":"FGWinLayer", + "name":"win1/187", + "bottoms": ["X","y"], + "tops":["win1/187/ets", "win1/187/rf"], + "n_classes":6, + "estimators": [ + {"n_folds":3, "type":"ExtraTreesClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10}, + {"n_folds":3, "type":"RandomForestClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10} + ], + "win_x":1, + "win_y":187 + }, +// win1/375 + { + "type":"FGWinLayer", + 
"name":"win1/375", + "bottoms": ["X","y"], + "tops":["win1/375/ets", "win1/375/rf"], + "n_classes":6, + "estimators": [ + {"n_folds":3, "type":"ExtraTreesClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10}, + {"n_folds":3, "type":"RandomForestClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10} + ], + "n_splits":3, + "win_x":1, + "win_y":375 + }, +// win1/750 + { + "type":"FGWinLayer", + "name":"win1/750", + "bottoms": ["X","y"], + "tops":["win1/750/ets", "win1/750/rf"], + "n_classes":6, + "estimators": [ + {"n_folds":3, "type":"ExtraTreesClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10}, + {"n_folds":3, "type":"RandomForestClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10} + ], + "win_x":1, + "win_y":750 + }, +// pool1 + { + "type":"FGPoolLayer", + "name":"pool1", + "bottoms": ["win1/187/ets", "win1/187/rf", "win1/375/ets", "win1/375/rf", "win1/750/ets", "win1/750/rf"], + "tops": ["pool1/187/ets", "pool1/187/rf", "pool1/375/ets", "pool1/375/rf", "pool1/750/ets", "pool1/750/rf"], + "pool_method": "avg", + "win_x":2, + "win_y":2 + } +] + +} +} diff --git a/models/uci_yeast/gcforest/ca-tree500-n4x2-3folds.json b/models/uci_yeast/gcforest/ca-tree500-n4x2-3folds.json new file mode 100644 index 0000000..9190eed --- /dev/null +++ b/models/uci_yeast/gcforest/ca-tree500-n4x2-3folds.json @@ -0,0 +1,29 @@ +//Description: A python 2.7 implementation of gcForest proposed in [1]. A demo implementation of gcForest library as well as some demo client scripts to demostrate how to use the code. The implementation is flexible enough for modifying the model or +//fit your own datasets. +//Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. 
(https://arxiv.org/abs/1702.08835v2 ) +//Requirements: This package is developed with Python 2.7, please make sure all the demendencies are installed, which is specified in requirements.txt +//ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou(zhouzh@lamda.nju.edu.cn) +//ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explains how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng. + +{ +"dataset": { + "train": {"type": "uci_yeast", "data_set": "train", "layout_x": "vector"}, + "test": {"type": "uci_yeast", "data_set": "test", "layout_x": "vector"} +}, +"cascade": { + //"data_save_dir": "/mnt/raid/fengji/gcforest/uci_yeast/ca-tree500-n4x2-3folds/", + "random_state": 0, + "early_stopping_rounds": 4, + "n_classes": 10, + "estimators": [ + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1,"max_features":1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1}, + {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":null,"n_jobs":-1} + ] +} +} diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..ca9410c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +argparse +joblib +keras +psutil +scikit-learn>=0.18.1 +scipy +simplejson 
+tensorflow
diff --git a/tools/audio/cache_feature.py b/tools/audio/cache_feature.py
new file mode 100644
index 0000000..940c7bc
--- /dev/null
+++ b/tools/audio/cache_feature.py
@@ -0,0 +1,99 @@
+"""
+Description: A python 2.7 implementation of gcForest proposed in [1]. A demo implementation of gcForest library as well as some demo client scripts to demostrate how to use the code. The implementation is flexible enough for modifying the model or
+fit your own datasets.
+Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. (https://arxiv.org/abs/1702.08835v2 )
+Requirements: This package is developed with Python 2.7, please make sure all the demendencies are installed, which is specified in requirements.txt
+ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou
+ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explains how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng.
+"""
+from joblib import Parallel, delayed
+import librosa
+import numpy as np
+import os, os.path as osp
+import sys
+
+sys.path.insert(0, 'lib')
+from gcforest.datasets.gtzan import GTZAN
+from gcforest.utils.audio_utils import select_feature_func
+
+def parse_args():
+    """Parse the --dataset, --split and --feature command line options."""
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--dataset', dest='dataset', type=str, default='gtzan',
+            choices=['gtzan'])
+    parser.add_argument('--split', dest='split', type=str, required=True)
+    parser.add_argument('--feature', dest='feature', type=str, required=True)
+    args = parser.parse_args()
+    return args
+
+def save_cache(src_path, des_path, get_feature_func):
+    """
+    Compute the feature of one audio file and store it as a '.npy' file.
+
+    src_path: audio file handed to librosa.load.
+    des_path: cache file path; its extension is replaced by '.npy'.
+    get_feature_func: called as get_feature_func(X, sr) on the loaded audio.
+    Returns 0 on success, -1 if loading, feature extraction or creating
+    the cache directory raised.
+    """
+    des_path = osp.splitext(des_path)[0] + '.npy'
+    try:
+        X, sr = librosa.load(src_path)
+        feature = get_feature_func(X, sr)
+        print('[INFO] Saving Cache in {} ...'.format(des_path))
+        des_par = osp.abspath(osp.join(des_path, osp.pardir))
+        try:
+            os.makedirs(des_par)
+        except OSError:
+            # save_cache runs in several worker processes, so another worker
+            # may already have created the directory; that is not an error.
+            if not osp.isdir(des_par):
+                raise
+    except Exception as e:
+        print("[ERROR] Unkown error happend when dealing with{}".format(src_path))
+        print("[ERROR] {!r}".format(e))
+        return -1
+    np.save(des_path, feature)
+    return 0
+
+if __name__ == '__main__':
+    args = parse_args()
+    if args.dataset == 'gtzan':
+        dataset = GTZAN(data_set=args.split, layout_x='rel_path')
+    else:
+        # No other dataset class is imported by this script.
+        raise ValueError('Unsupported dataset: {}'.format(args.dataset))
+
+    feature_name = args.feature
+    get_feature_func = select_feature_func(feature_name)
+
+    rel_paths = dataset.X
+    src_paths = []
+    des_paths = []
+    for rel_path in rel_paths:
+        # save_cache() stores '<name>.npy', so test for that file; checking the
+        # path with the audio extension would never find a finished cache.
+        des_path = osp.splitext(osp.join(dataset.cache_base, feature_name, rel_path))[0] + '.npy'
+        if osp.exists(des_path):
+            continue
+        src_paths.append(osp.join(dataset.data_base, rel_path))
+        des_paths.append(des_path)
+    print('Total={}, Done={}, Undo={}'.format(len(rel_paths), len(rel_paths)-len(src_paths), len(src_paths)))
+    print('src_paths[:5]={}'.format(src_paths[:5]))
+    print('des_paths[:5]={}'.format(des_paths[:5]))
+
+    status = Parallel(n_jobs=-1, verbose=1, backend='multiprocessing')(
+            delayed(save_cache)(src_path, des_path, get_feature_func)
+            for src_path, des_path in zip(src_paths, des_paths))
+
+    # check error
+    error_src_paths = [src_path for src_path, code in zip(src_paths, status) if code == -1]
+    print('len(error_src_paths)={}, error_src_paths[:5]={}'.format(
+        len(error_src_paths), error_src_paths[:5]))
+    if len(error_src_paths) > 0:
+        error_save_path = dataset.data_set_path + '.error'
+        with open(error_save_path, 'wb') as f:
+            for error_src_path in error_src_paths:
+                f.write('{}\n'.format(error_src_path))
+    import IPython; IPython.embed()
diff --git a/tools/train_cascade.py b/tools/train_cascade.py
new file mode 100644
index 0000000..236d35a
--- /dev/null
+++ b/tools/train_cascade.py
@@ -0,0 +1,44 @@
+"""
+Description: A python 2.7 implementation of gcForest proposed in [1]. A demo implementation of gcForest library as well as some demo client scripts to demostrate how to use the code. The implementation is flexible enough for modifying the model or
+fit your own datasets.
+Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. (https://arxiv.org/abs/1702.08835v2 )
+Requirements: This package is developed with Python 2.7, please make sure all the demendencies are installed, which is specified in requirements.txt
+ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou
+ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explains how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng.
+"""
+import sys, os, os.path as osp
+import argparse
+import logging
+import numpy as np
+import json
+
+sys.path.insert(0, 'lib')
+from gcforest.utils.log_utils import get_logger, update_default_level, update_default_logging_dir
+from gcforest.utils.config_utils import load_json
+#update_default_level(logging.DEBUG)
+
+def parse_args():
+    """Read --model (required) and --log_dir (optional) from the command line."""
+    cli = argparse.ArgumentParser()
+    cli.add_argument('--model', dest='model', type=str, required=True, help='gcfoest Net Model File')
+    cli.add_argument('--log_dir', dest='log_dir', type=str, default=None, help='Log file directory')
+    return cli.parse_args()
+
+if __name__ == '__main__':
+    args = parse_args()
+    config = load_json(args.model)
+    if args.log_dir is not None:
+        update_default_logging_dir(args.log_dir)
+    from gcforest.cascade.cascade_classifier import CascadeClassifier
+    from gcforest.datasets import get_dataset
+    LOGGER = get_logger("tools.train_cascade")
+    LOGGER.info("tools.train_cascade")
+    LOGGER.info("\n" + json.dumps(config, sort_keys=True, indent=4, separators=(',', ':')))
+    # "dataset" and "cascade" are sections of the json model file given by --model.
+    data_train = get_dataset(config["dataset"]["train"])
+    data_test = get_dataset(config["dataset"]["test"])
+
+    cascade = CascadeClassifier(config["cascade"])
+    opt_layer_id, X_train, y_train, X_test, y_test = cascade.fit_transform(data_train.X, data_train.y, data_test.X, data_test.y)
+    # Drop into an IPython shell so the results above can be inspected.
+    import IPython; IPython.embed()
diff --git a/tools/train_fg.py b/tools/train_fg.py
new file mode 100644
index 0000000..c79365b
--- /dev/null
+++ b/tools/train_fg.py
@@ -0,0 +1,60 @@
+"""
+Description: A python 2.7 implementation of gcForest proposed in [1]. A demo implementation of gcForest library as well as some demo client scripts to demostrate how to use the code. The implementation is flexible enough for modifying the model or
+fit your own datasets.
+Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017.
(https://arxiv.org/abs/1702.08835v2 )
+Requirements: This package is developed with Python 2.7, please make sure all the demendencies are installed, which is specified in requirements.txt
+ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou
+ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explains how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng.
+"""
+import sys, os, os.path as osp
+import argparse
+import logging
+import numpy as np
+import json
+
+sys.path.insert(0, 'lib')
+from gcforest.utils.log_utils import get_logger, update_default_level, update_default_logging_dir
+from gcforest.utils.config_utils import load_json
+
+def parse_args():
+    """Read --model (required), --save_outputs (flag) and --log_dir (optional)."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--model', dest='model', type=str, required=True, help='gcfoest Net Model File')
+    parser.add_argument('--save_outputs', dest='save_outputs', action="store_true", help="Save outputs")
+    parser.add_argument('--log_dir', dest='log_dir', type=str, default=None, help='Log file directory')
+    return parser.parse_args()
+
+if __name__ == '__main__':
+    args = parse_args()
+    config = load_json(args.model)
+    update_default_level(logging.DEBUG)
+    if args.log_dir is not None:
+        update_default_logging_dir(args.log_dir)
+    from gcforest.fgnet import FGNet, FGTrainConfig
+    from gcforest.exp_utils import prec_ets, prec_rf, prec_log, prec_xgb, concat_datas
+    from gcforest.datasets import get_dataset
+    LOGGER = get_logger("tools.train_fg")
+    LOGGER.info("tools.train_fg")
+    LOGGER.info("\n" + json.dumps(config, sort_keys=True, indent=4, separators=(',', ':')))
+
+    train_config = FGTrainConfig(config["train"])
+    # Explicit raise instead of assert: the check must also run under "python -O".
+    if args.save_outputs and train_config.data_cache.cache_dir is None:
+        raise ValueError("Data cache dir must be set in model's json config when save_outputs option is on!!")
+
+    data_train = get_dataset(config["dataset"]["train"])
+    data_test = get_dataset(config["dataset"]["test"])
+
+    net = FGNet(config["net"], train_config.data_cache)
+    net.fit_transform(data_train.X, data_train.y, data_test.X, data_test.y, train_config)
+
+    if args.save_outputs:
+        net.save_outputs("train")
+        net.save_outputs("test")
+    #prec_ets(1000, X_train_enc, data_train.y, X_test_enc, data_test.y, random_state=0)
+    #prec_ets(2000, X_train_enc, data_train.y, X_test_enc, data_test.y, random_state=0)
+    #prec_xgb(1000, 5,
+    #        concat_datas(net.get_outputs("train")), data_train.y,
+    #        concat_datas(net.get_outputs("test")), data_test.y)
+
+    import IPython; IPython.embed()
diff --git a/tools/train_xgb.py b/tools/train_xgb.py
new file mode 100644
index 0000000..043b423
--- /dev/null
+++ b/tools/train_xgb.py
@@ -0,0 +1,58 @@
+"""
+Description: A python 2.7 implementation of gcForest proposed in [1]. A demo implementation of gcForest library as well as some demo client scripts to demostrate how to use the code. The implementation is flexible enough for modifying the model or
+fit your own datasets.
+Reference: [1] Z.-H. Zhou and J. Feng. Deep Forest: Towards an Alternative to Deep Neural Networks. In IJCAI-2017. (https://arxiv.org/abs/1702.08835v2 )
+Requirements: This package is developed with Python 2.7, please make sure all the demendencies are installed, which is specified in requirements.txt
+ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou
+ATTN2: This package was developed by Mr.Ji Feng(fengj@lamda.nju.edu.cn). The readme file and demo roughly explains how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng.
+"""
+import sys, os, os.path as osp
+import argparse
+import numpy as np
+import xgboost as xgb
+sys.path.insert(0, 'lib')
+
+from gcforest.utils.log_utils import get_logger, update_default_level, update_default_logging_dir
+from gcforest.fgnet import FGNet, FGTrainConfig
+from gcforest.utils.config_utils import load_json
+from gcforest.exp_utils import concat_datas
+from gcforest.datasets import get_dataset
+LOGGER = get_logger("tools.train_xgb")
+
+def parse_args():
+    """Read the required --model option from the command line."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--model', dest='model', type=str, required=True, help='gcfoest Net Model File')
+    return parser.parse_args()
+
+def train_xgb(X_train, y_train, X_test, y_test):
+    """Fit an XGBClassifier (1000 trees) on the row-flattened inputs, log the precision on X_test, return (clf, y_pred)."""
+    n_trees = 1000
+    X_train = X_train.reshape((X_train.shape[0], -1))
+    X_test = X_test.reshape((X_test.shape[0], -1))
+    LOGGER.info('start predict: n_trees={},X_train.shape={},y_train.shape={},X_test.shape={},y_test.shape={}'.format(
+        n_trees, X_train.shape, y_train.shape, X_test.shape, y_test.shape))
+    clf = xgb.XGBClassifier(n_estimators=n_trees, max_depth=5, objective='multi:softprob',
+            seed=0, silent=True, nthread=-1, learning_rate=0.1)
+    # NOTE(review): eval_set is the test split, so early stopping is tuned on the data the precision below is computed on.
+    eval_set = [(X_test, y_test)]
+    clf.fit(X_train, y_train, eval_set=eval_set, eval_metric="merror", early_stopping_rounds=10)
+    y_pred = clf.predict(X_test)
+    prec = float(np.sum(y_pred == y_test)) / len(y_test)
+    LOGGER.info('prec_xgb_{}={:.6f}%'.format(n_trees, prec*100.0))
+    return clf, y_pred
+
+if __name__ == "__main__":
+    # Use xgboost to train and test the output produced by gcforest
+    args = parse_args()
+    config = load_json(args.model)
+    train_config = FGTrainConfig(config["train"])
+    net = FGNet(config["net"], train_config.data_cache)
+
+    data_train = get_dataset(config["dataset"]["train"])
+    data_test = get_dataset(config["dataset"]["test"])
+
+    train_xgb(
+            concat_datas(net.get_outputs("train")), data_train.y,
+            concat_datas(net.get_outputs("test")), data_test.y)
+    import IPython; IPython.embed()