diff --git a/deepsphere/utils.py b/deepsphere/utils.py
index bd8c79a..389f0a4 100644
--- a/deepsphere/utils.py
+++ b/deepsphere/utils.py
@@ -19,6 +19,17 @@ else:
     from urllib import urlretrieve
 
 
+def build_index(level):
+    """Return a 2**level x 2**level grid mapping image positions to HEALPix NEST pixel indices."""
+    values1 = np.arange(4).reshape([2, 2])
+    if level == 1:
+        values = values1
+    else:
+        values = np.zeros([2**level, 2**level])
+        lowerlevel = build_index(level - 1)
+        values += np.tile(lowerlevel, [2, 2])
+        values += 4**(level - 1) * np.repeat(np.repeat(values1, 2**(level - 1), axis=1), 2**(level - 1), axis=0)
+    return values
 def healpix_weightmatrix(nside=16, nest=True, indexes=None, dtype=np.float32):
     """Return an unnormalized weight matrix for a graph using the HEALPIX sampling.
 
diff --git a/deepsphere/cnn.py b/experimental/cnn.py
similarity index 100%
rename from deepsphere/cnn.py
rename to experimental/cnn.py
diff --git a/experimental/demo_part_sphere-with2dcnn.ipynb b/experimental/demo_part_sphere-with2dcnn.ipynb
new file mode 100644
index 0000000..2940aa3
--- /dev/null
+++ b/experimental/demo_part_sphere-with2dcnn.ipynb
@@ -0,0 +1,525 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# [DeepSphere]: a spherical convolutional neural network\n",
+    "[DeepSphere]: https://github.com/SwissDataScienceCenter/DeepSphere\n",
+    "\n",
+    "[Nathanaël Perraudin](https://perraudin.info), [Michaël Defferrard](http://deff.ch), Tomasz Kacprzak, Raphael Sgier\n",
+    "\n",
+    "# Demo: part of sphere 2D ConvNet\n",
+    "\n",
+    "This demo uses the whole dataset, smoothing, and the addition of noise.\n",
+    "\n",
+    "**You need a private dataset to execute this notebook.**\n",
+    "See the [README](https://github.com/SwissDataScienceCenter/DeepSphere/tree/master#reproducing-the-results-of-the-paper).\n",
+    "But you can use it with your own data."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 0.1 Load packages"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import shutil\n",
+    "\n",
+    "# Run on first GPU.\n",
+    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
+    "\n",
+    "# To get the CUDA profiler (do it on the CLI before starting jupyter):\n",
+    "# export LD_LIBRARY_PATH=/usr/local/cuda-9.0/extras/CUPTI/lib64\n",
+    "\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "from deepsphere import models, experiment_helper, plot, utils\n",
+    "from deepsphere.data import LabeledDatasetWithNoise, LabeledDataset\n",
+    "import hyperparameters"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.rcParams['figure.figsize'] = (17, 5)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 0.2 Definition of the parameters"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### A) Non-tunable parameters\n",
+    "These parameters are fixed; to change them, the preprocessing script has to be modified."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "Nside = 1024\n",
+    "sigma = 3\n",
+    "data_path = 'data/same_psd/'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### B) Tunable parameters\n",
+    "These parameters can be changed.\n",
+    "\n",
+    "We work in the noisy setting by setting `sigma_noise = 2`. Note that training the network in the noisy case needs considerably more iterations than in the noiseless setting (`sigma_noise = 0`), so this notebook takes longer to run."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "order = 2  # 1,2,4,8 correspond to 12,48,192,768 parts of the sphere.\n",
+    "sigma_noise = 2  # Amount of noise for the experiment"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 1 Data preparation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1.1 Data download\n",
+    "Set `download` to `True` to download the dataset from Zenodo."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "download = False\n",
+    "if download:\n",
+    "    %run -i 'download.py'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1.2 Data preprocessing\n",
+    "Apply the preprocessing steps:\n",
+    "1. Remove the mean of the maps.\n",
+    "2. Smooth with a radius of 3 arcmin (the `sigma` parameter).\n",
+    "\n",
+    "Set `preprocess` to `True` to execute the preprocessing script."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "preprocess = False\n",
+    "if preprocess:\n",
+    "    %run -i 'data_preprocess.py'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let us display the resulting PSDs of the preprocessed data. We pre-computed the PSDs for faster execution."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "compute = False\n",
+    "if compute:\n",
+    "    psd = experiment_helper.psd\n",
+    "    data_path = 'data/same_psd/'\n",
+    "    ds1 = np.load(data_path+'smoothed_class1_sigma{}.npz'.format(sigma))['arr_0']\n",
+    "    ds2 = np.load(data_path+'smoothed_class2_sigma{}.npz'.format(sigma))['arr_0']\n",
+    "    psds_img1 = [psd(img) for img in ds1]\n",
+    "    psds_img2 = [psd(img) for img in ds2]\n",
+    "    np.savez('results/psd_data_sigma{}'.format(sigma), psd_class1=psds_img1, psd_class2=psds_img2)\n",
+    "else:\n",
+    "    psds_img1 = np.load('results/psd_data_sigma{}.npz'.format(sigma))['psd_class1']\n",
+    "    psds_img2 = np.load('results/psd_data_sigma{}.npz'.format(sigma))['psd_class2']"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The PSDs of the two classes are almost indistinguishable.\n",
+    "\n",
+    "Spoiler alert: this is the reason why PSD features are not good enough to classify the data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ell = np.arange(psds_img1.shape[1])\n",
+    "\n",
+    "plot.plot_with_std(ell,np.stack(psds_img1)*ell*(ell+1), label='class 1, $\\\Omega_m=0.31$, $\\\sigma_8=0.82$, $h=0.7$', color='r')\n",
+    "plot.plot_with_std(ell,np.stack(psds_img2)*ell*(ell+1), label='class 2, $\\\Omega_m=0.26$, $\\\sigma_8=0.91$, $h=0.7$', color='b')\n",
+    "plt.legend(fontsize=16);\n",
+    "plt.xlim([11, np.max(ell)])\n",
+    "plt.ylim([1e-6, 5e-4])\n",
+    "plt.yscale('log')\n",
+    "plt.xscale('log')\n",
+    "plt.xlabel('$\\\ell$: spherical harmonic index', fontsize=18)\n",
+    "plt.ylabel('$C_\\\ell \\\cdot \\\ell \\\cdot (\\\ell+1)$', fontsize=18)\n",
+    "plt.title('Power Spectrum Density, 3-arcmin smoothing, noiseless, Nside=1024', fontsize=18);\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1.3 Data loading\n",
+    "The following functions will:\n",
+    "1. Load the preprocessed data.\n",
+    "2. Create samples by dividing the complete spheres into patches (based on the HEALPix sampling). See the function `hp_split` in `experiment_helper.py` for more specific information.\n",
+    "\n",
+    "The function that loads the testing data additionally adds noise to the samples."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x_raw_train, labels_raw_train, x_raw_std = experiment_helper.get_training_data(sigma, order)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x_raw_test, labels_test, _ = experiment_helper.get_testing_data(sigma, order, sigma_noise, x_raw_std)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 4 Classification using a 2D ConvNet\n",
+    "\n",
+    "Let us now classify our data using a standard 2D convolutional neural network applied to the sphere patches remapped to 2D images."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4.1 Preparation of the dataset\n",
+    "Let us create the data for the network. It is simply the raw data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ret = experiment_helper.data_preprossing(x_raw_train, labels_raw_train, x_raw_test, sigma_noise, feature_type=None, train_size=0.8)\n",
+    "features_train, labels_train, features_validation, labels_validation, features_test = ret"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The network uses a `Dataset` object that needs to be initialized. The `LabeledDatasetWithNoise` object adds noise to the raw data at training time, slowly increasing the amount of noise during the first `nit` iterations."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from deepsphere.utils import build_index\n",
+    "nx = Nside//order\n",
+    "nlevels = np.round(np.log2(nx)).astype(np.int)\n",
+    "index = build_index(nlevels).astype(np.int)\n",
+    "\n",
+    "features_train = features_train[:, index]\n",
+    "features_validation = features_validation[:, index]\n",
+    "shuffle = np.random.permutation(len(features_test))\n",
+    "features_test = features_test[:, index]\n",
+    "features_test = features_test[shuffle]\n",
+    "labels_test = labels_test[shuffle]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "training = LabeledDatasetWithNoise(features_train, labels_train, end_level=sigma_noise)\n",
+    "validation = LabeledDataset(features_validation, labels_validation)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4.2 Building the Network\n",
+    "\n",
+    "We now create our 2D convolutional neural network. We use one architecture, a fully convolutional architecture (see the exact parameters in `hyperparameters.py`), for all the problems, that is, for all configurations of `order` and `sigma_noise`. A smaller `order` means more pixels per sample, that is, more data for a prediction. This translates to higher accuracy, as the network is more confident about its predictions (they are averaged across spatial locations).\n",
+    "\n",
+    "For the paper, we selected a conservative set of parameters that gave good results across the board. To train faster, decrease `num_epochs`, or interrupt training whenever you get bored. To reproduce all the results of the paper, the easiest way is to run the `experiments_deepsphere.py` script."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ntype = 'CNN-2d'\n",
+    "EXP_NAME = '40sim_{}sides_{:0.1f}noise_{}order_{}sigma_{}'.format(Nside, sigma_noise, order, sigma, ntype)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "params = hyperparameters.get_params(training.N, EXP_NAME, order, Nside, ntype)\n",
+    "# params['profile'] = True  # See computation time and memory usage in Tensorboard.\n",
+    "# params['debug'] = True  # Debug the model in Tensorboard.\n",
+    "model = models.cnn2d(**params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Cleanup before running again.\n",
+    "shutil.rmtree('summaries/{}/'.format(EXP_NAME), ignore_errors=True)\n",
+    "shutil.rmtree('checkpoints/{}/'.format(EXP_NAME), ignore_errors=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4.3 Find an optimal learning rate (optional)\n",
+    "\n",
+    "The learning rate is the most important hyper-parameter. A technique to find an optimal value is to visualize the validation loss while increasing the learning rate. One way to define the optimal learning rate is to search for the largest value for which the validation loss still decreases."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# backup = params.copy()\n",
+    "#\n",
+    "# params, learning_rate = utils.test_learning_rates(params, training.N, 1e-6, 1e-1, num_epochs=20)\n",
+    "#\n",
+    "# shutil.rmtree('summaries/{}/'.format(params['dir_name']), ignore_errors=True)\n",
+    "# shutil.rmtree('checkpoints/{}/'.format(params['dir_name']), ignore_errors=True)\n",
+    "#\n",
+    "# model = models.cnn2d(**params)\n",
+    "# _, loss_validation, _, _ = model.fit(training, validation)\n",
+    "#\n",
+    "# params.update(backup)\n",
+    "#\n",
+    "# plt.semilogx(learning_rate, loss_validation, '.-')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4.4 Training the network\n",
+    "\n",
+    "Here are a few remarks.\n",
+    "* The model will create TensorBoard summaries in the `summaries` folder. Start TensorBoard with `cd summaries` then `tensorboard --logdir .`, and open it in a browser tab to visualize training progress and statistics about the learned parameters. You can debug the model by setting `params['debug'] = True` and launching TensorBoard with `tensorboard --logdir . --debugger_port 6064`.\n",
+    "* You probably need a GPU to train the model in an acceptable amount of time.\n",
+    "* You will get slightly different results every time the network is trained."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "accuracy_validation, loss_validation, loss_training, t_step = model.fit(training, validation)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can see below that the classifier does not overfit the training data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# plot.plot_loss(loss_training, loss_validation, t_step, params['eval_frequency'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "error_validation = experiment_helper.model_error(model, features_validation, labels_validation)\n",
+    "print('The validation error is {:.2%}'.format(error_validation), flush=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "error_test = experiment_helper.model_error(model, features_test, labels_test)\n",
+    "print('The testing error is {:.2%}'.format(error_test), flush=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def plotconv(conv, ax=None):\n",
+    "    # Tile the (sx, sy, nx, ny) convolution kernels into one image, one block per (input, output) pair.\n",
+    "    sx, sy, nx, ny = conv.shape\n",
+    "    mat = np.zeros((sx*nx+nx-1, sy*ny+ny-1))\n",
+    "    for i in range(nx):\n",
+    "        for j in range(ny):\n",
+    "            mat[i+i*sx:i+(i+1)*sx, j+j*sy:j+(j+1)*sy] = conv[:, :, i, j]\n",
+    "    if ax is None:\n",
+    "        ax = plt.gca()\n",
+    "    v = np.max(abs(mat))\n",
+    "    ax.imshow(mat, cmap=plt.cm.RdBu, vmin=-v, vmax=v)\n",
+    "\n",
+    "    ticks = np.arange(nx)*(sx+1)+(sx+1)/2-1\n",
+    "    lx = ['In {}'.format(i+1) for i in range(nx)]\n",
+    "    plt.yticks(ticks, lx)\n",
+    "\n",
+    "    ticks = np.arange(ny)*(sy+1)+(sy+1)/2-1\n",
+    "    ly = ['Out {}'.format(i+1) for i in range(ny)]\n",
+    "    plt.xticks(ticks, ly)\n",
+    "\n",
+    "    return ax"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get all variables of the graph\n",
+    "# [n.name for n in model.graph.as_graph_def().node]\n",
+    "# Get all trainable variables\n",
+    "# with model.graph.as_default():\n",
+    "#     print(tf.trainable_variables())\n",
+    "for i in range(5):\n",
+    "    layer = i+1\n",
+    "    plt.figure(figsize=(20,20))\n",
+    "    conv = model.get_var('conv{}/conv2d/w'.format(layer))\n",
+    "    plotconv(conv)\n",
+    "    plt.title('Convolution layer {}'.format(layer))\n",
+    "    plt.savefig(\"figures/conv_kernel_layer{}.pdf\".format(layer), bbox_inches='tight')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/experiments_2dcnn.py b/experiments_2dcnn.py
index 121fac1..db82232 100644
--- a/experiments_2dcnn.py
+++ b/experiments_2dcnn.py
@@ -14,7 +14,7 @@ import numpy as np
 import time
 
-from deepsphere.cnn import Healpix2CNN, build_index
+from deepsphere.utils import build_index
 from deepsphere import models, experiment_helper
 from deepsphere.data import LabeledDatasetWithNoise, LabeledDataset
 from grid import pgrid
 
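
Reviewer note (appended, not part of the patch): the new `build_index` helper is easiest to understand from its output on small inputs. The sketch below assumes the patched `deepsphere.utils` is importable; the expected grids are shown as comments and each grid is checked to be a permutation of the NEST pixel indices.

```python
# Illustration only: build_index(level) returns a 2**level x 2**level grid whose
# entry (i, j) is the NEST-ordered pixel index displayed at row i, column j when
# a HEALPix patch is unfolded into an image.
import numpy as np

from deepsphere.utils import build_index

level1 = build_index(1).astype(int)
# [[0, 1],
#  [2, 3]]

level2 = build_index(2).astype(int)
# [[ 0,  1,  4,  5],
#  [ 2,  3,  6,  7],
#  [ 8,  9, 12, 13],
#  [10, 11, 14, 15]]

# Each grid is a permutation of 0 .. 4**level - 1: every pixel appears exactly once.
for level, grid in [(1, level1), (2, level2)]:
    assert np.array_equal(np.sort(grid.ravel()), np.arange(4**level))
```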
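
A second sketch (again not part of the patch, using random data as a stand-in for the private maps) mirrors the notebook cell that reorders the features: the index grid turns each NEST-ordered patch of `Nside // order` pixels per side into a 2D image that `models.cnn2d` can consume.

```python
# Sketch of the notebook's reindexing step; assumes the patched deepsphere
# package and its dependencies are importable.
import numpy as np

from deepsphere.utils import build_index

Nside, order = 1024, 2                     # values used in the notebook
nx = Nside // order                        # side length of one base patch: 512
nlevels = int(np.round(np.log2(nx)))       # recursion depth for build_index: 9

index = build_index(nlevels).astype(int)   # (nx, nx) grid of NEST pixel indices

x_raw = np.random.rand(10, nx * nx)        # 10 fake NEST-ordered patches
images = x_raw[:, index]                   # shape (10, nx, nx), ready for the 2D CNN
```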