Merge pull request #93 from giotto-ai/persistence_diagrams_workflow
Persistence diagrams workflow
Showing 59 changed files with 2,706 additions and 1,215 deletions.
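The diff below adds a new tutorial notebook that covers the persistence-diagram workflow end to end on the MUTAG graph dataset: building extended persistence diagrams from the graphs via a diffusion-based filtration (diffusion_parameter = 0.1), plotting a sample diagram, creating a train/validation/test split, filtering and normalising the diagrams, assembling the data loaders, and running a hyperparameter search over a Persformer classifier.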
{"cells":[{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["import os\n","from typing import Tuple\n","\n","import torch.nn as nn\n","from gdeep.data import PreprocessingPipeline\n","from gdeep.data.datasets import PersistenceDiagramFromFiles\n","from gdeep.data.datasets.base_dataloaders import (DataLoaderBuilder,\n"," DataLoaderParamsTuples)\n","from gdeep.data.datasets.persistence_diagrams_from_graphs_builder import \\\n"," PersistenceDiagramFromGraphBuilder\n","from gdeep.data.persistence_diagrams.one_hot_persistence_diagram import (\n"," OneHotEncodedPersistenceDiagram, collate_fn_persistence_diagrams)\n","from gdeep.data.preprocessors import (\n"," FilterPersistenceDiagramByHomologyDimension,\n"," FilterPersistenceDiagramByLifetime, NormalizationPersistenceDiagram)\n","from gdeep.search.hpo import GiottoSummaryWriter\n","from gdeep.topology_layers import Persformer, PersformerConfig, PersformerWrapper\n","from gdeep.topology_layers.persformer_config import PoolerType\n","from gdeep.trainer.trainer import Trainer\n","from gdeep.search import HyperParameterOptimization\n","from gdeep.utility import DEFAULT_GRAPH_DIR, PoolerType\n","from gdeep.utility.utils import autoreload_if_notebook\n","from sklearn.model_selection import train_test_split\n","from torch.optim import Adam\n","from torch.utils.data import Subset\n","from torch.utils.tensorboard.writer import SummaryWriter\n","\n","autoreload_if_notebook()\n","\n","# Parameters\n","name_graph_dataset: str = 'MUTAG'\n","diffusion_parameter: float = 0.1\n","num_homology_types: int = 4\n","\n","\n","# Create the persistence diagram dataset\n","pd_creator = PersistenceDiagramFromGraphBuilder(name_graph_dataset, diffusion_parameter)\n","pd_creator.create()"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# Plot sample extended persistence diagram\n","file_path: str = os.path.join(DEFAULT_GRAPH_DIR,\n"," f\"MUTAG_{diffusion_parameter}_extended_persistence\", \"diagrams\")\n","graph_idx = 1\n","pd: OneHotEncodedPersistenceDiagram = \\\n"," OneHotEncodedPersistenceDiagram.load(os.path.join(file_path, \n"," f\"{graph_idx}.npy\"))\n","pd.set_homology_dimension_names([\"Ord0\", \"Ext0\", \"Rel1\", \"Ext1\"])\n","pd.plot()"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["\n","pd_mutag_ds = PersistenceDiagramFromFiles(\n"," os.path.join(\n"," DEFAULT_GRAPH_DIR, f\"MUTAG_{diffusion_parameter}_extended_persistence\"\n"," )\n",")\n","\n","pd_sample: OneHotEncodedPersistenceDiagram = pd_mutag_ds[0][0]\n","\n","fig = pd_sample.plot([\"Ord0\", \"Ext0\", \"Rel1\", \"Ext1\"])\n","# add title\n","fig.show()"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["\n","# Create the train/validation/test split\n","\n","train_indices, test_indices = train_test_split(\n"," range(len(pd_mutag_ds)),\n"," test_size=0.2,\n"," random_state=42,\n",")\n","\n","train_indices , validation_indices = train_test_split(\n"," train_indices,\n"," test_size=0.2,\n"," random_state=42,\n",")\n","\n","# Create the data loaders\n","train_dataset = Subset(pd_mutag_ds, train_indices)\n","validation_dataset = Subset(pd_mutag_ds, validation_indices)\n","test_dataset = Subset(pd_mutag_ds, test_indices)\n","\n","# Preprocess the data\n","preprocessing_pipeline = PreprocessingPipeline[Tuple[OneHotEncodedPersistenceDiagram, int]](\n"," (\n"," FilterPersistenceDiagramByHomologyDimension[int]([0, 1]),\n"," FilterPersistenceDiagramByLifetime[int](min_lifetime=-0.1, 
max_lifetime=1.0),\n"," NormalizationPersistenceDiagram[int](num_homology_dimensions=4),\n"," )\n",")\n","\n","preprocessing_pipeline.fit_to_dataset(train_dataset)\n",""]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["train_dataset = preprocessing_pipeline.attach_transform_to_dataset(train_dataset) # type: ignore\n","validation_dataset = preprocessing_pipeline.attach_transform_to_dataset(validation_dataset) # type: ignore\n","test_dataset = preprocessing_pipeline.attach_transform_to_dataset(test_dataset) # type: ignore\n",""]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["\n","dl_params = DataLoaderParamsTuples.default(\n"," batch_size=32,\n"," num_workers=0,\n"," collate_fn=collate_fn_persistence_diagrams,\n"," with_validation=True,\n",")\n","\n","\n","# Build the data loaders\n","dlb = DataLoaderBuilder((train_dataset, validation_dataset, test_dataset)) # type: ignore\n","dl_train, dl_val, dl_test = dlb.build(dl_params) # type: ignore"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["\n","# # Define the model\n","# model_config = PersformerConfig(\n","# num_layers=6,\n","# num_attention_heads=4,\n","# input_size= 2 + num_homology_types,\n","# ouptut_size=2,\n","# pooler_type=PoolerType.ATTENTION,\n","# )\n","\n","# model = Persformer(model_config)\n","# writer = SummaryWriter()\n","\n","# loss_function = nn.CrossEntropyLoss()\n","\n","# trainer = Trainer(model, [dl_train, dl_val, dl_test], loss_function, writer)\n","\n","# trainer.train(Adam, 3, False, \n","# {\"lr\":0.01}, \n","# {\"batch_size\":16, \"collate_fn\": collate_fn_persistence_diagrams})"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["\n","# Define the model by using a Wrapper for the Persformer model\n","wrapped_model = PersformerWrapper(\n"," num_attention_layers=3,\n"," num_attention_heads=4,\n"," input_size= 2 + num_homology_types,\n"," ouptut_size=2,\n"," pooler_type=PoolerType.ATTENTION,\n",")\n","writer = GiottoSummaryWriter()\n","\n","loss_function = nn.CrossEntropyLoss()\n","\n","trainer = Trainer(wrapped_model, [dl_train, dl_val, dl_test], loss_function, writer)\n","\n","# initialise hpo object\n","search = HyperParameterOptimization(trainer, \"accuracy\", 2, best_not_last=True)\n","\n","# if you want to store pickle files of the models instead of the state_dicts\n","search.store_pickle = True\n","\n","# dictionaries of hyperparameters\n","optimizers_params = {\"lr\": [0.001, 0.01]}\n","dataloaders_params = {\"batch_size\": [32, 64, 16], \n"," \"collate_fn\": [collate_fn_persistence_diagrams]}\n","models_hyperparams = {\n"," \"num_attention_layers\": [2, 6, 1],\n"," \"num_attention_heads\": [8, 16, 8],\n","}\n","\n","# starting the HPO\n","search.start(\n"," [Adam],\n"," 3,\n"," False,\n"," optimizers_params,\n"," dataloaders_params,\n"," models_hyperparams,\n"," n_accumulated_grads=2,\n",")"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":[]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":[""]}],"nbformat":4,"nbformat_minor":2,"metadata":{"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":3},"orig_nbformat":4}} |
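The nested train_test_split above holds out 20% of the diagrams for testing and then 20% of the remainder for validation, which works out to roughly a 64/16/20 split. A standalone check with plain scikit-learn, using the fact that MUTAG contains 188 graphs:

from sklearn.model_selection import train_test_split

indices = list(range(188))  # MUTAG has 188 graphs
train_idx, test_idx = train_test_split(indices, test_size=0.2, random_state=42)
train_idx, val_idx = train_test_split(train_idx, test_size=0.2, random_state=42)
print(len(train_idx), len(val_idx), len(test_idx))  # 120 30 38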
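A note on the model configuration: input_size = 2 + num_homology_types suggests that each diagram point reaches the Persformer as its (birth, death) pair concatenated with a one-hot vector over the four extended-persistence types Ord0, Ext0, Rel1 and Ext1. A minimal sketch of that encoding with plain torch tensors (an assumption for illustration; the in-library layout may differ):

import torch

# One diagram point: (birth, death) plus a one-hot homology-type vector.
# Type order assumed to match ["Ord0", "Ext0", "Rel1", "Ext1"] above.
birth, death = 0.12, 0.87
one_hot_ext0 = torch.tensor([0.0, 1.0, 0.0, 0.0])  # an Ext0 point

point = torch.cat([torch.tensor([birth, death]), one_hot_ext0])
print(point)        # tensor([0.1200, 0.8700, 0.0000, 1.0000, 0.0000, 0.0000])
print(point.shape)  # torch.Size([6]) == 2 + num_homology_types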