Enable model and data sharding #96

Draft: wants to merge 37 commits into base: main
Commits (37)
52e96ea  edit installation instructions in readme  (gianlucadetommaso, May 15, 2023)
5e0076d  Merge branch 'main' of https://github.com/awslabs/fortuna  (gianlucadetommaso, May 15, 2023)
4c7fd28  Merge branch 'main' of https://github.com/awslabs/fortuna  (gianlucadetommaso, May 15, 2023)
6cb6581  bump up version  (gianlucadetommaso, May 15, 2023)
1b39780  Merge branch 'main' of https://github.com/awslabs/fortuna  (gianlucadetommaso, May 16, 2023)
cb2b49a  Merge branch 'main' of https://github.com/awslabs/fortuna  (gianlucadetommaso, May 16, 2023)
14e3ca4  Merge branch 'main' of https://github.com/awslabs/fortuna  (gianlucadetommaso, May 25, 2023)
580067d  Merge branch 'main' of https://github.com/awslabs/fortuna  (gianlucadetommaso, May 27, 2023)
048ef09  Merge branch 'main' of https://github.com/awslabs/fortuna  (gianlucadetommaso, Jun 2, 2023)
ad542a4  Merge branch 'main' of https://github.com/awslabs/fortuna  (gianlucadetommaso, Jun 12, 2023)
41417c1  Merge branch 'main' of https://github.com/awslabs/fortuna  (gianlucadetommaso, Jun 12, 2023)
64be374  Merge branch 'main' of https://github.com/awslabs/fortuna  (gianlucadetommaso, Jun 14, 2023)
a2d0f34  Merge branch 'main' of https://github.com/awslabs/fortuna  (gianlucadetommaso, Jun 14, 2023)
66bba06  Merge branch 'main' of https://github.com/awslabs/fortuna  (gianlucadetommaso, Jun 15, 2023)
911aa82  Merge branch 'main' of https://github.com/awslabs/fortuna  (gianlucadetommaso, Jun 15, 2023)
01f959b  Merge branch 'main' of https://github.com/awslabs/fortuna  (gianlucadetommaso, Jun 15, 2023)
79f8dca  Merge branch 'main' of https://github.com/awslabs/fortuna  (gianlucadetommaso, Jun 15, 2023)
99a3b78  add sequence probit  (gianlucadetommaso, Jun 19, 2023)
1c23a9e  add possibility to run sequential probit on last steps only  (gianlucadetommaso, Jun 20, 2023)
4dea50f  Merge branch 'main' of https://github.com/awslabs/fortuna  (gianlucadetommaso, Jun 21, 2023)
915a1ea  Merge branch 'main' into seqprobit  (gianlucadetommaso, Jun 21, 2023)
e966745  refactor sequential probit implementation  (gianlucadetommaso, Jun 23, 2023)
529f9aa  add stop gradient flag  (gianlucadetommaso, Jun 24, 2023)
42d2117  pre-commit  (gianlucadetommaso, Jun 24, 2023)
734f597  add probit options in example script  (gianlucadetommaso, Jun 25, 2023)
404840e  mesh  (gianlucadetommaso, Jun 25, 2023)
4444907  enable model and data sharding  (gianlucadetommaso, Jun 25, 2023)
830fbe8  make further changes after training roberta  (gianlucadetommaso, Jul 11, 2023)
e3e1c4f  further changes  (gianlucadetommaso, Jul 16, 2023)
6d47a47  refactoring laplace  (gianlucadetommaso, Jul 17, 2023)
ed571de  start debugging swag  (gianlucadetommaso, Jul 18, 2023)
1ced008  Merge branch 'main' of https://github.com/awslabs/fortuna  (gianlucadetommaso, Jul 18, 2023)
6992692  Merge branch 'main' of https://github.com/awslabs/fortuna  (gianlucadetommaso, Jul 18, 2023)
b2540c1  make small change in readme because of publish to pypi error  (gianlucadetommaso, Jul 18, 2023)
2362998  Merge branch 'main' of https://github.com/awslabs/fortuna  (gianlucadetommaso, Jul 18, 2023)
ba52081  debug deep ensemble  (gianlucadetommaso, Jul 18, 2023)
d2fc289  fix sghmc and sgld  (gianlucadetommaso, Jul 25, 2023)
14 changes: 7 additions & 7 deletions benchmarks/transformers/masked_language_modeling.py
@@ -193,13 +193,13 @@ def unpack_model_tar(model_ckpt_path: pathlib.Path) -> pathlib.Path:

try:
logger.info(list(pathlib.Path(args.restore_checkpoint_dir).rglob("*")))
- restore_checkpoint_path = unpack_model_tar(
+ restore_checkpoint_dir = unpack_model_tar(
list(pathlib.Path(args.restore_checkpoint_dir).rglob("*"))[0]
)
- logger.info(list(pathlib.Path(restore_checkpoint_path).rglob("*")))
+ logger.info(list(pathlib.Path(restore_checkpoint_dir).rglob("*")))
except:
logger.info("No checkpoint to restore")
- restore_checkpoint_path = None
+ restore_checkpoint_dir = None

tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path)

@@ -303,11 +303,11 @@ def unpack_model_tar(model_ckpt_path: pathlib.Path) -> pathlib.Path:
#### TRAIN! ####
#####################################
def accuracy_mlm(preds: Array, targets: Array) -> jnp.ndarray:
- if preds.ndim > 2:
+ if preds.ndim > 1:
raise ValueError(
"""`preds` must be a one-dimensional array of predicted classes."""
)
- if targets.ndim > 2:
+ if targets.ndim > 1:
raise ValueError(
"""`targets` must be a one-dimensional array of target classes."""
)
@@ -341,7 +341,7 @@ def accuracy_mlm(preds: Array, targets: Array) -> jnp.ndarray:
save_checkpoint_dir=args.save_checkpoint_dir,
save_every_n_steps=args.save_every_n_steps,
keep_top_n_checkpoints=args.keep_top_n_checkpoints,
- restore_checkpoint_path=restore_checkpoint_path,
+ restore_checkpoint_dir=restore_checkpoint_dir,
),
)
if args.last_layer_only and (
@@ -357,7 +357,7 @@ def accuracy_mlm(preds: Array, targets: Array) -> jnp.ndarray:
and args.last_layer_only
else None,
)
- if restore_checkpoint_path is not None:
+ if restore_checkpoint_dir is not None:
fit_config.optimizer = last_layer_optimizer
train_kwargs = {"fit_config": fit_config}
else:
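A note on the `accuracy_mlm` guard change above: tightening `ndim > 2` to `ndim > 1` makes the check agree with the error message, which requires one-dimensional arrays. A minimal sketch, not part of the diff, of the case the old guard let through:

import jax.numpy as jnp

preds = jnp.array([[0, 1], [1, 0]])  # a 2-D array, not a vector of predicted classes
print(preds.ndim > 2)  # False: the old guard accepted this despite the 1-D requirement
print(preds.ndim > 1)  # True: the new guard rejects anything that is not one-dimensional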
43 changes: 35 additions & 8 deletions benchmarks/transformers/prob_model_text_classification.py
@@ -36,6 +36,7 @@
accuracy,
expected_calibration_error,
)
+ from fortuna.model_editor import ProbitModelEditor
from fortuna.prob_model import (
ADVIPosteriorApproximator,
DeepEnsemblePosteriorApproximator,
@@ -213,6 +214,11 @@ def unpack_model_tar(model_ckpt_path: pathlib.Path) -> pathlib.Path:
parser.add_argument("--sgmcmc_polynomial_schedule_gamma", type=float, default=0.55)
parser.add_argument("--sgmcmc_preconditioner", type=strbool, default=False)
parser.add_argument("--sghmc_momentum_decay", type=float, default=0.01)
+ # model editor
+ parser.add_argument("--enable_probit_model_editor", type=strbool, default=False)
+ parser.add_argument("--probit_init_log_var", type=float, default=-5)
+ parser.add_argument("--probit_stop_gradient", type=strbool, default=False)
+ parser.add_argument("--probit_last_layer_only", type=strbool, default=False)
# optimizer
parser.add_argument("--learning_rate", type=float, default=2e-5)
parser.add_argument("--adam_eps", type=float, default=1e-8)
@@ -234,13 +240,13 @@ def unpack_model_tar(model_ckpt_path: pathlib.Path) -> pathlib.Path:

try:
logger.info(list(pathlib.Path(args.load_model_dir).rglob("*")))
- restore_checkpoint_path = unpack_model_tar(
+ restore_checkpoint_dir = unpack_model_tar(
list(pathlib.Path(args.load_model_dir).rglob("*"))[0]
)
- logger.info(list(pathlib.Path(restore_checkpoint_path).rglob("*")))
+ logger.info(list(pathlib.Path(restore_checkpoint_dir).rglob("*")))
except:
logger.info("No checkpoint to restore")
- restore_checkpoint_path = None
+ restore_checkpoint_dir = None

tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path)

@@ -392,6 +398,21 @@ def unpack_model_tar(model_ckpt_path: pathlib.Path) -> pathlib.Path:
),
}

+ model_editor = None
+ if args.enable_probit_model_editor:
+ probit_freeze_fun = (
+ lambda p, v: True
+ if "classifier" in p
+ else False
+ if args.probit_last_layer_only
+ else None
+ )
+ model_editor = ProbitModelEditor(
+ freeze_fun=probit_freeze_fun,
+ init_log_var=args.probit_init_log_var,
+ stop_gradient=args.probit_stop_gradient,
+ )

### TRAINING
prob_model = ProbClassifier(
model=model,
@@ -400,6 +421,7 @@ def unpack_model_tar(model_ckpt_path: pathlib.Path) -> pathlib.Path:
],
prior=IsotropicGaussianPrior(log_var=args.prior_log_var),
output_calibrator=None,
+ model_editor=model_editor
)

fit_config = FitConfig(
@@ -422,7 +444,7 @@ def unpack_model_tar(model_ckpt_path: pathlib.Path) -> pathlib.Path:
save_checkpoint_dir=args.output_data_dir,
save_every_n_steps=args.save_every_n_steps,
keep_top_n_checkpoints=args.keep_top_n_checkpoints,
- restore_checkpoint_path=restore_checkpoint_path,
+ restore_checkpoint_dir=restore_checkpoint_dir,
),
callbacks=[
ResetCovarianceCallback(
@@ -453,7 +475,7 @@ def unpack_model_tar(model_ckpt_path: pathlib.Path) -> pathlib.Path:
last_layer_optimizer = FitOptimizer(
method=optimizer, n_epochs=args.num_train_epochs, freeze_fun=freeze_fun
)
- if restore_checkpoint_path is not None:
+ if restore_checkpoint_dir is not None:
fit_config.optimizer = last_layer_optimizer
train_kwargs = {"fit_config": fit_config}
else:
@@ -478,11 +500,16 @@ def unpack_model_tar(model_ckpt_path: pathlib.Path) -> pathlib.Path:
calib_data_loader=None,
**train_kwargs,
)
- elif restore_checkpoint_path is not None:
- prob_model.load_state(restore_checkpoint_path)
+ elif restore_checkpoint_dir is not None:
+ prob_model.load_state(restore_checkpoint_dir)
else:
raise ValueError(
"Either restore_checkpoint_path or num_train_epochs > 0 should be specified."
"Either restore_checkpoint_dir or num_train_epochs > 0 should be specified."
)

+ if args.enable_probit_model_editor:
+ logger.info(
+ f"Probit log-variance: {prob_model.posterior.state.get().params['model_editor']['params']['log_var']}"
+ )

### IN-D PERFORMANCE
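For readers skimming the `ProbitModelEditor` hunk above: the nested conditional expression bound to `probit_freeze_fun` is easy to misread. The following sketch, not part of the diff, restates it as an explicit function with identical behavior; how `True`, `False`, and `None` are interpreted is defined by `ProbitModelEditor`'s `freeze_fun` contract, which is not shown here.

def probit_freeze_fun(p, v):
    # Identical to the lambda added in the diff above.
    if "classifier" in p:
        return True   # parameters whose path mentions "classifier" (the last layer)
    if args.probit_last_layer_only:
        return False  # all remaining parameters, when --probit_last_layer_only is set
    return None       # otherwise: no explicit decision for the remaining parameters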

This file was deleted.

@@ -4,6 +4,6 @@ hparams:
per_device_eval_batch_size: 32
per_device_train_batch_size: 32
learning_rate: 2e-05
- num_warmup_steps: 10000
+ num_warmup_steps: 500
prior_log_var: 100.0
weight_decay: 0.01
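The warm-up change above (10000 to 500 steps) only takes effect through whatever learning-rate schedule the benchmark builds from `num_warmup_steps`; that code is not part of this diff. As a purely hypothetical illustration of how such a value is typically consumed with optax:

import optax

# Hypothetical sketch only: the benchmark's actual schedule is not shown in this PR.
learning_rate = 2e-05
num_warmup_steps = 500
num_train_steps = 20_000  # assumed total number of training steps, for illustration

schedule = optax.warmup_cosine_decay_schedule(
    init_value=0.0,
    peak_value=learning_rate,
    warmup_steps=num_warmup_steps,
    decay_steps=num_train_steps,
)
optimizer = optax.adamw(learning_rate=schedule, weight_decay=0.01)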
@@ -8,19 +8,19 @@ Please find their references below.

.. automodule:: fortuna.output_calib_model.classification
:members:
- :exclude-members: get_path_latest_checkpoint, save_checkpoint, restore_checkpoint
+ :exclude-members: save_checkpoint, restore_checkpoint

.. _output_calib_regressor:

.. automodule:: fortuna.output_calib_model.regression
:members:
- :exclude-members: get_path_latest_checkpoint, save_checkpoint, restore_checkpoint
+ :exclude-members: save_checkpoint, restore_checkpoint

.. _output_calib_base:

.. automodule:: fortuna.output_calib_model.base
:members:
- :exclude-members: get_path_latest_checkpoint, save_checkpoint, restore_checkpoint
+ :exclude-members: save_checkpoint, restore_checkpoint

.. toctree::
:maxdepth: 1
2 changes: 1 addition & 1 deletion examples/scaling_up_bayesian_inference.pct.py
@@ -89,7 +89,7 @@ def __call__(self, x, train: bool = False, **kwargs) -> jnp.ndarray:

# We are ready to call `prob_model.train`, which will perform posterior inference under-the-hood. In order to do Bayesian inference on the last layer only and freeze the other parameters, all we need to do is to pass a function `freeze_fun` to the optimizer configuration object, deciding which parameters should be "frozen" and which should be "trainable".
#
- # In addition, we configure `map_fit_config` to make a preliminary run with MAP, and set the frozen parameters to a meaningful value. Alternatively, if any of these is available, you can also either restore an existing checkpoint by configuring `FitCheckpointer.restore_checkpoint_path`, or start from a current state by setting `FitCheckpointer.start_from_current_state` to `True`.
+ # In addition, we configure `map_fit_config` to make a preliminary run with MAP, and set the frozen parameters to a meaningful value. Alternatively, if any of these is available, you can also either restore an existing checkpoint by configuring `FitCheckpointer.restore_checkpoint_dir`, or start from a current state by setting `FitCheckpointer.start_from_current_state` to `True`.

from fortuna.prob_model import FitConfig, FitOptimizer
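The comment above describes `freeze_fun` only in prose. A minimal sketch of what such a configuration could look like follows; the "trainable"/"frozen" return convention and the "output_subnet" path check are illustrative assumptions, not taken from this diff, and the `optimizer` keyword is inferred from `fit_config.optimizer` appearing elsewhere in this PR.

from fortuna.prob_model import FitConfig, FitOptimizer

# Sketch under assumptions: keep parameters whose path mentions "output_subnet"
# trainable and freeze everything else during posterior fitting.
def freeze_fun(path, value):
    return "trainable" if "output_subnet" in path else "frozen"

fit_config = FitConfig(optimizer=FitOptimizer(n_epochs=3, freeze_fun=freeze_fun))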
