diff --git a/pyproject.toml b/pyproject.toml
index f236e51..a54e0b8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,7 +15,7 @@ license = { text = "BSD 3-Clause License" }
 authors = [
     { email = "rhoadesj@hhmi.org", name = "Jeff Rhoades" },
 ]
-version = "0.1.2"
+version = "0.1.3"
 dependencies = [
   'torch',
   'torchvision',
diff --git a/src/cellmap_models/pytorch/__init__.py b/src/cellmap_models/pytorch/__init__.py
index 963639f..eb363ef 100755
--- a/src/cellmap_models/pytorch/__init__.py
+++ b/src/cellmap_models/pytorch/__init__.py
@@ -1 +1,25 @@
-from . import cosem, cellpose, untrained_models
+import lazy_loader as lazy
+
+# Defer importing the `cellpose` submodule until it, or one of the helpers
+# re-exported from it below, is first accessed on this package.
+# `submodules` keeps `cellmap_models.pytorch.cellpose` reachable by attribute
+# access, as it was when the submodule was imported eagerly.
+__getattr__, __dir__, __all__ = lazy.attach(
+    __name__,
+    submodules=["cellpose"],
+    submod_attrs={
+        "cellpose": [
+            "add_model",
+            "load_model",
+            "get_model",
+            "download_checkpoint",
+            "models_dict",
+            "models_list",
+        ]
+    },
+)
+
+from . import cosem, untrained_models
+
+# `lazy.attach` only lists the lazy names; keep the eager submodules public too.
+__all__ = [*__all__, "cosem", "untrained_models"]
diff --git a/src/cellmap_models/pytorch/cosem/load_model.py b/src/cellmap_models/pytorch/cosem/load_model.py
index 17c9523..6aff03b 100755
--- a/src/cellmap_models/pytorch/cosem/load_model.py
+++ b/src/cellmap_models/pytorch/cosem/load_model.py
@@ -229,28 +229,29 @@ def compute_minimal_shapes(self):
 
             min_input_shape += total_pad
 
-        # PART 3: calculate the minimum output shape by propagating from the "bottom right" to the output of the U-Net
-        min_output_shape = np.copy(min_bottom_right)
-        for lv in range(len(self.downsample_factors))[
-            ::-1
-        ]:  # go through upsampling path
-            min_output_shape *= self.downsample_factors[
-                lv
-            ]  # calculate shape after upsampling
-
-            # calculate shape after convolutions on current level
-            kernels = np.copy(self.kernel_sizes_up[lv])
-            total_pad = np.sum(
-                [np.array(k) - np.array((1.0, 1.0, 1.0)) for k in kernels], axis=0
-            )
-
-            # same rational for translational equivariance as above in PART 1
-            total_pad = np.ceil(
-                total_pad / np.prod(self.downsample_factors[lv:], axis=0, dtype=float)
-            ) * np.prod(self.downsample_factors[lv:], axis=0)
-            min_output_shape -= total_pad
+        # # PART 3: calculate the minimum output shape by propagating from the "bottom right" to the output of the U-Net
+        # min_output_shape = np.copy(min_bottom_right)
+        # for lv in range(len(self.downsample_factors))[
+        #     ::-1
+        # ]:  # go through upsampling path
+        #     min_output_shape *= self.downsample_factors[
+        #         lv
+        #     ]  # calculate shape after upsampling
+
+        #     # calculate shape after convolutions on current level
+        #     kernels = np.copy(self.kernel_sizes_up[lv])
+        #     total_pad = np.sum(
+        #         [np.array(k) - np.array((1.0, 1.0, 1.0)) for k in kernels], axis=0
+        #     )
+
+        #     # same rationale for translational equivariance as above in PART 1
+        #     total_pad = np.ceil(
+        #         total_pad / np.prod(self.downsample_factors[lv:], axis=0, dtype=float)
+        #     ) * np.prod(self.downsample_factors[lv:], axis=0)
+        #     min_output_shape -= total_pad
 
         self.min_input_shape = [int(s) for s in min_input_shape]
+        min_output_shape = self(torch.ones((1, 1, *self.min_input_shape))).shape[2:]
         self.min_output_shape = [int(s) for s in min_output_shape]
         self.input_size_step = [int(s) for s in step]