Commit e5a3640

Author: Asif Ahmed (committed)

replace silu, reduce code

1 parent e450b8e · commit e5a3640

File tree

4 files changed: +24 −96 lines changed

setup.py

Lines changed: 1 addition & 1 deletion

@@ -4,7 +4,7 @@
     name='vqcompress',
     author='Asif Ahmed',
     description='Image compression with vqgan, autoencoder etc.',
-    version='0.1.7',
+    version='0.1.8',
     url='https://github.com/quickgrid/vq-compress',
     packages=find_packages(),
     classifiers=[

vqcompress/core/ldm/autoencoder.py

Lines changed: 6 additions & 77 deletions

@@ -79,23 +79,6 @@ def get_input(self, batch, k):
     def get_last_layer(self):
         return self.decoder.conv_out.weight
 
-    @torch.no_grad()
-    def log_images(self, batch, only_inputs=False, **kwargs):
-        log = dict()
-        x = self.get_input(batch, self.image_key)
-        x = x.to(self.device)
-        if not only_inputs:
-            xrec, posterior = self(x)
-            if x.shape[1] > 3:
-                # colorize with random projection
-                assert xrec.shape[1] > 3
-                x = self.to_rgb(x)
-                xrec = self.to_rgb(xrec)
-            log["samples"] = self.decode(torch.randn_like(posterior.sample()))
-            log["reconstructions"] = xrec
-        log["inputs"] = x
-        return log
-
     def to_rgb(self, x):
         assert self.image_key == "segmentation"
         if not hasattr(self, "colorize"):
@@ -240,7 +223,6 @@ def __init__(self,
                  kl_weight=1e-8,
                  remap=None,
                  ):
-
         z_channels = ddconfig["z_channels"]
         super().__init__(ddconfig,
                          # lossconfig,
@@ -256,75 +238,22 @@
         # self.loss.n_classes = n_embed
         self.vocab_size = n_embed
 
-        self.quantize = GumbelQuantize(z_channels, embed_dim,
-                                       n_embed=n_embed,
-                                       kl_weight=kl_weight, temp_init=1.0,
-                                       remap=remap)
+        self.quantize = GumbelQuantize(
+            z_channels, embed_dim,
+            n_embed=n_embed,
+            kl_weight=kl_weight, temp_init=1.0,
+            remap=remap
+        )
 
         # self.temperature_scheduler = instantiate_from_config(temperature_scheduler_config)  # annealing of temp
 
         if ckpt_path is not None:
             self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys)
 
-    def temperature_scheduling(self):
-        self.quantize.temperature = self.temperature_scheduler(self.global_step)
-
     def encode_to_prequant(self, x):
         h = self.encoder(x)
         h = self.quant_conv(h)
         return h
 
     def decode_code(self, code_b):
         raise NotImplementedError
-
-    def training_step(self, batch, batch_idx, optimizer_idx):
-        self.temperature_scheduling()
-        x = self.get_input(batch, self.image_key)
-        xrec, qloss = self(x)
-
-        if optimizer_idx == 0:
-            # autoencode
-            aeloss, log_dict_ae = self.loss(qloss, x, xrec, optimizer_idx, self.global_step,
-                                            last_layer=self.get_last_layer(), split="train")
-
-            self.log_dict(log_dict_ae, prog_bar=False, logger=True, on_step=True, on_epoch=True)
-            self.log("temperature", self.quantize.temperature, prog_bar=False, logger=True, on_step=True, on_epoch=True)
-            return aeloss
-
-        if optimizer_idx == 1:
-            # discriminator
-            discloss, log_dict_disc = self.loss(qloss, x, xrec, optimizer_idx, self.global_step,
-                                                last_layer=self.get_last_layer(), split="train")
-            self.log_dict(log_dict_disc, prog_bar=False, logger=True, on_step=True, on_epoch=True)
-            return discloss
-
-    def validation_step(self, batch, batch_idx):
-        x = self.get_input(batch, self.image_key)
-        xrec, qloss = self(x, return_pred_indices=True)
-        aeloss, log_dict_ae = self.loss(qloss, x, xrec, 0, self.global_step,
-                                        last_layer=self.get_last_layer(), split="val")
-
-        discloss, log_dict_disc = self.loss(qloss, x, xrec, 1, self.global_step,
-                                            last_layer=self.get_last_layer(), split="val")
-        rec_loss = log_dict_ae["val/rec_loss"]
-        self.log("val/rec_loss", rec_loss,
-                 prog_bar=True, logger=True, on_step=False, on_epoch=True, sync_dist=True)
-        self.log("val/aeloss", aeloss,
-                 prog_bar=True, logger=True, on_step=False, on_epoch=True, sync_dist=True)
-        self.log_dict(log_dict_ae)
-        self.log_dict(log_dict_disc)
-        return self.log_dict
-
-    def log_images(self, batch, **kwargs):
-        log = dict()
-        x = self.get_input(batch, self.image_key)
-        x = x.to(self.device)
-        # encode
-        h = self.encoder(x)
-        h = self.quant_conv(h)
-        quant, _, _ = self.quantize(h)
-        # decode
-        x_rec = self.decode(quant)
-        log["inputs"] = x
-        log["reconstructions"] = x_rec
-        return log
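
Note: with the Lightning training_step/validation_step/log_images hooks removed, GumbelVQ is effectively inference-only. A minimal reconstruction sketch using only the methods this diff keeps (encode_to_prequant, quantize, decode); `model` and `x` here are hypothetical stand-ins for a loaded GumbelVQ instance and an image batch, not part of the commit:

import torch

@torch.no_grad()
def reconstruct(model, x):
    # encoder + quant_conv, as kept by this commit
    h = model.encode_to_prequant(x)
    # GumbelQuantize returns a (quantized, loss, info) tuple,
    # per the removed log_images body above
    quant, _, _ = model.quantize(h)
    # decode the quantized latents back to image space
    return model.decode(quant)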

vqcompress/core/ldm/distributions.py

Lines changed: 11 additions & 7 deletions

@@ -1,5 +1,5 @@
-import torch
 import numpy as np
+import torch
 
 
 class DiagonalGaussianDistribution(object):
@@ -22,22 +22,26 @@ def kl(self, other=None):
             return torch.Tensor([0.])
         else:
             if other is None:
-                return 0.5 * torch.sum(torch.pow(self.mean, 2)
-                                       + self.var - 1.0 - self.logvar,
-                                       dim=[1, 2, 3])
+                return 0.5 * torch.sum(
+                    torch.pow(self.mean, 2)
+                    + self.var - 1.0 - self.logvar,
+                    dim=[1, 2, 3]
+                )
             else:
                 return 0.5 * torch.sum(
                     torch.pow(self.mean - other.mean, 2) / other.var
                     + self.var / other.var - 1.0 - self.logvar + other.logvar,
-                    dim=[1, 2, 3])
+                    dim=[1, 2, 3]
+                )
 
-    def nll(self, sample, dims=[1,2,3]):
+    def nll(self, sample, dims=[1, 2, 3]):
         if self.deterministic:
             return torch.Tensor([0.])
         logtwopi = np.log(2.0 * np.pi)
         return 0.5 * torch.sum(
             logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var,
-            dim=dims)
+            dim=dims
+        )
 
     def mode(self):
         return self.mean
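
Note: the reflowed kl() still computes the standard closed form KL(N(mean, var) || N(0, 1)) = 0.5 * sum(mean^2 + var - 1 - logvar). A self-contained sanity sketch (not part of the commit) checking that formula against torch.distributions:

import torch

mean = torch.randn(2, 4, 8, 8)
logvar = torch.randn(2, 4, 8, 8)
var = logvar.exp()

# Closed form used by DiagonalGaussianDistribution.kl() when other is None
kl_manual = 0.5 * torch.sum(mean.pow(2) + var - 1.0 - logvar, dim=[1, 2, 3])

# Reference value from torch.distributions
p = torch.distributions.Normal(mean, var.sqrt())
q = torch.distributions.Normal(torch.zeros_like(mean), torch.ones_like(mean))
kl_ref = torch.distributions.kl_divergence(p, q).sum(dim=[1, 2, 3])

assert torch.allclose(kl_manual, kl_ref, atol=1e-5)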

vqcompress/core/ldm/model.py

Lines changed: 6 additions & 11 deletions

@@ -4,18 +4,13 @@
 import torch
 import torch.nn as nn
 from einops import rearrange
-
+import torch.nn.functional as F
 try:
     import xformers.ops
 except ModuleNotFoundError as err:
     print(err)
 
 
-def nonlinearity(x):
-    # swish
-    return x * torch.sigmoid(x)
-
-
 def Normalize(in_channels, num_groups=32):
     return torch.nn.GroupNorm(num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True)
 
@@ -115,14 +110,14 @@ def __init__(
     def forward(self, x, temb):
         h = x
         h = self.norm1(h)
-        h = nonlinearity(h)
+        h = F.silu(h)
         h = self.conv1(h)
 
         if temb is not None:
-            h = h + self.temb_proj(nonlinearity(temb))[:, :, None, None]
+            h = h + self.temb_proj(F.silu(temb))[:, :, None, None]
 
         h = self.norm2(h)
-        h = nonlinearity(h)
+        h = F.silu(h)
         h = self.dropout(h)
         h = self.conv2(h)
 
@@ -349,7 +344,7 @@ def forward(self, x):
 
         # end
         h = self.norm_out(h)
-        h = nonlinearity(h)
+        h = F.silu(h)
         h = self.conv_out(h)
         return h
 
@@ -470,7 +465,7 @@ def forward(self, z):
             return h
 
         h = self.norm_out(h)
-        h = nonlinearity(h)
+        h = F.silu(h)
         h = self.conv_out(h)
         if self.tanh_out:
             h = torch.tanh(h)
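
Note: the commit title's "replace silu" refers to swapping the hand-rolled nonlinearity() helper for torch.nn.functional.silu. SiLU is exactly swish with beta = 1, i.e. x * sigmoid(x), so the change is behavior-preserving. A quick equivalence check (illustrative only, not part of the commit):

import torch
import torch.nn.functional as F

x = torch.randn(4, 16, 32, 32)
# The removed helper computed swish(x) = x * sigmoid(x)
swish = x * torch.sigmoid(x)
# F.silu computes the identical function as a single built-in op
assert torch.allclose(F.silu(x), swish, atol=1e-6)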
