From 581cad51130afb1e52993ada2f586cf16b2d1c64 Mon Sep 17 00:00:00 2001
From: Rex <hkchengrex@gmail.com>
Date: Tue, 17 Oct 2023 18:30:19 -0500
Subject: [PATCH] fix overlay alpha computation

---
 inference/interact/interactive_utils.py | 33 +++++++++++--------------
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/inference/interact/interactive_utils.py b/inference/interact/interactive_utils.py
index 93005c44..d0dfcf6b 100644
--- a/inference/interact/interactive_utils.py
+++ b/inference/interact/interactive_utils.py
@@ -107,12 +107,12 @@ def overlay_layer(image, mask, layer, target_object):
     # insert a layer between foreground and background
     # The CPU version is less accurate because we are using the hard mask
     # The GPU version has softer edges as it uses soft probabilities
-    obj_mask = (np.isin(mask, target_object)).astype(np.float32)
-    layer_alpha = layer[:, :, 3].astype(np.float32) / 255
+    obj_mask = (np.isin(mask, target_object)).astype(np.float32)[:, :, np.newaxis]
+    layer_alpha = layer[:, :, 3].astype(np.float32)[:, :, np.newaxis] / 255
     layer_rgb = layer[:, :, :3]
-    background_alpha = np.maximum(obj_mask, layer_alpha)[:,:,np.newaxis]
-    obj_mask = obj_mask[:,:,np.newaxis]
-    im_overlay = (image*(1-background_alpha) + layer_rgb*(1-obj_mask) + image*obj_mask).clip(0, 255)
+    background_alpha = np.maximum(obj_mask, layer_alpha)
+    im_overlay = (image * (1 - background_alpha) + layer_rgb * (1 - obj_mask) * layer_alpha +
+                  image * obj_mask).clip(0, 255)
     return im_overlay.astype(image.dtype)
 
 def overlay_davis_torch(image, mask, alpha=0.5, fade=False):
@@ -156,28 +156,25 @@ def overlay_popup_torch(image, mask, target_object):
 
     return im_overlay
 
-def overlay_layer_torch(image, mask, layer, target_object):
+def overlay_layer_torch(image, prob, layer, target_object):
     # insert a layer between foreground and background
     # The CPU version is less accurate because we are using the hard mask
     # The GPU version has softer edges as it uses soft probabilities
     image = image.permute(1, 2, 0)
 
     if len(target_object) == 0:
-        obj_mask = torch.zeros_like(mask[0])
+        obj_mask = torch.zeros_like(prob[0]).unsqueeze(2)
     else:
-        # I should not need to convert this to numpy.
-        # uUsing list works most of the time but consistently fails
-        # if I include first object -> exclude it -> include it again.
-        # I check everywhere and it makes absolutely no sense.
-        # I am blaming this on PyTorch and calling it a day
-        obj_mask = mask[np.array(target_object,dtype=np.int32)].sum(0)
-    layer_alpha = layer[:, :, 3]
+        # TODO: find out why indexing prob with a plain list (not a numpy array) sometimes fails
+        obj_mask = prob[np.array(target_object, dtype=np.int32)].sum(0).unsqueeze(2)
+    layer_alpha = layer[:, :, 3].unsqueeze(2)
     layer_rgb = layer[:, :, :3]
-    background_alpha = torch.maximum(obj_mask, layer_alpha).unsqueeze(2)
-    obj_mask = obj_mask.unsqueeze(2)
-    im_overlay = (image*(1-background_alpha) + layer_rgb*(1-obj_mask) + image*obj_mask).clip(0, 1)
+    background_alpha = torch.maximum(obj_mask, layer_alpha)
+    im_overlay = (image * (1 - background_alpha) + layer_rgb * (1 - obj_mask) * layer_alpha +
+                  image * obj_mask).clip(0, 1)
 
-    im_overlay = (im_overlay*255).cpu().numpy()
+    im_overlay = (im_overlay * 255).cpu().numpy()
     im_overlay = im_overlay.astype(np.uint8)
 
     return im_overlay
+