From b1094d731b495a5568c9762604785241a879b2fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?H=2ED=2ET=C3=A0i?= <tai17082006.tk@gmail.com>
Date: Wed, 4 Sep 2024 10:27:22 +0700
Subject: [PATCH] Add scale_stick_for_xinsr_cn on DWPose and OpenPose. Close
 https://github.com/Fannovel16/comfyui_controlnet_aux/issues/447

---
 node_wrappers/dwpose.py                         |  8 +++++---
 node_wrappers/openpose.py                       |  8 +++++---
 pyproject.toml                                  |  2 +-
 src/custom_controlnet_aux/dwpose/__init__.py    |  8 ++++----
 src/custom_controlnet_aux/dwpose/util.py        | 13 +++++++++++--
 src/custom_controlnet_aux/open_pose/__init__.py |  8 ++++----
 src/custom_controlnet_aux/open_pose/util.py     | 11 +++++++++--
 7 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/node_wrappers/dwpose.py b/node_wrappers/dwpose.py
index 75b62bf..904518c 100644
--- a/node_wrappers/dwpose.py
+++ b/node_wrappers/dwpose.py
@@ -42,7 +42,8 @@ def INPUT_TYPES(s):
             pose_estimator=INPUT.COMBO(
                 ["dw-ll_ucoco_384_bs5.torchscript.pt", "dw-ll_ucoco_384.onnx", "dw-ll_ucoco.onnx"],
                 default="dw-ll_ucoco_384_bs5.torchscript.pt"
-            )
+            ),
+            scale_stick_for_xinsr_cn=INPUT.COMBO(["disable", "enable"])
         )
 
     RETURN_TYPES = ("IMAGE", "POSE_KEYPOINT")
@@ -50,7 +51,7 @@ def INPUT_TYPES(s):
 
     CATEGORY = "ControlNet Preprocessors/Faces and Poses Estimators"
 
-    def estimate_pose(self, image, detect_hand="enable", detect_body="enable", detect_face="enable", resolution=512, bbox_detector="yolox_l.onnx", pose_estimator="dw-ll_ucoco_384.onnx", **kwargs):
+    def estimate_pose(self, image, detect_hand="enable", detect_body="enable", detect_face="enable", resolution=512, bbox_detector="yolox_l.onnx", pose_estimator="dw-ll_ucoco_384.onnx", scale_stick_for_xinsr_cn="disable", **kwargs):
         if bbox_detector == "yolox_l.onnx":
             yolo_repo = DWPOSE_MODEL_NAME
         elif "yolox" in bbox_detector:
@@ -78,13 +79,14 @@ def estimate_pose(self, image, detect_hand="enable", detect_body="enable", detec
         detect_hand = detect_hand == "enable"
         detect_body = detect_body == "enable"
         detect_face = detect_face == "enable"
+        scale_stick_for_xinsr_cn = scale_stick_for_xinsr_cn == "enable"
         self.openpose_dicts = []
         def func(image, **kwargs):
             pose_img, openpose_dict = model(image, **kwargs)
             self.openpose_dicts.append(openpose_dict)
             return pose_img
 
-        out = common_annotator_call(func, image, include_hand=detect_hand, include_face=detect_face, include_body=detect_body, image_and_json=True, resolution=resolution)
+        out = common_annotator_call(func, image, include_hand=detect_hand, include_face=detect_face, include_body=detect_body, image_and_json=True, resolution=resolution, xinsr_stick_scaling=scale_stick_for_xinsr_cn)
         del model
         return {
             'ui': { "openpose_json": [json.dumps(self.openpose_dicts, indent=4)] },
diff --git a/node_wrappers/openpose.py b/node_wrappers/openpose.py
index ade5b18..c579e6c 100644
--- a/node_wrappers/openpose.py
+++ b/node_wrappers/openpose.py
@@ -9,7 +9,8 @@ def INPUT_TYPES(s):
             detect_hand=INPUT.COMBO(["enable", "disable"]),
             detect_body=INPUT.COMBO(["enable", "disable"]),
             detect_face=INPUT.COMBO(["enable", "disable"]),
-            resolution=INPUT.RESOLUTION()
+            resolution=INPUT.RESOLUTION(),
+            scale_stick_for_xinsr_cn=INPUT.COMBO(["disable", "enable"])
         )
         
     RETURN_TYPES = ("IMAGE", "POSE_KEYPOINT")
@@ -17,12 +18,13 @@ def INPUT_TYPES(s):
 
     CATEGORY = "ControlNet Preprocessors/Faces and Poses Estimators"
 
-    def estimate_pose(self, image, detect_hand, detect_body, detect_face, resolution=512, **kwargs):
+    def estimate_pose(self, image, detect_hand="enable", detect_body="enable", detect_face="enable", scale_stick_for_xinsr_cn="disable", resolution=512, **kwargs):
         from custom_controlnet_aux.open_pose import OpenposeDetector
 
         detect_hand = detect_hand == "enable"
         detect_body = detect_body == "enable"
         detect_face = detect_face == "enable"
+        scale_stick_for_xinsr_cn = scale_stick_for_xinsr_cn == "enable"
 
         model = OpenposeDetector.from_pretrained().to(model_management.get_torch_device())        
         self.openpose_dicts = []
@@ -31,7 +33,7 @@ def func(image, **kwargs):
             self.openpose_dicts.append(openpose_dict)
             return pose_img
         
-        out = common_annotator_call(func, image, include_hand=detect_hand, include_face=detect_face, include_body=detect_body, image_and_json=True, resolution=resolution)
+        out = common_annotator_call(func, image, include_hand=detect_hand, include_face=detect_face, include_body=detect_body, image_and_json=True, xinsr_stick_scaling=scale_stick_for_xinsr_cn, resolution=resolution)
         del model
         return {
             'ui': { "openpose_json": [json.dumps(self.openpose_dicts, indent=4)] },
diff --git a/pyproject.toml b/pyproject.toml
index de7db81..b584940 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 name = "comfyui_controlnet_aux"
 description = "Plug-and-play ComfyUI node sets for making ControlNet hint images"
 
-version = "1.0.4-alpha.8"
+version = "1.0.4-alpha.9"
 dependencies = ["torch", "importlib_metadata", "huggingface_hub", "scipy", "opencv-python>=4.7.0.72", "filelock", "numpy", "Pillow", "einops", "torchvision", "pyyaml", "scikit-image", "python-dateutil", "mediapipe", "svglib", "fvcore", "yapf", "omegaconf", "ftfy", "addict", "yacs", "trimesh[easy]", "albumentations", "scikit-learn", "matplotlib"]
 
 [project.urls]
diff --git a/src/custom_controlnet_aux/dwpose/__init__.py b/src/custom_controlnet_aux/dwpose/__init__.py
index 3730634..f6042f6 100644
--- a/src/custom_controlnet_aux/dwpose/__init__.py
+++ b/src/custom_controlnet_aux/dwpose/__init__.py
@@ -91,7 +91,7 @@ def draw_animalpose(canvas: np.ndarray, keypoints: list[Keypoint]) -> np.ndarray
     return canvas
 
 
-def draw_poses(poses: List[PoseResult], H, W, draw_body=True, draw_hand=True, draw_face=True):
+def draw_poses(poses: List[PoseResult], H, W, draw_body=True, draw_hand=True, draw_face=True, xinsr_stick_scaling=False):
     """
     Draw the detected poses on an empty canvas.
 
@@ -110,7 +110,7 @@ def draw_poses(poses: List[PoseResult], H, W, draw_body=True, draw_hand=True, dr
 
     for pose in poses:
         if draw_body:
-            canvas = util.draw_bodypose(canvas, pose.body.keypoints)
+            canvas = util.draw_bodypose(canvas, pose.body.keypoints, xinsr_stick_scaling)
 
         if draw_hand:
             canvas = util.draw_handpose(canvas, pose.left_hand)
@@ -252,7 +252,7 @@ def detect_poses(self, oriImg) -> List[PoseResult]:
             keypoints_info = self.dw_pose_estimation(oriImg.copy())
             return Wholebody.format_result(keypoints_info)
     
-    def __call__(self, input_image, detect_resolution=512, include_body=True, include_hand=False, include_face=False, hand_and_face=None, output_type="pil", image_and_json=False, upscale_method="INTER_CUBIC", **kwargs):
+    def __call__(self, input_image, detect_resolution=512, include_body=True, include_hand=False, include_face=False, hand_and_face=None, output_type="pil", image_and_json=False, upscale_method="INTER_CUBIC", xinsr_stick_scaling=False, **kwargs):
         if hand_and_face is not None:
             warnings.warn("hand_and_face is deprecated. Use include_hand and include_face instead.", DeprecationWarning)
             include_hand = hand_and_face
@@ -262,7 +262,7 @@ def __call__(self, input_image, detect_resolution=512, include_body=True, includ
         input_image, _ = resize_image_with_pad(input_image, 0, upscale_method)
         poses = self.detect_poses(input_image)
         
-        canvas = draw_poses(poses, input_image.shape[0], input_image.shape[1], draw_body=include_body, draw_hand=include_hand, draw_face=include_face)
+        canvas = draw_poses(poses, input_image.shape[0], input_image.shape[1], draw_body=include_body, draw_hand=include_hand, draw_face=include_face, xinsr_stick_scaling=xinsr_stick_scaling)
         canvas, remove_pad = resize_image_with_pad(canvas, detect_resolution, upscale_method)
         detected_map = HWC3(remove_pad(canvas))
 
diff --git a/src/custom_controlnet_aux/dwpose/util.py b/src/custom_controlnet_aux/dwpose/util.py
index cce0dc2..42874b3 100644
--- a/src/custom_controlnet_aux/dwpose/util.py
+++ b/src/custom_controlnet_aux/dwpose/util.py
@@ -79,13 +79,14 @@ def is_normalized(keypoints: List[Optional[Keypoint]]) -> bool:
     return all(point_normalized)
 
     
-def draw_bodypose(canvas: np.ndarray, keypoints: List[Keypoint]) -> np.ndarray:
+def draw_bodypose(canvas: np.ndarray, keypoints: List[Keypoint], xinsr_stick_scaling: bool = False) -> np.ndarray:
     """
     Draw keypoints and limbs representing body pose on a given canvas.
 
     Args:
         canvas (np.ndarray): A 3D numpy array representing the canvas (image) on which to draw the body pose.
         keypoints (List[Keypoint]): A list of Keypoint objects representing the body keypoints to be drawn.
+        xinsr_stick_scaling (bool): Whether to scale the stick width for xinsir's ControlNet models.
 
     Returns:
         np.ndarray: A 3D numpy array representing the modified canvas with the drawn body pose.
@@ -98,8 +99,16 @@ def draw_bodypose(canvas: np.ndarray, keypoints: List[Keypoint]) -> np.ndarray:
     else:
         H, W, _ = canvas.shape
 
+    CH, CW, _ = canvas.shape
     stickwidth = 4
 
+    # Ref: https://huggingface.co/xinsir/controlnet-openpose-sdxl-1.0
+    max_side = max(CW, CH)
+    if xinsr_stick_scaling:
+        stick_scale = 1 if max_side < 500 else min(2 + (max_side // 1000), 7)
+    else:
+        stick_scale = 1
+
     limbSeq = [
         [2, 3], [2, 6], [3, 4], [4, 5], 
         [6, 7], [7, 8], [2, 9], [9, 10], 
@@ -125,7 +134,7 @@ def draw_bodypose(canvas: np.ndarray, keypoints: List[Keypoint]) -> np.ndarray:
         mY = np.mean(Y)
         length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
         angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
-        polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
+        polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth*stick_scale), int(angle), 0, 360, 1)
         cv2.fillConvexPoly(canvas, polygon, [int(float(c) * 0.6) for c in color])
 
     for keypoint, color in zip(keypoints, colors):
diff --git a/src/custom_controlnet_aux/open_pose/__init__.py b/src/custom_controlnet_aux/open_pose/__init__.py
index 6a05efe..a8d0f53 100644
--- a/src/custom_controlnet_aux/open_pose/__init__.py
+++ b/src/custom_controlnet_aux/open_pose/__init__.py
@@ -36,7 +36,7 @@ class PoseResult(NamedTuple):
     right_hand: Union[HandResult, None]
     face: Union[FaceResult, None]
 
-def draw_poses(poses: List[PoseResult], H, W, draw_body=True, draw_hand=True, draw_face=True):
+def draw_poses(poses: List[PoseResult], H, W, draw_body=True, draw_hand=True, draw_face=True, xinsr_stick_scaling=False):
     """
     Draw the detected poses on an empty canvas.
 
@@ -55,7 +55,7 @@ def draw_poses(poses: List[PoseResult], H, W, draw_body=True, draw_hand=True, dr
 
     for pose in poses:
         if draw_body:
-            canvas = util.draw_bodypose(canvas, pose.body.keypoints)
+            canvas = util.draw_bodypose(canvas, pose.body.keypoints, xinsr_stick_scaling)
 
         if draw_hand:
             canvas = util.draw_handpose(canvas, pose.left_hand)
@@ -216,7 +216,7 @@ def detect_poses(self, oriImg, include_hand=False, include_face=False) -> List[P
             
             return results
         
-    def __call__(self, input_image, detect_resolution=512, include_body=True, include_hand=False, include_face=False, hand_and_face=None, output_type="pil", image_and_json=False, upscale_method="INTER_CUBIC", **kwargs):
+    def __call__(self, input_image, detect_resolution=512, include_body=True, include_hand=False, include_face=False, hand_and_face=None, output_type="pil", image_and_json=False, upscale_method="INTER_CUBIC", xinsr_stick_scaling=False, **kwargs):
         if hand_and_face is not None:
             warnings.warn("hand_and_face is deprecated. Use include_hand and include_face instead.", DeprecationWarning)
             include_hand = hand_and_face
@@ -226,7 +226,7 @@ def __call__(self, input_image, detect_resolution=512, include_body=True, includ
         input_image, remove_pad = resize_image_with_pad(input_image, detect_resolution, upscale_method)
         
         poses = self.detect_poses(input_image, include_hand=include_hand, include_face=include_face)
-        canvas = draw_poses(poses, input_image.shape[0], input_image.shape[1], draw_body=include_body, draw_hand=include_hand, draw_face=include_face) 
+        canvas = draw_poses(poses, input_image.shape[0], input_image.shape[1], draw_body=include_body, draw_hand=include_hand, draw_face=include_face, xinsr_stick_scaling=xinsr_stick_scaling) 
         detected_map = HWC3(remove_pad(canvas))
 
         if output_type == "pil":
diff --git a/src/custom_controlnet_aux/open_pose/util.py b/src/custom_controlnet_aux/open_pose/util.py
index a0851ca..21a14c9 100644
--- a/src/custom_controlnet_aux/open_pose/util.py
+++ b/src/custom_controlnet_aux/open_pose/util.py
@@ -67,13 +67,14 @@ def transfer(model, model_weights):
     return transfered_model_weights
 
 
-def draw_bodypose(canvas: np.ndarray, keypoints: List[Keypoint]) -> np.ndarray:
+def draw_bodypose(canvas: np.ndarray, keypoints: List[Keypoint], xinsr_stick_scaling: bool = False) -> np.ndarray:
     """
     Draw keypoints and limbs representing body pose on a given canvas.
 
     Args:
         canvas (np.ndarray): A 3D numpy array representing the canvas (image) on which to draw the body pose.
         keypoints (List[Keypoint]): A list of Keypoint objects representing the body keypoints to be drawn.
+        xinsr_stick_scaling (bool): Whether to scale the stick width for xinsir's ControlNet models.
 
     Returns:
         np.ndarray: A 3D numpy array representing the modified canvas with the drawn body pose.
@@ -83,6 +84,12 @@ def draw_bodypose(canvas: np.ndarray, keypoints: List[Keypoint]) -> np.ndarray:
     """
     H, W, C = canvas.shape
     stickwidth = 4
+    # Ref: https://huggingface.co/xinsir/controlnet-openpose-sdxl-1.0
+    max_side = max(H, W)
+    if xinsr_stick_scaling:
+        stick_scale = 1 if max_side < 500 else min(2 + (max_side // 1000), 7)
+    else:
+        stick_scale = 1
 
     limbSeq = [
         [2, 3], [2, 6], [3, 4], [4, 5], 
@@ -109,7 +116,7 @@ def draw_bodypose(canvas: np.ndarray, keypoints: List[Keypoint]) -> np.ndarray:
         mY = np.mean(Y)
         length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
         angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
-        polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
+        polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth*stick_scale), int(angle), 0, 360, 1)
         cv2.fillConvexPoly(canvas, polygon, [int(float(c) * 0.6) for c in color])
 
     for keypoint, color in zip(keypoints, colors):