From b1094d731b495a5568c9762604785241a879b2fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=2ED=2ET=C3=A0i?= Date: Wed, 4 Sep 2024 10:27:22 +0700 Subject: [PATCH] Add scale_stick_for_xinsr_cn on DWPose and OpenPose. Close https://github.com/Fannovel16/comfyui_controlnet_aux/issues/447 --- node_wrappers/dwpose.py | 8 +++++--- node_wrappers/openpose.py | 8 +++++--- pyproject.toml | 2 +- src/custom_controlnet_aux/dwpose/__init__.py | 8 ++++---- src/custom_controlnet_aux/dwpose/util.py | 13 +++++++++++-- src/custom_controlnet_aux/open_pose/__init__.py | 8 ++++---- src/custom_controlnet_aux/open_pose/util.py | 11 +++++++++-- 7 files changed, 39 insertions(+), 19 deletions(-) diff --git a/node_wrappers/dwpose.py b/node_wrappers/dwpose.py index 75b62bf..904518c 100644 --- a/node_wrappers/dwpose.py +++ b/node_wrappers/dwpose.py @@ -42,7 +42,8 @@ def INPUT_TYPES(s): pose_estimator=INPUT.COMBO( ["dw-ll_ucoco_384_bs5.torchscript.pt", "dw-ll_ucoco_384.onnx", "dw-ll_ucoco.onnx"], default="dw-ll_ucoco_384_bs5.torchscript.pt" - ) + ), + scale_stick_for_xinsr_cn=INPUT.COMBO(["disable", "enable"]) ) RETURN_TYPES = ("IMAGE", "POSE_KEYPOINT") @@ -50,7 +51,7 @@ def INPUT_TYPES(s): CATEGORY = "ControlNet Preprocessors/Faces and Poses Estimators" - def estimate_pose(self, image, detect_hand="enable", detect_body="enable", detect_face="enable", resolution=512, bbox_detector="yolox_l.onnx", pose_estimator="dw-ll_ucoco_384.onnx", **kwargs): + def estimate_pose(self, image, detect_hand="enable", detect_body="enable", detect_face="enable", resolution=512, bbox_detector="yolox_l.onnx", pose_estimator="dw-ll_ucoco_384.onnx", scale_stick_for_xinsr_cn="disable", **kwargs): if bbox_detector == "yolox_l.onnx": yolo_repo = DWPOSE_MODEL_NAME elif "yolox" in bbox_detector: @@ -78,13 +79,14 @@ def estimate_pose(self, image, detect_hand="enable", detect_body="enable", detec detect_hand = detect_hand == "enable" detect_body = detect_body == "enable" detect_face = detect_face == "enable" + scale_stick_for_xinsr_cn = scale_stick_for_xinsr_cn == "enable" self.openpose_dicts = [] def func(image, **kwargs): pose_img, openpose_dict = model(image, **kwargs) self.openpose_dicts.append(openpose_dict) return pose_img - out = common_annotator_call(func, image, include_hand=detect_hand, include_face=detect_face, include_body=detect_body, image_and_json=True, resolution=resolution) + out = common_annotator_call(func, image, include_hand=detect_hand, include_face=detect_face, include_body=detect_body, image_and_json=True, resolution=resolution, xinsr_stick_scaling=scale_stick_for_xinsr_cn) del model return { 'ui': { "openpose_json": [json.dumps(self.openpose_dicts, indent=4)] }, diff --git a/node_wrappers/openpose.py b/node_wrappers/openpose.py index ade5b18..c579e6c 100644 --- a/node_wrappers/openpose.py +++ b/node_wrappers/openpose.py @@ -9,7 +9,8 @@ def INPUT_TYPES(s): detect_hand=INPUT.COMBO(["enable", "disable"]), detect_body=INPUT.COMBO(["enable", "disable"]), detect_face=INPUT.COMBO(["enable", "disable"]), - resolution=INPUT.RESOLUTION() + resolution=INPUT.RESOLUTION(), + scale_stick_for_xinsr_cn=INPUT.COMBO(["disable", "enable"]) ) RETURN_TYPES = ("IMAGE", "POSE_KEYPOINT") @@ -17,12 +18,13 @@ def INPUT_TYPES(s): CATEGORY = "ControlNet Preprocessors/Faces and Poses Estimators" - def estimate_pose(self, image, detect_hand, detect_body, detect_face, resolution=512, **kwargs): + def estimate_pose(self, image, detect_hand="enable", detect_body="enable", detect_face="enable", scale_stick_for_xinsr_cn="disable", resolution=512, **kwargs): from custom_controlnet_aux.open_pose import OpenposeDetector detect_hand = detect_hand == "enable" detect_body = detect_body == "enable" detect_face = detect_face == "enable" + scale_stick_for_xinsr_cn = scale_stick_for_xinsr_cn == "enable" model = OpenposeDetector.from_pretrained().to(model_management.get_torch_device()) self.openpose_dicts = [] @@ -31,7 +33,7 @@ def func(image, **kwargs): self.openpose_dicts.append(openpose_dict) return pose_img - out = common_annotator_call(func, image, include_hand=detect_hand, include_face=detect_face, include_body=detect_body, image_and_json=True, resolution=resolution) + out = common_annotator_call(func, image, include_hand=detect_hand, include_face=detect_face, include_body=detect_body, image_and_json=True, xinsr_stick_scaling=scale_stick_for_xinsr_cn, resolution=resolution) del model return { 'ui': { "openpose_json": [json.dumps(self.openpose_dicts, indent=4)] }, diff --git a/pyproject.toml b/pyproject.toml index de7db81..b584940 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "comfyui_controlnet_aux" description = "Plug-and-play ComfyUI node sets for making ControlNet hint images" -version = "1.0.4-alpha.8" +version = "1.0.4-alpha.9" dependencies = ["torch", "importlib_metadata", "huggingface_hub", "scipy", "opencv-python>=4.7.0.72", "filelock", "numpy", "Pillow", "einops", "torchvision", "pyyaml", "scikit-image", "python-dateutil", "mediapipe", "svglib", "fvcore", "yapf", "omegaconf", "ftfy", "addict", "yacs", "trimesh[easy]", "albumentations", "scikit-learn", "matplotlib"] [project.urls] diff --git a/src/custom_controlnet_aux/dwpose/__init__.py b/src/custom_controlnet_aux/dwpose/__init__.py index 3730634..f6042f6 100644 --- a/src/custom_controlnet_aux/dwpose/__init__.py +++ b/src/custom_controlnet_aux/dwpose/__init__.py @@ -91,7 +91,7 @@ def draw_animalpose(canvas: np.ndarray, keypoints: list[Keypoint]) -> np.ndarray return canvas -def draw_poses(poses: List[PoseResult], H, W, draw_body=True, draw_hand=True, draw_face=True): +def draw_poses(poses: List[PoseResult], H, W, draw_body=True, draw_hand=True, draw_face=True, xinsr_stick_scaling=False): """ Draw the detected poses on an empty canvas. @@ -110,7 +110,7 @@ def draw_poses(poses: List[PoseResult], H, W, draw_body=True, draw_hand=True, dr for pose in poses: if draw_body: - canvas = util.draw_bodypose(canvas, pose.body.keypoints) + canvas = util.draw_bodypose(canvas, pose.body.keypoints, xinsr_stick_scaling) if draw_hand: canvas = util.draw_handpose(canvas, pose.left_hand) @@ -252,7 +252,7 @@ def detect_poses(self, oriImg) -> List[PoseResult]: keypoints_info = self.dw_pose_estimation(oriImg.copy()) return Wholebody.format_result(keypoints_info) - def __call__(self, input_image, detect_resolution=512, include_body=True, include_hand=False, include_face=False, hand_and_face=None, output_type="pil", image_and_json=False, upscale_method="INTER_CUBIC", **kwargs): + def __call__(self, input_image, detect_resolution=512, include_body=True, include_hand=False, include_face=False, hand_and_face=None, output_type="pil", image_and_json=False, upscale_method="INTER_CUBIC", xinsr_stick_scaling=False, **kwargs): if hand_and_face is not None: warnings.warn("hand_and_face is deprecated. Use include_hand and include_face instead.", DeprecationWarning) include_hand = hand_and_face @@ -262,7 +262,7 @@ def __call__(self, input_image, detect_resolution=512, include_body=True, includ input_image, _ = resize_image_with_pad(input_image, 0, upscale_method) poses = self.detect_poses(input_image) - canvas = draw_poses(poses, input_image.shape[0], input_image.shape[1], draw_body=include_body, draw_hand=include_hand, draw_face=include_face) + canvas = draw_poses(poses, input_image.shape[0], input_image.shape[1], draw_body=include_body, draw_hand=include_hand, draw_face=include_face, xinsr_stick_scaling=xinsr_stick_scaling) canvas, remove_pad = resize_image_with_pad(canvas, detect_resolution, upscale_method) detected_map = HWC3(remove_pad(canvas)) diff --git a/src/custom_controlnet_aux/dwpose/util.py b/src/custom_controlnet_aux/dwpose/util.py index cce0dc2..42874b3 100644 --- a/src/custom_controlnet_aux/dwpose/util.py +++ b/src/custom_controlnet_aux/dwpose/util.py @@ -79,13 +79,14 @@ def is_normalized(keypoints: List[Optional[Keypoint]]) -> bool: return all(point_normalized) -def draw_bodypose(canvas: np.ndarray, keypoints: List[Keypoint]) -> np.ndarray: +def draw_bodypose(canvas: np.ndarray, keypoints: List[Keypoint], xinsr_stick_scaling: bool = False) -> np.ndarray: """ Draw keypoints and limbs representing body pose on a given canvas. Args: canvas (np.ndarray): A 3D numpy array representing the canvas (image) on which to draw the body pose. keypoints (List[Keypoint]): A list of Keypoint objects representing the body keypoints to be drawn. + xinsr_stick_scaling (bool): Whether or not scaling stick width for xinsr ControlNet Returns: np.ndarray: A 3D numpy array representing the modified canvas with the drawn body pose. @@ -98,8 +99,16 @@ def draw_bodypose(canvas: np.ndarray, keypoints: List[Keypoint]) -> np.ndarray: else: H, W, _ = canvas.shape + CH, CW, _ = canvas.shape stickwidth = 4 + # Ref: https://huggingface.co/xinsir/controlnet-openpose-sdxl-1.0 + max_side = max(CW, CH) + if xinsr_stick_scaling: + stick_scale = 1 if max_side < 500 else min(2 + (max_side // 1000), 7) + else: + stick_scale = 1 + limbSeq = [ [2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], @@ -125,7 +134,7 @@ def draw_bodypose(canvas: np.ndarray, keypoints: List[Keypoint]) -> np.ndarray: mY = np.mean(Y) length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5 angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1])) - polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1) + polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth*stick_scale), int(angle), 0, 360, 1) cv2.fillConvexPoly(canvas, polygon, [int(float(c) * 0.6) for c in color]) for keypoint, color in zip(keypoints, colors): diff --git a/src/custom_controlnet_aux/open_pose/__init__.py b/src/custom_controlnet_aux/open_pose/__init__.py index 6a05efe..a8d0f53 100644 --- a/src/custom_controlnet_aux/open_pose/__init__.py +++ b/src/custom_controlnet_aux/open_pose/__init__.py @@ -36,7 +36,7 @@ class PoseResult(NamedTuple): right_hand: Union[HandResult, None] face: Union[FaceResult, None] -def draw_poses(poses: List[PoseResult], H, W, draw_body=True, draw_hand=True, draw_face=True): +def draw_poses(poses: List[PoseResult], H, W, draw_body=True, draw_hand=True, draw_face=True, xinsr_stick_scaling=False): """ Draw the detected poses on an empty canvas. @@ -55,7 +55,7 @@ def draw_poses(poses: List[PoseResult], H, W, draw_body=True, draw_hand=True, dr for pose in poses: if draw_body: - canvas = util.draw_bodypose(canvas, pose.body.keypoints) + canvas = util.draw_bodypose(canvas, pose.body.keypoints, xinsr_stick_scaling) if draw_hand: canvas = util.draw_handpose(canvas, pose.left_hand) @@ -216,7 +216,7 @@ def detect_poses(self, oriImg, include_hand=False, include_face=False) -> List[P return results - def __call__(self, input_image, detect_resolution=512, include_body=True, include_hand=False, include_face=False, hand_and_face=None, output_type="pil", image_and_json=False, upscale_method="INTER_CUBIC", **kwargs): + def __call__(self, input_image, detect_resolution=512, include_body=True, include_hand=False, include_face=False, hand_and_face=None, output_type="pil", image_and_json=False, upscale_method="INTER_CUBIC", xinsr_stick_scaling=False, **kwargs): if hand_and_face is not None: warnings.warn("hand_and_face is deprecated. Use include_hand and include_face instead.", DeprecationWarning) include_hand = hand_and_face @@ -226,7 +226,7 @@ def __call__(self, input_image, detect_resolution=512, include_body=True, includ input_image, remove_pad = resize_image_with_pad(input_image, detect_resolution, upscale_method) poses = self.detect_poses(input_image, include_hand=include_hand, include_face=include_face) - canvas = draw_poses(poses, input_image.shape[0], input_image.shape[1], draw_body=include_body, draw_hand=include_hand, draw_face=include_face) + canvas = draw_poses(poses, input_image.shape[0], input_image.shape[1], draw_body=include_body, draw_hand=include_hand, draw_face=include_face, xinsr_stick_scaling=xinsr_stick_scaling) detected_map = HWC3(remove_pad(canvas)) if output_type == "pil": diff --git a/src/custom_controlnet_aux/open_pose/util.py b/src/custom_controlnet_aux/open_pose/util.py index a0851ca..21a14c9 100644 --- a/src/custom_controlnet_aux/open_pose/util.py +++ b/src/custom_controlnet_aux/open_pose/util.py @@ -67,13 +67,14 @@ def transfer(model, model_weights): return transfered_model_weights -def draw_bodypose(canvas: np.ndarray, keypoints: List[Keypoint]) -> np.ndarray: +def draw_bodypose(canvas: np.ndarray, keypoints: List[Keypoint], xinsr_stick_scaling: bool = False) -> np.ndarray: """ Draw keypoints and limbs representing body pose on a given canvas. Args: canvas (np.ndarray): A 3D numpy array representing the canvas (image) on which to draw the body pose. keypoints (List[Keypoint]): A list of Keypoint objects representing the body keypoints to be drawn. + xinsr_stick_scaling (bool): Whether or not scaling stick width for xinsr ControlNet Returns: np.ndarray: A 3D numpy array representing the modified canvas with the drawn body pose. @@ -83,6 +84,12 @@ def draw_bodypose(canvas: np.ndarray, keypoints: List[Keypoint]) -> np.ndarray: """ H, W, C = canvas.shape stickwidth = 4 + # Ref: https://huggingface.co/xinsir/controlnet-openpose-sdxl-1.0 + max_side = max(H, W) + if xinsr_stick_scaling: + stick_scale = 1 if max_side < 500 else min(2 + (max_side // 1000), 7) + else: + stick_scale = 1 limbSeq = [ [2, 3], [2, 6], [3, 4], [4, 5], @@ -109,7 +116,7 @@ def draw_bodypose(canvas: np.ndarray, keypoints: List[Keypoint]) -> np.ndarray: mY = np.mean(Y) length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5 angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1])) - polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1) + polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth*stick_scale), int(angle), 0, 360, 1) cv2.fillConvexPoly(canvas, polygon, [int(float(c) * 0.6) for c in color]) for keypoint, color in zip(keypoints, colors):