settings file

neka-nat · Dec 9, 2023 · 61b538c · 61b538c
1 parent 6c7ef08
commit 61b538c
Show file tree

Hide file tree

Showing 5 changed files with 83 additions and 13 deletions.
diff --git a/README.md b/README.md
@@ -36,6 +36,35 @@ cd scripts
 python demo.py
 ```
 
+## Settings
+
+You can configure the robot using the `configs/settings.yml` file.
+Please set the connection port with the robot, the camera ID, the suction pin number, and other hardware-related settings according to your own environment.
+
+```yaml
+pixel_size_on_capture_position: 0.44 * 1.0e-3  # [m/pixel]
+interface_type: "AUDIO"
+camera_id: 4
+language: "Japanese"
+mycobot_settings:
+  urdf_path: "../data/mycobot/mycobot.urdf"
+  end_effector_name: "camera_flange"
+  port: "/dev/ttyACM0"
+  baud: 115200
+  default_speed: 40
+  default_z_speed: 20
+  suction_pin: 5
+  command_timeout: 5
+  use_gravity_compensation: false
+  end_effector_height: 0.065  # pump head offset
+  object_height: 0.01
+  release_height: 0.05
+  positions:
+    home: [0, 20, -130, 20, 0, 0]
+    capture: [0, 0, -30, -60, 0, -45]
+    drop: [-45, 20, -130, 20, 0, 0]
+```
+
 ## Related links
 
 * [Set-of-Mark-Visual-Prompting-for-GPT-4V](https://github.com/microsoft/SoM)

diff --git a/configs/settings.yml b/configs/settings.yml
@@ -0,0 +1,21 @@
+pixel_size_on_capture_position: 0.00043  # [m/pixel]
+interface_type: "AUDIO"
+camera_id: 0
+language: "Japanese"
+mycobot_settings:
+  urdf_path: "../data/mycobot/mycobot.urdf"
+  end_effector_name: "camera_flange"
+  port: "/dev/ttyACM0"
+  baud: 115200
+  default_speed: 40
+  default_z_speed: 20
+  suction_pin: 5
+  command_timeout: 5
+  use_gravity_compensation: false
+  end_effector_height: 0.065  # pump head offset
+  object_height: 0.01
+  release_height: 0.05
+  positions:
+    home: [0, 20, -130, 20, 0, 0]
+    capture: [0, 0, -30, -60, 0, -45]
+    drop: [-45, 20, -130, 20, 0, 0]
diff --git a/mylangrobot/operator.py b/mylangrobot/operator.py
@@ -17,7 +17,7 @@
 class SOMOperator:
     def __init__(
         self,
-        pixel_size_on_capture_position: float = 0.44 * 1.0e-3,  # [m/pixel]
+        pixel_size_on_capture_position: float = 0.43 * 1.0e-3,  # [m/pixel]
         interface_type: InterfaceType = InterfaceType.AUDIO,
         camera_id: int = 0,
         language: str = "English",
@@ -37,6 +37,7 @@ def __init__(
         self._language = language
         self._pixel_size_on_capture_position = pixel_size_on_capture_position
         self._current_frame = None
+        self._cam_center = None
         self.capture_image_callback = capture_image_callback or (lambda _: self.save_current_image("capture.png"))
         self.annotate_image_callback = annotate_image_callback or (lambda x: cv2.imwrite("annotated.png", x))
 
@@ -97,9 +98,11 @@ def update_current_frame(self):
             raise RuntimeError("Failed to read frame")
         self._current_frame = frame
         self._current_frame = cv2.rotate(self._current_frame, cv2.ROTATE_180)
-        # crop the right side of the image, because the end effector is on the right side
-        _, width, _ = self._current_frame.shape
-        self._current_frame = self._current_frame[:, :-(width // 8), :]
+        # Since the robot body is on the bottom of the image and the end effector is on the right,
+        # crop the bottom and right sides of the image.
+        height, width, _ = self._current_frame.shape
+        self._cam_center = np.array([height / 2, width / 2])
+        self._current_frame = self._current_frame[: -(height // 4), : -(width // 8), :]
 
     def run(self):
         chat_history = []
@@ -122,7 +125,7 @@ def execute_command(self, input_text: str) -> Optional[tuple[str, str]]:
         time.sleep(1)
         self.update_current_frame()
         self.capture_image_callback(self._current_frame)
-        res, response_type, obj_centers = self.process_image(self._current_frame, input_text)
+        res, response_type, obj_centers = self.process_image(self._current_frame, self._cam_center, input_text)
         if response_type == ResponseType.QUESTION:
             self._interface.output(res)
             return (input_text, res)
@@ -134,7 +137,7 @@ def execute_command(self, input_text: str) -> Optional[tuple[str, str]]:
             return (input_text, "<Execute code>")
         return None
 
-    def process_image(self, image: np.ndarray, text: str) -> tuple[str, ResponseType, list]:
+    def process_image(self, image: np.ndarray, cam_center: np.ndarray, text: str) -> tuple[str, ResponseType, list]:
         """Process image and return response text and response type.
 
         Note:
@@ -148,7 +151,6 @@ def process_image(self, image: np.ndarray, text: str) -> tuple[str, ResponseType
         print("[SOMOperator]", response_type, res)
         # calculate center of masks
         centers = []
-        cam_center = np.array([image.shape[0] / 2, image.shape[1] / 2])
         if response_type == ResponseType.CODE:
             for mask in detections.mask:
                 center = np.mean(np.where(mask == True), axis=1)

diff --git a/mylangrobot/robot_controller.py b/mylangrobot/robot_controller.py
@@ -9,7 +9,7 @@
 
 
 class MyCobotSettings(BaseModel):
-    urdf_path: str = os.path.join(os.path.dirname(__file__), "../data/mycobot/mycobot.urdf")
+    urdf_path: str = "../data/mycobot/mycobot.urdf"
     end_effector_name: str = "camera_flange"
     port: str = "/dev/ttyACM0"
     baud: int = 115200
@@ -27,6 +27,10 @@ class MyCobotSettings(BaseModel):
         "drop": [-45, 20, -130, 20, 0, 0],
     }
 
+    @property
+    def full_urdf_path(self) -> str:
+        return os.path.join(os.path.dirname(__file__), self.urdf_path)
+
 
 class MyCobotController:
     def __init__(self, **kwargs):
@@ -37,10 +41,10 @@ def __init__(self, **kwargs):
         self._default_z_speed = settings.default_z_speed
         self._command_timeout = settings.command_timeout
         self._use_gravity_compensation = settings.use_gravity_compensation
-        self._sim = kp.build_serial_chain_from_urdf(open(settings.urdf_path).read(), settings.end_effector_name)
+        self._sim = kp.build_serial_chain_from_urdf(open(settings.full_urdf_path).read(), settings.end_effector_name)
         self._current_position = self._mycobot.get_angles()
         self.positions = settings.positions
-        self.capture_coord = self._calc_camera_lens_coords_on_capture_position(settings.urdf_path)
+        self.capture_coord = self._calc_camera_lens_coords_on_capture_position(settings.full_urdf_path)
         self.end_effector_height = settings.end_effector_height  # pump head offset
         self.object_height = settings.object_height
         self.release_height = settings.release_height

diff --git a/scripts/demo.py b/scripts/demo.py
@@ -1,13 +1,27 @@
 import argparse
 
+import yaml
+
 from mylangrobot.operator import SOMOperator
+from mylangrobot.interface import InterfaceType
+from mylangrobot.robot_controller import MyCobotSettings
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("--camera-id", type=int, default=0)
-    parser.add_argument("--language", type=str, default="English")
+    parser.add_argument("--config", type=str, default="../configs/settings.yml")
     args = parser.parse_args()
 
-    som = SOMOperator(camera_id=args.camera_id, language=args.language)
+    with open(args.config, "r") as f:
+        config = yaml.safe_load(f)
+
+    mycobot_settings = MyCobotSettings(**config["mycobot_settings"])
+
+    som = SOMOperator(
+        pixel_size_on_capture_position=config["pixel_size_on_capture_position"],
+        interface_type=InterfaceType[config["interface_type"]],
+        camera_id=config["camera_id"],
+        language=config["language"],
+        mycobot_settings=mycobot_settings,
+    )
     som.run()