Initial commit

bytedance · Feb 14, 2025 · 603cc12 · 603cc12
1 parent b1d1f19
commit 603cc12
Show file tree

Hide file tree

Showing 4 changed files with 15 additions and 8 deletions.
diff --git a/README.md b/README.md
@@ -79,15 +79,19 @@ bash ./tools/battle_vs_human.sh
 
 
 ### Robotics
-Download CALVIN dataset follow the official instructions and organize it as follows:
+Download the CALVIN dataset by following the [official instructions](https://github.com/mees/calvin) and organize it as follows:
 ```
 ├── VideoWorld
 │   ├── VideoWorld
-│   │   └── data
-│   └──       └── calvin
+│   │   ├── data
+│   │   │   └── calvin
+│   │   └── work_dirs
+│   │       ├── Llama_tokenizer
+│   │       ├── calvin_model.pth
+│   │       └── Intern_300m
 ```
 
-Testing requires the CALVIN environment configuration. We have automated the installation of CALVIN in the install.sh script. If any issues arise, please refer to the official installation instructions: https://github.com/mees/calvin
+Testing requires the CALVIN environment configuration. We have automated the installation of CALVIN in the install.sh script. If any issues arise, please refer to the [official installation instructions](https://github.com/mees/calvin).
 ```
 cd VideoWorld # This VideoWorld is located in a subdirectory.
 # Since we only tested the tasks of opening drawers, pushing 

diff --git a/VideoWorld/falcon/datasets/pipelines/tokenizer.py b/VideoWorld/falcon/datasets/pipelines/tokenizer.py
@@ -550,7 +550,7 @@ def __init__(self,
         assert isinstance(
             pretrained, str
         ), f'Autokenizer must has pretrained models, but get {pretrained}'
-        self.tokenizer = AutoTokenizer.from_pretrained("./work_dirs/init/Llama-300m", model_max_length=max_length, padding_side=padding_side, use_fast=False, **kwargs)
+        self.tokenizer = AutoTokenizer.from_pretrained("./work_dirs/Llama_tokenizer", model_max_length=max_length, padding_side=padding_side, use_fast=False, **kwargs)
 
         self.tokenizer.model_max_length = max_length
         self.tokenizer.add_tokens([DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN, DEFAULT_IMAGE_TOKEN], special_tokens=True)

diff --git a/VideoWorld/falcon/models/algorithms/calvin_GR1_wostate_vq_idm.py b/VideoWorld/falcon/models/algorithms/calvin_GR1_wostate_vq_idm.py
@@ -266,7 +266,8 @@ def __init__(self, vbackbone, neck, head, quantizer, init_cfg=None, pred_image=T
         task_cfg = OmegaConf.load(conf_dir / "callbacks/rollout/tasks/new_playtable_tasks.yaml")
         self.task_oracle = hydra.utils.instantiate(task_cfg)
         self.val_annotations = OmegaConf.load(conf_dir / "annotations/new_playtable_validation.yaml")
-        os.system('mkdir -p /opt/tiger/rollout/')
+        os.system('mkdir -p ./visualize_calvin')
+
     def check_rec(self, visual_ids, visual_hand_ids, img, img_hand, is_rollout=False, ep_idx=0):
 
         b, t = visual_hand_ids.shape[:2]
@@ -472,7 +473,7 @@ def forward_train(self, img, input_ids, pred_label=None, attention_mask=None, **
 
 
     def rollout_pred_rgb(self, img, seq_input_ids, pred_label=None, seq_attention_mask=None, index=None, **kwargs):
-
+        # import pdb;pdb.set_trace()
         import cv2
         scene = kwargs.pop('scene')
         robot_obs = kwargs.pop('state')
@@ -485,7 +486,7 @@ def rollout_pred_rgb(self, img, seq_input_ids, pred_label=None, seq_attention_ma
         self.eval_env.reset(robot_obs=robot_obs.cpu().numpy()[0], scene_obs=scene.cpu().numpy()[0])
         episode_infos = kwargs.get('episode_infos', None)
         success_counter = 0
-        vis_root = "/opt/tiger/rollout"
+        vis_root = "./visualize_calvin"
         sequence_idx = len(os.listdir(vis_root))
         vis_path = f"{vis_root}/{sequence_idx}"
         os.system(f'mkdir -p {vis_path}')
@@ -665,6 +666,7 @@ def rollout_pred_rgb(self, img, seq_input_ids, pred_label=None, seq_attention_ma
                 break
         print(success_counter)
         record = [{'success_counter': success_counter}]
+
         return record
 
     def encode_image(self, img, dtype=None, device=None):

diff --git a/VideoWorld/tools/calvin_test.sh b/VideoWorld/tools/calvin_test.sh
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #!/usr/bin/env bash
+export KIVY_NO_ARGS=1
 CONFIG="./configs/calvin_test.py"
 GPUS=1
 NNODES=1