diff --git a/README.md b/README.md index 93c4b3a..da94ec9 100644 --- a/README.md +++ b/README.md @@ -79,15 +79,19 @@ bash ./tools/battle_vs_human.sh ### Robotics -Download CALVIN dataset follow the official instructions and organize it as follows: +Download the CALVIN dataset following the [official instructions](https://github.com/mees/calvin) and organize it as follows: ``` ├── VideoWorld │ ├── VideoWorld -│ │ └── data -│ └── └── calvin +│ │ │── data +│ │ │ └── calvin +│ │ └── work_dirs +│ │ │── Llama_tokenizer +│ │ │── calvin_model.pth +│ └── └── Intern_300m ``` -Testing requires the CALVIN environment configuration. We have automated the installation of CALVIN in the install.sh script. If any issues arise, please refer to the official installation instructions: https://github.com/mees/calvin +Testing requires the CALVIN environment configuration. We have automated the installation of CALVIN in the install.sh script. If any issues arise, please refer to the official installation instructions linked above. ``` cd VideoWorld # This VideoWorld is located in a subdirectory. 
# Since we only tested the tasks of opening drawers, pushing diff --git a/VideoWorld/falcon/datasets/pipelines/tokenizer.py b/VideoWorld/falcon/datasets/pipelines/tokenizer.py index 3af86c5..0b20cd0 100644 --- a/VideoWorld/falcon/datasets/pipelines/tokenizer.py +++ b/VideoWorld/falcon/datasets/pipelines/tokenizer.py @@ -550,7 +550,7 @@ def __init__(self, assert isinstance( pretrained, str ), f'Autokenizer must has pretrained models, but get {pretrained}' - self.tokenizer = AutoTokenizer.from_pretrained("./work_dirs/init/Llama-300m", model_max_length=max_length, padding_side=padding_side, use_fast=False, **kwargs) + self.tokenizer = AutoTokenizer.from_pretrained("./work_dirs/Llama_tokenizer", model_max_length=max_length, padding_side=padding_side, use_fast=False, **kwargs) self.tokenizer.model_max_length = max_length self.tokenizer.add_tokens([DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN, DEFAULT_IMAGE_TOKEN], special_tokens=True) diff --git a/VideoWorld/falcon/models/algorithms/calvin_GR1_wostate_vq_idm.py b/VideoWorld/falcon/models/algorithms/calvin_GR1_wostate_vq_idm.py index 732f4f5..988971f 100644 --- a/VideoWorld/falcon/models/algorithms/calvin_GR1_wostate_vq_idm.py +++ b/VideoWorld/falcon/models/algorithms/calvin_GR1_wostate_vq_idm.py @@ -266,7 +266,8 @@ def __init__(self, vbackbone, neck, head, quantizer, init_cfg=None, pred_image=T task_cfg = OmegaConf.load(conf_dir / "callbacks/rollout/tasks/new_playtable_tasks.yaml") self.task_oracle = hydra.utils.instantiate(task_cfg) self.val_annotations = OmegaConf.load(conf_dir / "annotations/new_playtable_validation.yaml") - os.system('mkdir -p /opt/tiger/rollout/') + os.system('mkdir -p ./visualize_calvin') + def check_rec(self, visual_ids, visual_hand_ids, img, img_hand, is_rollout=False, ep_idx=0): b, t = visual_hand_ids.shape[:2] @@ -472,7 +473,7 @@ def forward_train(self, img, input_ids, pred_label=None, attention_mask=None, ** def rollout_pred_rgb(self, img, seq_input_ids, pred_label=None, 
seq_attention_mask=None, index=None, **kwargs): - + # import pdb;pdb.set_trace() import cv2 scene = kwargs.pop('scene') robot_obs = kwargs.pop('state') @@ -485,7 +486,7 @@ def rollout_pred_rgb(self, img, seq_input_ids, pred_label=None, seq_attention_ma self.eval_env.reset(robot_obs=robot_obs.cpu().numpy()[0], scene_obs=scene.cpu().numpy()[0]) episode_infos = kwargs.get('episode_infos', None) success_counter = 0 - vis_root = "/opt/tiger/rollout" + vis_root = "./visualize_calvin" sequence_idx = len(os.listdir(vis_root)) vis_path = f"{vis_root}/{sequence_idx}" os.system(f'mkdir -p {vis_path}') @@ -665,6 +666,7 @@ def rollout_pred_rgb(self, img, seq_input_ids, pred_label=None, seq_attention_ma break print(success_counter) record = [{'success_counter': success_counter}] + return record def encode_image(self, img, dtype=None, device=None): diff --git a/VideoWorld/tools/calvin_test.sh b/VideoWorld/tools/calvin_test.sh index 8626fb0..606df20 100644 --- a/VideoWorld/tools/calvin_test.sh +++ b/VideoWorld/tools/calvin_test.sh @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. #!/usr/bin/env bash +export KIVY_NO_ARGS=1 CONFIG="./configs/calvin_test.py" GPUS=1 NNODES=1