Commit

Initial commit

MaverickRen committed Feb 10, 2025
1 parent 673b71d commit 0ae333b
Showing 12 changed files with 418 additions and 47 deletions.
18 changes: 9 additions & 9 deletions LDM/configs/calvin_ldm.py
@@ -113,7 +113,7 @@
]
data_root = "./data/calvin/task_ABCD_D/training"
train_dataloader = dict(
-batch_size=1,
+batch_size=4,
num_workers=4,
drop_last=True,
dataset=dict(
@@ -163,13 +163,13 @@


metrics = [
-dict(
-type='FVD',
-prefix='FVD',
-fake_nums=19772,
-inception_path='./work_dirs/init/fvd/i3d_torchscript.pt',
-inception_style='StyleGAN',
-sample_model='ema'),
+# dict(
+#     type='FVD',
+#     prefix='FVD',
+#     fake_nums=19772,
+#     inception_path='./work_dirs/init/fvd/i3d_torchscript.pt',
+#     inception_style='StyleGAN',
+#     sample_model='ema'),
]
# config for val
val_cfg = dict(type='MultiValLoop')
@@ -180,7 +180,7 @@
test_evaluator = dict(type='LAFeatMFMetric',collect_device='cpu', la_num = 729, gt_act_num = 81)

# load from which checkpoint
-load_from = './work_dirs/init/magvit/iter_332800_new.pth' # load_from=None
+load_from = './work_dirs/magvit_init.pth' # load_from=None
# load_from = None
# whether to resume training from the loaded checkpoint
resume = False
4 changes: 2 additions & 2 deletions LDM/configs/go_ldm.py
@@ -185,8 +185,8 @@
test_evaluator = dict(type='LAGoFeatMetric',collect_device='cpu', la_num = 729, gt_act_num = 81, gt_select_frame=[1])

# load from which checkpoint
-load_from = '/opt/tiger/mmagicinit/ldm/work_dirs/go_2frame_sepqformer_likebefore_interval5/iter_64000_new.pth' # load_from=None
-# load_from = None
+# load_from = '/opt/tiger/mmagicinit/ldm/work_dirs/go_2frame_sepqformer_likebefore_interval5/iter_64000_new.pth' # load_from=None
+load_from = './work_dirs/magvit_init.pth'
# whether to resume training from the loaded checkpoint
resume = False

4 changes: 2 additions & 2 deletions LDM/ldm/datasets/calvin_dataset.py
@@ -245,8 +245,8 @@ def __init__(
lang_info_path = osp.join(data_root, 'lang_annotations/auto_lang_ann.npy')
annotations = np.load(lang_info_path, allow_pickle=True).item()
annotations = list(zip(annotations["info"]["indx"], annotations["language"]["ann"])) #((np.int64(1401659), np.int64(1401723)), 'move the door all the way to the right')
-length = len(annotations) // 3
-annotations = annotations[(length*2):]
+# length = len(annotations) // 3
+annotations = annotations[:2]
data_paths = []
langs = []
clip_start_end_ids = []
23 changes: 2 additions & 21 deletions LDM/ldm/evaluation/metrics/latent_action.py
@@ -309,7 +309,7 @@ def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None:
max_pos = 0.02
max_orn = 0.05
for i, data_sample in enumerate(data_samples):
-indice = data_sample['indice'].squeeze().item()
+indice = data_sample['indice'].squeeze()
encode_feat = data_sample['encode_feat'].squeeze(2, 3) #C, T
state = data_batch['data_samples'][i].states[0]
deltas = torch.stack([s1 - state[0] for s1 in state[1:]])
@@ -370,10 +370,7 @@ def compute_metrics(self, results: list):
from matplotlib.colors import LinearSegmentedColormap
metrics = {}
bin_length = 0.1
-# rel_x_dict = defaultdict(list)
-# rel_y_dict = defaultdict(list)
-# rel_z_dict = defaultdict(list)

torch.save(results, './work_dirs/calvin_ldm_results.pth')

rel_x_list = []
@@ -404,16 +401,6 @@
cmap = LinearSegmentedColormap.from_list("gradient", [start_color, middle_color, end_color], N=20)
colors = cmap(np.linspace(0, 1, 20))

-# act_colors = generate_distinct_colors(len(action_types)+1)
-# fig, ax = plt.subplots(figsize=(6, 2))
-# for i, color in enumerate(act_colors):
-#     rect = patches.Rectangle((i, 0), 1, 1, linewidth=1, edgecolor='none', facecolor=color)
-#     ax.add_patch(rect)
-# ax.set_xlim(0, len(colors))
-# ax.set_ylim(0, 1)
-# ax.axis('off')
-# plt.savefig('./work_dirscolor_blocks.png', dpi=300)

action_ids = self.gen_action_id(action_types, results)
for i, item in enumerate(results):

@@ -469,12 +456,6 @@
self.draw_tsne(t_sne_features, gripper_color[fi], dir='./work_dirs', tag=f'gripper_f{fi+1}')
self.draw_tsne(t_sne_features, act_label_colors, dir='./work_dirs', tag=f'act_f{fi+1}')


-# for ai, A2L_dict in enumerate(A2L_dict_list):
-#     for la in A2L_dict:
-#         A2L_dict[la] = A2L_dict[la] / A2LNums[ai]
-# with open('./work_dirsla_test.json', 'w') as f:
-#     json.dump(A2L_dict_list, f)
return metrics


2 changes: 1 addition & 1 deletion LDM/ldm/models/algorithms/magvit_vq_gan.py
@@ -439,7 +439,7 @@ def forward(self,
outputs = outputs.squeeze(dim=2)
# outputs is BGR
outputs = self.data_preprocessor.destruct(outputs, data_samples)
-import pdb;pdb.set_trace()

gen_sample = DataSample()
gen_sample.indice = indice
gen_sample.fake_img = outputs
16 changes: 11 additions & 5 deletions LDM/ldm/models/losses/vqperceptual.py
@@ -23,6 +23,9 @@



+CKPT_MAP = {
+    "vgg_lpips": "vgg.pth"
+}


def download(url, local_path, chunk_size=1024):
@@ -74,22 +77,25 @@ def __init__(self, use_dropout=True):
self.lins = nn.ModuleList(lins)
root = '../work_dirs/init/lpips'
ckpt = os.path.join(root, 'vgg.pth')

+if not os.path.exists(ckpt):
+    download("https://heibox.uni-heidelberg.de/f/607503859c864bc1b30b/?dl=1", ckpt)
self.load_state_dict(torch.load(ckpt, map_location=torch.device("cpu")), strict=False)
for param in self.parameters():
param.requires_grad = False

def load_from_pretrained(self, name="vgg_lpips"):
-ckpt = os.path.join(name, CKPT_MAP[name])
+ckpt = os.path.join('../work_dirs/init/lpips', CKPT_MAP[name])
if not os.path.exists(ckpt):
download("https://heibox.uni-heidelberg.de/f/607503859c864bc1b30b/?dl=1", ckpt)
self.load_state_dict(torch.load(ckpt, map_location=torch.device("cpu")), strict=False)


@classmethod
def from_pretrained(cls, name="vgg_lpips"):
model = cls()
-ckpt = os.path.join(name, CKPT_MAP[name])
-ckpt = os.path.join(name, 'vgg.pth')
+ckpt = os.path.join('work_dirs/init/lpips', CKPT_MAP[name])
+# ckpt = os.path.join(name, 'vgg.pth')
model.load_state_dict(torch.load(ckpt, map_location=torch.device("cpu")), strict=False)
return model

@@ -100,14 +106,14 @@ def forward(self, input, target):
feats0, feats1, diffs = {}, {}, {}
lins = [self.cos0, self.cos1, self.cos2, self.cos3, self.cos4]
for kk in range(len(self.channels)):
lins = [self.lins[0], self.lins[1], self.lins[2], self.lins[3], self.lins[4]]
lins = [self.lins[0], self.lins[1], self.lins[2], self.lins[3], self.lins[4]]
for kk in range(len(self.channels)):
feats0[kk], feats1[kk] = normalize_tensor(outs0[kk]), normalize_tensor(outs1[kk])
diffs[kk] = (feats0[kk] - feats1[kk]) ** 2
res = [spatial_average(lins[kk].model(diffs[kk]), keepdim=True) for kk in range(len(self.channels))]
res = [spatial_average(lins[kk].model(diffs[kk]), keepdim=True) for kk in range(len(self.channels))]
val = res[0]
for l in range(1, len(self.channels)):
# for l in range(1, len(self.channels)):
for l in range(1, len(self.channels)):
val += res[l]
return val
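For context on the forward pass above: it relies on normalize_tensor and spatial_average helpers that this hunk does not show. In the reference LPIPS implementation they are usually defined roughly as follows — a sketch for orientation, not necessarily this repository's exact code:

```python
import torch

def normalize_tensor(x, eps=1e-10):
    # Scale each feature vector to unit L2 norm along the channel dimension.
    norm_factor = torch.sqrt(torch.sum(x ** 2, dim=1, keepdim=True))
    return x / (norm_factor + eps)

def spatial_average(x, keepdim=True):
    # Mean over the spatial dimensions (H, W), yielding one value per channel.
    return x.mean([2, 3], keepdim=keepdim)
```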
2 changes: 1 addition & 1 deletion LDM/tools/calvin_ldm_train.sh
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#!/usr/bin/env bash
-CONFIG="./configs/calvin_ldm.py"
+CONFIG="./configs/calvin_ldm_debug.py"
GPUS=1
NNODES=1
NODE_RANK=0
9 changes: 6 additions & 3 deletions README.md
@@ -95,10 +95,13 @@ Download CALVIN dataset follow the official instructions and organize it as foll
```
├── VideoWorld
│ ├── LDM
-│ │ └── data
-│ └── └── calvin
+│ │ │── data
+│ │ │ └── calvin
+│ │ └── work_dirs
+│ │ └── magvit_init.pth
+│ └──
```
-Use the script ./LDM/tools/calvin_ldm_train.sh to initiate LDM training. Upon completion, the latent codes on the training set will be automatically saved to ./LDM/work_dirs/calvin_ldm_results.pth, and the UMAP visualization of the latent codes will also be generated.
+Use the script ./LDM/tools/calvin_ldm_train.sh to initiate LDM training. Training requires loading the [Magvit weights](https://huggingface.co/maverickrzw/VideoWorld_CALVIN/tree/main) we pre-trained on natural image reconstruction as initialization. Upon completion, the latent codes on the training set will be automatically saved to ./LDM/work_dirs/calvin_ldm_results.pth, and the UMAP visualization of the latent codes will also be generated.
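For convenience, the checkpoint can also be fetched programmatically. The sketch below uses huggingface_hub and assumes the file is published as magvit_init.pth in the maverickrzw/VideoWorld_CALVIN repository; check the repository listing and adjust the filename if it differs.

```python
# Sketch: fetch the Magvit initialization weights into LDM/work_dirs/.
# The filename "magvit_init.pth" on the Hub is an assumption; verify it at
# https://huggingface.co/maverickrzw/VideoWorld_CALVIN/tree/main before running.
import os
import shutil

from huggingface_hub import hf_hub_download

os.makedirs("LDM/work_dirs", exist_ok=True)
ckpt_path = hf_hub_download(repo_id="maverickrzw/VideoWorld_CALVIN",
                            filename="magvit_init.pth")
shutil.copy(ckpt_path, "LDM/work_dirs/magvit_init.pth")
```

After the file is in place, run the training commands below.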
```
cd LDM
bash ./tools/calvin_ldm_train.sh
5 changes: 2 additions & 3 deletions VideoWorld/configs/calvin_test.py
@@ -99,12 +99,11 @@
dict(type='ToTensor', keys=['img', *test_to_tensor]),
]

-data_root = "/mnt/bn/panxuran/calvin/task_ABCD_D/training"
-la_data_path = "/mnt/bn/zhongwei-lf-dev/work_dirs/latent_action_frame2/la_test_calvin_results_interval2_dict_v2.pth"
+data_root = "./data/calvin/task_ABCD_D/training"
+la_data_path = "./work_dirs/calvin_ldm_results.pth"
train_dataloader = dict(
batch_size=4,
num_workers=4,
-# use_web=False,
pin_memory=False,
persistent_workers=False,
sampler=dict(type='InfiniteSampler', shuffle=True),