
Commit 7cb19fc

init
1 parent 8b3ef63 commit 7cb19fc

21 files changed: 586 additions, 891 deletions

LICENSE

Lines changed: 0 additions & 4 deletions
@@ -1,10 +1,6 @@
 MIT License
 
-<<<<<<< HEAD
-Copyright (c) 2020 THUIAR
-=======
 Copyright (c) 2020 iyuge2
->>>>>>> master
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

README.md

Lines changed: 46 additions & 32 deletions
@@ -1,52 +1,65 @@
-![Python 3.6](https://img.shields.io/badge/python-3.6-green.svg)
-# SELF-MM
-> Pytorch implementation for codes in [Learning Modality-Specific Representations with Self-Supervised Multi-Task Learning for Multimodal Sentiment Analysis (AAAI2021)]()
+![Python 3.7](https://img.shields.io/badge/python-3.7-green.svg)
 
+## SELF-MM
+> Pytorch implementation for codes in [Learning Modality-Specific Representations with Self-Supervised Multi-Task Learning for Multimodal Sentiment Analysis (AAAI2021)]()
+
+### Model
+
+![model](assets/MainModel.pdf)
 
 ### Usage
-> This repo is similar to our previous work, [MMSA](https://github.com/thuiar/MMSA).
 
----
+1. Download and preprocess the datasets
+    - MOSI and MOSEI
+      > download from [CMU-MultimodalSDK](http://immortal.multicomp.cs.cmu.edu/raw_datasets/processed_data/)
+
+    - SIMS
+      > download from [Baidu Yun Disk](https://pan.baidu.com/s/1CmLdhYSVnNFAyA0DkR6tdA) [code: `ozo2`] or [Google Drive](https://drive.google.com/file/d/1z6snOkOoy100F33lzmHHB_DUGJ47DaQo/view?usp=sharing)
 
-- Clone this repo and install requirements.
+    Then, preprocess the data and save it as a pickle file with the following structure.
+    ```python
+    {
+        "train": {
+            "raw_text": [],
+            "audio": [],
+            "vision": [],
+            "id": [], # [video_id$_$clip_id, ..., ...]
+            "text": [],
+            "text_bert": [],
+            "audio_lengths": [],
+            "vision_lengths": [],
+            "annotations": [],
+            "classification_labels": [], # Negative(< 0), Neutral(0), Positive(> 0)
+            "regression_labels": []
+        },
+        "valid": {***}, # same as the "train"
+        "test": {***}, # same as the "train"
+    }
+    ```
+
+2. Download [Bert-Base, Chinese](https://storage.googleapis.com/bert_models/2018_11_03/chinese_L-12_H-768_A-12.zip) from [Google-Bert](https://github.com/google-research/bert).
+    Then, convert the TensorFlow checkpoint into PyTorch using [transformers-cli](https://huggingface.co/transformers/converting_tensorflow_models.html).
+
+3. Clone this repo and install requirements.
 ```
 git clone https://github.com/thuiar/Self-MM
 cd Self-MM
+conda create --name self_mm python=3.7
+source activate self_mm
 pip install -r requirements.txt
 ```
 
-- Run codes
+4. Make some changes
+    Modify `config/config_tune.py` and `config/config_regression.py` to update the dataset paths.
+
+5. Run codes
 ```
 python run.py --modelName self_mm --datasetName mosi
 ```
 
 ### Results
-- MOSI
-
-| Model | MAE | Corr | Acc-2 | F1-Score |
-| :---: | :---: | :---: | :---: | :---: |
-| BERT-MULT | | | | |
-| BERT-MISA | | | | |
-| BERT-MAG | | | | |
-| SELF-MM | | | | |
-
-- MOSEI
-
-| Model | MAE | Corr | Acc-2 | F1-Score |
-| :---: | :---: | :---: | :---: | :---: |
-| BERT-MULT | | | |
-| BERT-MISA | | | |
-| BERT-MAG | | | |
-| SELF-MM | | | |
-
-- SIMS
 
-| Model | MAE | Corr | Acc-2 | F1-Score |
-| :---: | :---: | :---: | :---: | :---: |
-| BERT-MULT | | | |
-| BERT-MISA | | | |
-| BERT-MAG | | | |
-| SELF-MM | | | |
+> We will update all results in another repo, [MMSA](https://github.com/thuiar/MMSA), in the next few days.
 
 
 ### Paper
@@ -56,6 +69,7 @@ Please cite our paper if you find our work useful for your research:
 @inproceedings{yu2021le,
   title={Learning Modality-Specific Representations with Self-Supervised Multi-Task Learning for Multimodal Sentiment Analysis},
  author={Yu, Wenmeng and Xu, Hua and Ziqi, Yuan and Jiele, Wu},
+  booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
  year={2021}
 }
 ```
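
The updated step 1 only specifies the target pickle layout. As a companion, here is a minimal, hypothetical sketch of dumping already-extracted features into that layout; the `save_processed` helper, the `split_features` input, and the output path are illustrative placeholders and are not part of this commit.

```python
import pickle

def save_processed(split_features, out_path="unaligned_50.pkl"):
    """Hypothetical helper: write pre-extracted features in the README's pickle layout."""
    data = {}
    for split in ("train", "valid", "test"):
        feats = split_features[split]  # assumed dict with one list/array per field below
        data[split] = {
            "raw_text": feats["raw_text"],
            "audio": feats["audio"],
            "vision": feats["vision"],
            "id": feats["id"],  # ["video_id$_$clip_id", ...]
            "text": feats["text"],
            "text_bert": feats["text_bert"],
            "audio_lengths": feats["audio_lengths"],
            "vision_lengths": feats["vision_lengths"],
            "annotations": feats["annotations"],
            "classification_labels": feats["classification_labels"],  # <0 / 0 / >0
            "regression_labels": feats["regression_labels"],
        }
    with open(out_path, "wb") as f:
        pickle.dump(data, f)
```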

ResultAnalysis.ipynb

Lines changed: 0 additions & 425 deletions
This file was deleted.

assets/MainModel.pdf

107 KB
Binary file not shown.
Lines changed: 85 additions & 92 deletions
@@ -1,103 +1,109 @@
 import os
 import argparse
 
-os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
+from utils.functions import Storage
 
-__all__ = ['Config']
-
-class Storage(dict):
-    """
-    A Storage object is like a dictionary except `obj.foo` can be used inadition to `obj['foo']`
-    ref: https://blog.csdn.net/a200822146085/article/details/88430450
-    """
-    def __getattr__(self, key):
-        try:
-            return self[key]
-        except KeyError as k:
-            raise AttributeError(k)
-
-    def __setattr__(self, key, value):
-        self[key] = value
-
-    def __delattr__(self, key):
-        try:
-            del self[key]
-        except KeyError as k:
-            raise AttributeError(k)
-
-    def __str__(self):
-        return "<" + self.__class__.__name__ + dict.__repr__(self) + ">"
-
-class Config():
+class ConfigRegression():
     def __init__(self, args):
-        # parameters for data
-        # global parameters for running
-        self.globalArgs = args
         # hyper parameters for models
-        self.HYPER_MODEL_MAP = {
+        HYPER_MODEL_MAP = {
            'self_mm': self.__SELF_MM
        }
        # hyper parameters for datasets
-        self.HYPER_DATASET_MAP = self.__datasetCommonParams()
+        HYPER_DATASET_MAP = self.__datasetCommonParams()
+
+        # normalize
+        model_name = str.lower(args.modelName)
+        dataset_name = str.lower(args.datasetName)
+        # load params
+        commonArgs = HYPER_MODEL_MAP[model_name]()['commonParas']
+        dataArgs = HYPER_DATASET_MAP[dataset_name]
+        dataArgs = dataArgs['aligned'] if (commonArgs['need_data_aligned'] and 'aligned' in dataArgs) else dataArgs['unaligned']
+        # integrate all parameters
+        self.args = Storage(dict(vars(args),
+                                 **dataArgs,
+                                 **commonArgs,
+                                 **HYPER_MODEL_MAP[model_name]()['datasetParas'][dataset_name],
+                                 ))
 
     def __datasetCommonParams(self):
-        assert self.globalArgs.datasetName in ['mosi', 'mosei', 'sims']
-
-        tmp = "aligned" if self.globalArgs.aligned else "unaligned"
-        if self.globalArgs.datasetName in ['mosi', 'mosei']:
-            text_len = 50
-        elif self.globalArgs.datasetName in ['sims']:
-            text_len = 39
-
-        dataPath = os.path.join(self.globalArgs.data_dir, self.globalArgs.datasetName, \
-                                tmp + '_' + str(text_len) + '.pkl')
+        root_dataset_dir = '/home/sharing/disk3/dataset/multimodal-sentiment-dataset/StandardDatasets'
         tmp = {
            'mosi':{
-                'dataPath': dataPath,
-                'input_lens': (50, 50, 50) if self.globalArgs.aligned else (50, 500, 375),
-                # (text, audio, video)
-                'feature_dims': (768, 5, 20),
-                'train_samples': 1284,
-                'language': 'en',
-                'KeyEval': 'Loss'
+                'aligned': {
+                    'dataPath': os.path.join(root_dataset_dir, 'MOSI/Processed/aligned_50.pkl'),
+                    'seq_lens': (50, 50, 50),
+                    # (text, audio, video)
+                    'feature_dims': (768, 5, 20),
+                    'train_samples': 1284,
+                    'num_classes': 3,
+                    'language': 'en',
+                    'KeyEval': 'Loss'
+                },
+                'unaligned': {
+                    'dataPath': os.path.join(root_dataset_dir, 'MOSI/Processed/unaligned_50.pkl'),
+                    'seq_lens': (50, 50, 50),
+                    # (text, audio, video)
+                    'feature_dims': (768, 5, 20),
+                    'train_samples': 1284,
+                    'num_classes': 3,
+                    'language': 'en',
+                    'KeyEval': 'Loss'
+                }
            },
            'mosei':{
-                'dataPath': dataPath,
-                'input_lens': (50, 50, 50) if self.globalArgs.aligned else (50, 500, 375),
-                # (text, audio, video)
-                'feature_dims': (768, 74, 35),
-                'train_samples': 16326,
-                'language': 'en',
-                'KeyEval': 'Loss'
+                'aligned': {
+                    'dataPath': os.path.join(root_dataset_dir, 'MOSEI/Processed/aligned_50.pkl'),
+                    'seq_lens': (50, 50, 50),
+                    # (text, audio, video)
+                    'feature_dims': (768, 74, 35),
+                    'train_samples': 16326,
+                    'num_classes': 3,
+                    'language': 'en',
+                    'KeyEval': 'Loss'
+                },
+                'unaligned': {
+                    'dataPath': os.path.join(root_dataset_dir, 'MOSEI/Processed/unaligned_50.pkl'),
+                    'seq_lens': (50, 500, 375),
+                    # (text, audio, video)
+                    'feature_dims': (768, 74, 35),
+                    'train_samples': 16326,
+                    'num_classes': 3,
+                    'language': 'en',
+                    'KeyEval': 'Loss'
+                }
            },
            'sims':{
-                'dataPath': dataPath,
-                # (batch_size, input_lens, feature_dim)
-                'input_lens': (39, 400, 55), # (text, audio, video)
-                'feature_dims': (768, 33, 709), # (text, audio, video)
-                'train_samples': 1368,
-                'language': 'cn',
-                'KeyEval': 'Loss',
-            },
+                'unaligned': {
+                    'dataPath': os.path.join(root_dataset_dir, 'SIMS/Processed/features/unaligned_39.pkl'),
+                    # (batch_size, seq_lens, feature_dim)
+                    'seq_lens': (39, 400, 55), # (text, audio, video)
+                    'feature_dims': (768, 33, 709), # (text, audio, video)
+                    'train_samples': 1368,
+                    'num_classes': 3,
+                    'language': 'cn',
+                    'KeyEval': 'Loss',
+                }
+            }
        }
        return tmp
 
     def __SELF_MM(self):
        tmp = {
            'commonParas':{
-                'need_align': False,
-                'need_normalize': False,
+                'need_data_aligned': False,
+                'need_model_aligned': False,
+                'need_normalized': False,
                'use_bert': True,
                'use_finetune': True,
-                'early_stop': 12,
-                'task_type': 'regression', # regression / classification
-                'tasks': 'M'
+                'save_labels': True,
+                'early_stop': 8,
+                'update_epochs': 4
            },
            # dataset
            'datasetParas':{
                'mosi':{
                    # the batch_size of each epoch is update_epochs * batch_size
-                    'update_epochs': 4,
                    'batch_size': 32,
                    'learning_rate_bert': 5e-5,
                    'learning_rate_audio': 1e-3,
@@ -107,7 +113,6 @@ def __SELF_MM(self):
                    'weight_decay_audio': 0.01,
                    'weight_decay_video': 0.001,
                    'weight_decay_other': 0.001,
-                    'num_classes': 1,
                    # feature subNets
                    'a_lstm_hidden_size': 32,
                    'v_lstm_hidden_size': 64,
@@ -133,7 +138,6 @@ def __SELF_MM(self):
                },
                'mosei':{
                    # the batch_size of each epoch is update_epochs * batch_size
-                    'update_epochs': 4,
                    'batch_size': 32,
                    'learning_rate_bert': 5e-5,
                    'learning_rate_audio': 0.005,
@@ -143,7 +147,6 @@ def __SELF_MM(self):
                    'weight_decay_audio': 0.0,
                    'weight_decay_video': 0.0,
                    'weight_decay_other': 0.01,
-                    'num_classes': 1,
                    # feature subNets
                    'a_lstm_hidden_size': 32,
                    'v_lstm_hidden_size': 32,
@@ -169,19 +172,17 @@ def __SELF_MM(self):
                },
                'sims':{
                    # the batch_size of each epoch is update_epochs * batch_size
-                    'update_epochs': 4,
-                    'batch_size': 16,
+                    'batch_size': 32,
                    'learning_rate_bert': 5e-5,
                    'learning_rate_audio': 0.005,
-                    'learning_rate_video': 0.005,
+                    'learning_rate_video': 0.001,
                    'learning_rate_other': 0.001,
-                    'weight_decay_bert': 0.001,
-                    'weight_decay_audio': 0.01,
+                    'weight_decay_bert': 0.01,
+                    'weight_decay_audio': 0.0,
                    'weight_decay_video': 0.0,
                    'weight_decay_other': 0.001,
-                    'num_classes': 1,
                    # feature subNets
-                    'a_lstm_hidden_size': 32,
+                    'a_lstm_hidden_size': 16,
                    'v_lstm_hidden_size': 64,
                    'a_lstm_layers': 1,
                    'v_lstm_layers': 1,
@@ -192,11 +193,11 @@ def __SELF_MM(self):
                    'v_lstm_dropout': 0.0,
                    't_bert_dropout':0.1,
                    # post feature
-                    'post_fusion_dim': 64,
+                    'post_fusion_dim': 128,
                    'post_text_dim':64,
-                    'post_audio_dim': 16,
+                    'post_audio_dim': 32,
                    'post_video_dim': 16,
-                    'post_fusion_dropout': 0.0,
+                    'post_fusion_dropout': 0.1,
                    'post_text_dropout': 0.1,
                    'post_audio_dropout': 0.0,
                    'post_video_dropout': 0.1,
@@ -208,12 +209,4 @@ def __SELF_MM(self):
        return tmp
 
     def get_config(self):
-        # normalize
-        model_name = str.lower(self.globalArgs.modelName)
-        dataset_name = str.lower(self.globalArgs.datasetName)
-        # integrate all parameters
-        res = Storage(dict(vars(self.globalArgs),
-                           **self.HYPER_MODEL_MAP[model_name]()['datasetParas'][dataset_name],
-                           **self.HYPER_MODEL_MAP[model_name]()['commonParas'],
-                           **self.HYPER_DATASET_MAP[dataset_name]))
-        return res
+        return self.args
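
For orientation, the rewritten constructor now merges four sources into a single attribute-accessible `Storage`: the CLI arguments, the selected dataset block (aligned or unaligned, chosen via `need_data_aligned`), the model's `commonParas`, and its per-dataset `datasetParas`; `get_config()` simply returns that object. Below is a minimal usage sketch, assuming the class lives in `config/config_regression.py` (the file name is not shown in this diff, but the README's step 4 points there) and that only the two flags from the README's run command are needed.

```python
import argparse

from config.config_regression import ConfigRegression  # assumed module path

parser = argparse.ArgumentParser()
parser.add_argument('--modelName', default='self_mm')
parser.add_argument('--datasetName', default='mosi')
args = parser.parse_args()

config = ConfigRegression(args).get_config()

# Storage allows both attribute and key access to the merged parameters.
print(config.feature_dims)          # (768, 5, 20) for MOSI
print(config.learning_rate_bert)    # 5e-5, from the 'mosi' block of datasetParas
print(config['need_data_aligned'])  # False, so the 'unaligned' data block was selected
```

Compared with the old `get_config`, the merge now happens once in `__init__`, and the alignment choice is driven by the model's `need_data_aligned` flag rather than an `--aligned` command-line switch.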
