
Commit 7cb19fc

init
1 parent 8b3ef63 commit 7cb19fc

21 files changed: 586 additions, 891 deletions

LICENSE

Lines changed: 0 additions & 4 deletions
@@ -1,10 +1,6 @@
 MIT License
 
-<<<<<<< HEAD
-Copyright (c) 2020 THUIAR
-=======
 Copyright (c) 2020 iyuge2
->>>>>>> master
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

README.md

Lines changed: 46 additions & 32 deletions
@@ -1,52 +1,65 @@
-![Python 3.6](https://img.shields.io/badge/python-3.6-green.svg)
-# SELF-MM
-> Pytorch implementation for codes in [Learning Modality-Specific Representations with Self-Supervised Multi-Task Learning for Multimodal Sentiment Analysis (AAAI2021)]()
+![Python 3.7](https://img.shields.io/badge/python-3.7-green.svg)
 
+## SELF-MM
+> Pytorch implementation for codes in [Learning Modality-Specific Representations with Self-Supervised Multi-Task Learning for Multimodal Sentiment Analysis (AAAI2021)]()
+
+### Model
+
+![model](assets/MainModel.pdf)
 
 ### Usage
-> This repo is similar to our previous work, [MMSA](https://github.com/thuiar/MMSA).
 
----
+1. Download and preprocess the datasets
+    - MOSI and MOSEI
+      > download from [CMU-MultimodalSDK](http://immortal.multicomp.cs.cmu.edu/raw_datasets/processed_data/)
+
+    - SIMS
+      > download from [Baidu Yun Disk](https://pan.baidu.com/s/1CmLdhYSVnNFAyA0DkR6tdA) [code: `ozo2`] or [Google Drive](https://drive.google.com/file/d/1z6snOkOoy100F33lzmHHB_DUGJ47DaQo/view?usp=sharing)
 
-- Clone this repo and install requirements.
+    Then, preprocess the data and save it as a pickle file with the following structure.
+    ```python
+    {
+        "train": {
+            "raw_text": [],
+            "audio": [],
+            "vision": [],
+            "id": [], # [video_id$_$clip_id, ..., ...]
+            "text": [],
+            "text_bert": [],
+            "audio_lengths": [],
+            "vision_lengths": [],
+            "annotations": [],
+            "classification_labels": [], # Negative(< 0), Neutral(0), Positive(> 0)
+            "regression_labels": []
+        },
+        "valid": {***}, # same as the "train"
+        "test": {***}, # same as the "train"
+    }
+    ```
+
+2. Download [Bert-Base, Chinese](https://storage.googleapis.com/bert_models/2018_11_03/chinese_L-12_H-768_A-12.zip) from [Google-Bert](https://github.com/google-research/bert).
+    Then, convert the TensorFlow checkpoint into PyTorch using [transformers-cli](https://huggingface.co/transformers/converting_tensorflow_models.html).
+
+3. Clone this repo and install requirements.
 ```
 git clone https://github.com/thuiar/Self-MM
 cd Self-MM
+conda create --name self_mm python=3.7
+source activate self_mm
 pip install -r requirements.txt
 ```
 
-- Run codes
+4. Make some changes
+    Modify `config/config_tune.py` and `config/config_regression.py` to update the dataset paths.
+
+5. Run codes
 ```
 python run.py --modelName self_mm --datasetName mosi
 ```
 
 ### Results
-- MOSI
-
-| Model | MAE | Corr | Acc-2 | F1-Score |
-| :---: | :---: | :---: | :---: | :---: |
-| BERT-MULT | | | | |
-| BERT-MISA | | | | |
-| BERT-MAG | | | | |
-| SELF-MM | | | | |
-
-- MOSEI
-
-| Model | MAE | Corr | Acc-2 | F1-Score |
-| :---: | :---: | :---: | :---: | :---: |
-| BERT-MULT | | | |
-| BERT-MISA | | | |
-| BERT-MAG | | | |
-| SELF-MM | | | |
-
-- SIMS
 
-| Model | MAE | Corr | Acc-2 | F1-Score |
-| :---: | :---: | :---: | :---: | :---: |
-| BERT-MULT | | | |
-| BERT-MISA | | | |
-| BERT-MAG | | | |
-| SELF-MM | | | |
+> We will update all results in another repo, [MMSA](https://github.com/thuiar/MMSA), in the next few days.
 
 
 ### Paper
@@ -56,6 +69,7 @@ Please cite our paper if you find our work useful for your research:
 @inproceedings{yu2021le,
   title={Learning Modality-Specific Representations with Self-Supervised Multi-Task Learning for Multimodal Sentiment Analysis},
  author={Yu, Wenmeng and Xu, Hua and Ziqi, Yuan and Jiele, Wu},
+  booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
  year={2021}
 }
 ```
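
The updated step 1 only specifies the target pickle layout. As a companion, here is a minimal, hypothetical sketch of dumping already-extracted features into that layout; the `save_processed` helper, the `split_features` input, and the output path are illustrative placeholders and are not part of this commit.

```python
import pickle

def save_processed(split_features, out_path="unaligned_50.pkl"):
    """Hypothetical helper: write pre-extracted features in the README's pickle layout."""
    data = {}
    for split in ("train", "valid", "test"):
        feats = split_features[split]  # assumed dict with one list/array per field below
        data[split] = {
            "raw_text": feats["raw_text"],
            "audio": feats["audio"],
            "vision": feats["vision"],
            "id": feats["id"],  # ["video_id$_$clip_id", ...]
            "text": feats["text"],
            "text_bert": feats["text_bert"],
            "audio_lengths": feats["audio_lengths"],
            "vision_lengths": feats["vision_lengths"],
            "annotations": feats["annotations"],
            "classification_labels": feats["classification_labels"],  # <0 / 0 / >0
            "regression_labels": feats["regression_labels"],
        }
    with open(out_path, "wb") as f:
        pickle.dump(data, f)
```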

ResultAnalysis.ipynb

Lines changed: 0 additions & 425 deletions
This file was deleted.

assets/MainModel.pdf

107 KB
Binary file not shown.
Lines changed: 85 additions & 92 deletions
@@ -1,103 +1,109 @@
 import os
 import argparse
 
-os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
+from utils.functions import Storage
 
-__all__ = ['Config']
-
-class Storage(dict):
-    """
-    A Storage object is like a dictionary except `obj.foo` can be used inadition to `obj['foo']`
-    ref: https://blog.csdn.net/a200822146085/article/details/88430450
-    """
-    def __getattr__(self, key):
-        try:
-            return self[key]
-        except KeyError as k:
-            raise AttributeError(k)
-
-    def __setattr__(self, key, value):
-        self[key] = value
-
-    def __delattr__(self, key):
-        try:
-            del self[key]
-        except KeyError as k:
-            raise AttributeError(k)
-
-    def __str__(self):
-        return "<" + self.__class__.__name__ + dict.__repr__(self) + ">"
-
-class Config():
+class ConfigRegression():
     def __init__(self, args):
-        # parameters for data
-        # global parameters for running
-        self.globalArgs = args
         # hyper parameters for models
-        self.HYPER_MODEL_MAP = {
+        HYPER_MODEL_MAP = {
            'self_mm': self.__SELF_MM
        }
        # hyper parameters for datasets
-        self.HYPER_DATASET_MAP = self.__datasetCommonParams()
+        HYPER_DATASET_MAP = self.__datasetCommonParams()
+
+        # normalize
+        model_name = str.lower(args.modelName)
+        dataset_name = str.lower(args.datasetName)
+        # load params
+        commonArgs = HYPER_MODEL_MAP[model_name]()['commonParas']
+        dataArgs = HYPER_DATASET_MAP[dataset_name]
+        dataArgs = dataArgs['aligned'] if (commonArgs['need_data_aligned'] and 'aligned' in dataArgs) else dataArgs['unaligned']
+        # integrate all parameters
+        self.args = Storage(dict(vars(args),
+                                 **dataArgs,
+                                 **commonArgs,
+                                 **HYPER_MODEL_MAP[model_name]()['datasetParas'][dataset_name],
+                                 ))
 
     def __datasetCommonParams(self):
-        assert self.globalArgs.datasetName in ['mosi', 'mosei', 'sims']
-
-        tmp = "aligned" if self.globalArgs.aligned else "unaligned"
-        if self.globalArgs.datasetName in ['mosi', 'mosei']:
-            text_len = 50
-        elif self.globalArgs.datasetName in ['sims']:
-            text_len = 39
-
-        dataPath = os.path.join(self.globalArgs.data_dir, self.globalArgs.datasetName, \
-                                tmp + '_' + str(text_len) + '.pkl')
+        root_dataset_dir = '/home/sharing/disk3/dataset/multimodal-sentiment-dataset/StandardDatasets'
         tmp = {
            'mosi':{
-                'dataPath': dataPath,
-                'input_lens': (50, 50, 50) if self.globalArgs.aligned else (50, 500, 375),
-                # (text, audio, video)
-                'feature_dims': (768, 5, 20),
-                'train_samples': 1284,
-                'language': 'en',
-                'KeyEval': 'Loss'
+                'aligned': {
+                    'dataPath': os.path.join(root_dataset_dir, 'MOSI/Processed/aligned_50.pkl'),
+                    'seq_lens': (50, 50, 50),
+                    # (text, audio, video)
+                    'feature_dims': (768, 5, 20),
+                    'train_samples': 1284,
+                    'num_classes': 3,
+                    'language': 'en',
+                    'KeyEval': 'Loss'
+                },
+                'unaligned': {
+                    'dataPath': os.path.join(root_dataset_dir, 'MOSI/Processed/unaligned_50.pkl'),
+                    'seq_lens': (50, 50, 50),
+                    # (text, audio, video)
+                    'feature_dims': (768, 5, 20),
+                    'train_samples': 1284,
+                    'num_classes': 3,
+                    'language': 'en',
+                    'KeyEval': 'Loss'
+                }
            },
            'mosei':{
-                'dataPath': dataPath,
-                'input_lens': (50, 50, 50) if self.globalArgs.aligned else (50, 500, 375),
-                # (text, audio, video)
-                'feature_dims': (768, 74, 35),
-                'train_samples': 16326,
-                'language': 'en',
-                'KeyEval': 'Loss'
+                'aligned': {
+                    'dataPath': os.path.join(root_dataset_dir, 'MOSEI/Processed/aligned_50.pkl'),
+                    'seq_lens': (50, 50, 50),
+                    # (text, audio, video)
+                    'feature_dims': (768, 74, 35),
+                    'train_samples': 16326,
+                    'num_classes': 3,
+                    'language': 'en',
+                    'KeyEval': 'Loss'
+                },
+                'unaligned': {
+                    'dataPath': os.path.join(root_dataset_dir, 'MOSEI/Processed/unaligned_50.pkl'),
+                    'seq_lens': (50, 500, 375),
+                    # (text, audio, video)
+                    'feature_dims': (768, 74, 35),
+                    'train_samples': 16326,
+                    'num_classes': 3,
+                    'language': 'en',
+                    'KeyEval': 'Loss'
+                }
            },
            'sims':{
-                'dataPath': dataPath,
-                # (batch_size, input_lens, feature_dim)
-                'input_lens': (39, 400, 55), # (text, audio, video)
-                'feature_dims': (768, 33, 709), # (text, audio, video)
-                'train_samples': 1368,
-                'language': 'cn',
-                'KeyEval': 'Loss',
-            },
+                'unaligned': {
+                    'dataPath': os.path.join(root_dataset_dir, 'SIMS/Processed/features/unaligned_39.pkl'),
+                    # (batch_size, seq_lens, feature_dim)
+                    'seq_lens': (39, 400, 55), # (text, audio, video)
+                    'feature_dims': (768, 33, 709), # (text, audio, video)
+                    'train_samples': 1368,
+                    'num_classes': 3,
+                    'language': 'cn',
+                    'KeyEval': 'Loss',
+                }
+            }
        }
        return tmp
 
     def __SELF_MM(self):
        tmp = {
            'commonParas':{
-                'need_align': False,
-                'need_normalize': False,
+                'need_data_aligned': False,
+                'need_model_aligned': False,
+                'need_normalized': False,
                'use_bert': True,
                'use_finetune': True,
-                'early_stop': 12,
-                'task_type': 'regression', # regression / classification
-                'tasks': 'M'
+                'save_labels': True,
+                'early_stop': 8,
+                'update_epochs': 4
            },
            # dataset
            'datasetParas':{
                'mosi':{
                    # the batch_size of each epoch is update_epochs * batch_size
-                    'update_epochs': 4,
                    'batch_size': 32,
                    'learning_rate_bert': 5e-5,
                    'learning_rate_audio': 1e-3,
@@ -107,7 +113,6 @@ def __SELF_MM(self):
                    'weight_decay_audio': 0.01,
                    'weight_decay_video': 0.001,
                    'weight_decay_other': 0.001,
-                    'num_classes': 1,
                    # feature subNets
                    'a_lstm_hidden_size': 32,
                    'v_lstm_hidden_size': 64,
@@ -133,7 +138,6 @@ def __SELF_MM(self):
                },
                'mosei':{
                    # the batch_size of each epoch is update_epochs * batch_size
-                    'update_epochs': 4,
                    'batch_size': 32,
                    'learning_rate_bert': 5e-5,
                    'learning_rate_audio': 0.005,
@@ -143,7 +147,6 @@ def __SELF_MM(self):
                    'weight_decay_audio': 0.0,
                    'weight_decay_video': 0.0,
                    'weight_decay_other': 0.01,
-                    'num_classes': 1,
                    # feature subNets
                    'a_lstm_hidden_size': 32,
                    'v_lstm_hidden_size': 32,
@@ -169,19 +172,17 @@ def __SELF_MM(self):
                },
                'sims':{
                    # the batch_size of each epoch is update_epochs * batch_size
-                    'update_epochs': 4,
-                    'batch_size': 16,
+                    'batch_size': 32,
                    'learning_rate_bert': 5e-5,
                    'learning_rate_audio': 0.005,
-                    'learning_rate_video': 0.005,
+                    'learning_rate_video': 0.001,
                    'learning_rate_other': 0.001,
-                    'weight_decay_bert': 0.001,
-                    'weight_decay_audio': 0.01,
+                    'weight_decay_bert': 0.01,
+                    'weight_decay_audio': 0.0,
                    'weight_decay_video': 0.0,
                    'weight_decay_other': 0.001,
-                    'num_classes': 1,
                    # feature subNets
-                    'a_lstm_hidden_size': 32,
+                    'a_lstm_hidden_size': 16,
                    'v_lstm_hidden_size': 64,
                    'a_lstm_layers': 1,
                    'v_lstm_layers': 1,
@@ -192,11 +193,11 @@ def __SELF_MM(self):
                    'v_lstm_dropout': 0.0,
                    't_bert_dropout':0.1,
                    # post feature
-                    'post_fusion_dim': 64,
+                    'post_fusion_dim': 128,
                    'post_text_dim':64,
-                    'post_audio_dim': 16,
+                    'post_audio_dim': 32,
                    'post_video_dim': 16,
-                    'post_fusion_dropout': 0.0,
+                    'post_fusion_dropout': 0.1,
                    'post_text_dropout': 0.1,
                    'post_audio_dropout': 0.0,
                    'post_video_dropout': 0.1,
@@ -208,12 +209,4 @@ def __SELF_MM(self):
        return tmp
 
     def get_config(self):
-        # normalize
-        model_name = str.lower(self.globalArgs.modelName)
-        dataset_name = str.lower(self.globalArgs.datasetName)
-        # integrate all parameters
-        res = Storage(dict(vars(self.globalArgs),
-                           **self.HYPER_MODEL_MAP[model_name]()['datasetParas'][dataset_name],
-                           **self.HYPER_MODEL_MAP[model_name]()['commonParas'],
-                           **self.HYPER_DATASET_MAP[dataset_name]))
-        return res
+        return self.args
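
For orientation, the rewritten constructor now merges four sources into a single attribute-accessible `Storage`: the CLI arguments, the selected dataset block (aligned or unaligned, chosen via `need_data_aligned`), the model's `commonParas`, and its per-dataset `datasetParas`; `get_config()` simply returns that object. Below is a minimal usage sketch, assuming the class lives in `config/config_regression.py` (the file name is not shown in this diff, but the README's step 4 points there) and that only the two flags from the README's run command are needed.

```python
import argparse

from config.config_regression import ConfigRegression  # assumed module path

parser = argparse.ArgumentParser()
parser.add_argument('--modelName', default='self_mm')
parser.add_argument('--datasetName', default='mosi')
args = parser.parse_args()

config = ConfigRegression(args).get_config()

# Storage allows both attribute and key access to the merged parameters.
print(config.feature_dims)          # (768, 5, 20) for MOSI
print(config.learning_rate_bert)    # 5e-5, from the 'mosi' block of datasetParas
print(config['need_data_aligned'])  # False, so the 'unaligned' data block was selected
```

Compared with the old `get_config`, the merge now happens once in `__init__`, and the alignment choice is driven by the model's `need_data_aligned` flag rather than an `--aligned` command-line switch.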
