diff --git a/EZ/.gitignore b/EZ/.gitignore
index 0e20a0f..ee44c08 100644
--- a/EZ/.gitignore
+++ b/EZ/.gitignore
@@ -3,8 +3,6 @@ __pycache__/
*.pyc
*$py.class
-core/ctree/UNKOWN*
-core/ctree/build
# Pycharm
.idea/*
diff --git a/EZ/config/atari/__init__.py b/EZ/config/atari/__init__.py
index dbb2b98..fac7559 100644
--- a/EZ/config/atari/__init__.py
+++ b/EZ/config/atari/__init__.py
@@ -12,9 +12,9 @@ class AtariConfig(BaseConfig):
super(AtariConfig, self).__init__(
training_steps=100000,
last_steps=20000,
- test_interval=1000,
- log_interval=1,
- vis_interval=10,
+ test_interval=10000,
+ log_interval=10,
+ vis_interval=1000,
test_episodes=32,
checkpoint_interval=100,
target_model_interval=200,
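
Note: the interval changes above only affect how often periodic work fires during training. As a rough illustration (standalone sketch; the actual EfficientZero training loop is not part of this diff, so the loop below is purely hypothetical), the *_interval values typically gate work like this:

# Illustrative sketch only -- not code from this repository.
class Cfg:
    test_interval = 10000   # evaluate less often than before (was 1000)
    log_interval = 10       # was 1
    vis_interval = 1000     # was 10

def training_loop(cfg, total_steps=100_000):
    for step in range(1, total_steps + 1):
        # ... one optimisation step would go here ...
        if step % cfg.test_interval == 0:
            print(f'step {step}: run evaluation episodes')
        if step % cfg.log_interval == 0:
            pass  # write scalar logs
        if step % cfg.vis_interval == 0:
            pass  # dump visualisations

training_loop(Cfg())
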
diff --git a/EZ/main.py b/EZ/main.py
index f7f5d1f..ffde00d 100644
--- a/EZ/main.py
+++ b/EZ/main.py
@@ -16,34 +16,26 @@ if __name__ == '__main__':
parser.add_argument('--env', required=True, help='Name of the environment')
parser.add_argument('--result_dir', default=os.path.join(os.getcwd(), 'results'),
help="Directory Path to store results (default: %(default)s)")
- parser.add_argument('--case', required=True, choices=['atari', 'rubik'],
+ parser.add_argument('--case', required=True, choices=['atari'],
help="It's used for switching between different domains(default: %(default)s)")
parser.add_argument('--opr', required=True, choices=['train', 'test'])
parser.add_argument('--amp_type', required=True, choices=['torch_amp', 'none'],
help='choose automated mixed precision type')
- parser.add_argument('--no_cuda', action='store_true',
- default=False, help='no cuda usage (default: %(default)s)')
+ parser.add_argument('--no_cuda', action='store_true', default=False, help='no cuda usage (default: %(default)s)')
parser.add_argument('--debug', action='store_true', default=False,
help='If enabled, logs additional values '
'(gradients, target value, reward distribution, etc.) (default: %(default)s)')
parser.add_argument('--render', action='store_true', default=False,
help='Renders the environment (default: %(default)s)')
- parser.add_argument('--save_video', action='store_true',
- default=False, help='save video in test.')
+ parser.add_argument('--save_video', action='store_true', default=False, help='save video in test.')
parser.add_argument('--force', action='store_true', default=False,
help='Overrides past results (default: %(default)s)')
- parser.add_argument('--cpu_actor', type=int,
- default=14, help='batch cpu actor')
- parser.add_argument('--gpu_actor', type=int,
- default=20, help='batch bpu actor')
- parser.add_argument('--p_mcts_num', type=int, default=4,
- help='number of parallel mcts')
- parser.add_argument('--seed', type=int, default=0,
- help='seed (default: %(default)s)')
- parser.add_argument('--num_gpus', type=int,
- default=4, help='gpus available')
- parser.add_argument('--num_cpus', type=int,
- default=80, help='cpus available')
+ parser.add_argument('--cpu_actor', type=int, default=14, help='batch cpu actor')
+ parser.add_argument('--gpu_actor', type=int, default=20, help='batch bpu actor')
+ parser.add_argument('--p_mcts_num', type=int, default=4, help='number of parallel mcts')
+ parser.add_argument('--seed', type=int, default=0, help='seed (default: %(default)s)')
+ parser.add_argument('--num_gpus', type=int, default=4, help='gpus available')
+ parser.add_argument('--num_cpus', type=int, default=80, help='cpus available')
parser.add_argument('--revisit_policy_search_rate', type=float, default=0.99,
help='Rate at which target policy is re-estimated (default: %(default)s)')
parser.add_argument('--use_root_value', action='store_true', default=False,
@@ -51,29 +43,20 @@ if __name__ == '__main__':
parser.add_argument('--use_priority', action='store_true', default=False,
help='Uses priority for data sampling in replay buffer. '
'Also, priority for new data is calculated based on loss (default: False)')
- parser.add_argument('--use_max_priority', action='store_true',
- default=False, help='max priority')
- parser.add_argument('--test_episodes', type=int, default=10,
- help='Evaluation episode count (default: %(default)s)')
- parser.add_argument('--use_augmentation', action='store_true',
- default=True, help='use augmentation')
+ parser.add_argument('--use_max_priority', action='store_true', default=False, help='max priority')
+ parser.add_argument('--test_episodes', type=int, default=10, help='Evaluation episode count (default: %(default)s)')
+ parser.add_argument('--use_augmentation', action='store_true', default=True, help='use augmentation')
parser.add_argument('--augmentation', type=str, default=['shift', 'intensity'], nargs='+',
- choices=['none', 'rrc', 'affine', 'crop',
- 'blur', 'shift', 'intensity'],
+ choices=['none', 'rrc', 'affine', 'crop', 'blur', 'shift', 'intensity'],
help='Style of augmentation')
- parser.add_argument('--info', type=str, default='none',
- help='debug string')
- parser.add_argument('--load_model', action='store_true',
- default=False, help='choose to load model')
- parser.add_argument('--model_path', type=str,
- default='./results/test_model.p', help='load model path')
- parser.add_argument('--object_store_memory', type=int,
- default=150 * 1024 * 1024 * 1024, help='object store memory')
+ parser.add_argument('--info', type=str, default='none', help='debug string')
+ parser.add_argument('--load_model', action='store_true', default=False, help='choose to load model')
+ parser.add_argument('--model_path', type=str, default='./results/test_model.p', help='load model path')
+ parser.add_argument('--object_store_memory', type=int, default=150 * 1024 * 1024 * 1024, help='object store memory')
# Process arguments
args = parser.parse_args()
- args.device = 'cuda' if (
- not args.no_cuda) and torch.cuda.is_available() else 'cpu'
+ args.device = 'cuda' if (not args.no_cuda) and torch.cuda.is_available() else 'cpu'
assert args.revisit_policy_search_rate is None or 0 <= args.revisit_policy_search_rate <= 1, \
' Revisit policy search rate should be in [0,1]'
@@ -89,8 +72,6 @@ if __name__ == '__main__':
# import corresponding configuration , neural networks and envs
if args.case == 'atari':
from config.atari import game_config
- elif args.case == 'rubik':
- from config.rubik import game_config
else:
raise Exception('Invalid --case option')
@@ -101,8 +82,7 @@ if __name__ == '__main__':
# set-up logger
init_logger(log_base_path)
logging.getLogger('train').info('Path: {}'.format(exp_path))
- logging.getLogger('train').info(
- 'Param: {}'.format(game_config.get_hparams()))
+ logging.getLogger('train').info('Param: {}'.format(game_config.get_hparams()))
device = game_config.device
try:
@@ -115,8 +95,7 @@ if __name__ == '__main__':
model, weights = train(game_config, summary_writer, model_path)
model.set_weights(weights)
total_steps = game_config.training_steps + game_config.last_steps
- test_score, _, test_path = test(game_config, model.to(
- device), total_steps, game_config.test_episodes, device, render=False, save_video=args.save_video, final_test=True, use_pb=True)
+ test_score, _, test_path = test(game_config, model.to(device), total_steps, game_config.test_episodes, device, render=False, save_video=args.save_video, final_test=True, use_pb=True)
mean_score = test_score.mean()
std_score = test_score.std()
@@ -125,42 +104,32 @@ if __name__ == '__main__':
'std_score': std_score,
}
for key, val in test_log.items():
- summary_writer.add_scalar(
- 'train/{}'.format(key), np.mean(val), total_steps)
+ summary_writer.add_scalar('train/{}'.format(key), np.mean(val), total_steps)
test_msg = '#{:<10} Test Mean Score of {}: {:<10} (max: {:<10}, min:{:<10}, std: {:<10})' \
- ''.format(total_steps, game_config.env_name, mean_score,
- test_score.max(), test_score.min(), std_score)
+ ''.format(total_steps, game_config.env_name, mean_score, test_score.max(), test_score.min(), std_score)
logging.getLogger('train_test').info(test_msg)
if args.save_video:
- logging.getLogger('train_test').info(
- 'Saving video in path: {}'.format(test_path))
+ logging.getLogger('train_test').info('Saving video in path: {}'.format(test_path))
elif args.opr == 'test':
assert args.load_model
if args.model_path is None:
model_path = game_config.model_path
else:
model_path = args.model_path
- assert os.path.exists(
- model_path), 'model not found at {}'.format(model_path)
+ assert os.path.exists(model_path), 'model not found at {}'.format(model_path)
model = game_config.get_uniform_network().to(device)
- model.load_state_dict(torch.load(
- model_path, map_location=torch.device(device)))
- test_score, _, test_path = test(game_config, model, 0, args.test_episodes, device=device,
- render=args.render, save_video=args.save_video, final_test=True, use_pb=True)
+ model.load_state_dict(torch.load(model_path, map_location=torch.device(device)))
+ test_score, _, test_path = test(game_config, model, 0, args.test_episodes, device=device, render=args.render, save_video=args.save_video, final_test=True, use_pb=True)
mean_score = test_score.mean()
std_score = test_score.std()
- logging.getLogger('test').info('Test Mean Score: {} (max: {}, min: {})'.format(
- mean_score, test_score.max(), test_score.min()))
- logging.getLogger('test').info(
- 'Test Std Score: {}'.format(std_score))
+ logging.getLogger('test').info('Test Mean Score: {} (max: {}, min: {})'.format(mean_score, test_score.max(), test_score.min()))
+ logging.getLogger('test').info('Test Std Score: {}'.format(std_score))
if args.save_video:
- logging.getLogger('test').info(
- 'Saving video in path: {}'.format(test_path))
+ logging.getLogger('test').info('Saving video in path: {}'.format(test_path))
else:
- raise Exception(
- 'Please select a valid operation(--opr) to be performed')
+ raise Exception('Please select a valid operation(--opr) to be performed')
ray.shutdown()
except Exception as e:
logging.getLogger('root').error(e, exc_info=True)
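
Apart from dropping the 'rubik' case, the main.py edits above are line-wrapping consolidations with unchanged behaviour; for example, the device selection collapses to the standard pattern in this standalone sketch (not repository code):

# Standalone sketch of the consolidated device-selection logic above.
import argparse
import torch

parser = argparse.ArgumentParser()
parser.add_argument('--no_cuda', action='store_true', default=False,
                    help='no cuda usage (default: %(default)s)')
args = parser.parse_args([])  # empty list: use the defaults for this demo
args.device = 'cuda' if (not args.no_cuda) and torch.cuda.is_available() else 'cpu'
print(args.device)  # 'cuda' when a GPU is visible and --no_cuda is not set, else 'cpu'
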
diff --git a/EZ/test.sh b/EZ/test.sh
index 9c6258b..417b7f3 100644
--- a/EZ/test.sh
+++ b/EZ/test.sh
@@ -2,7 +2,7 @@ set -ex
export CUDA_DEVICE_ORDER='PCI_BUS_ID'
export CUDA_VISIBLE_DEVICES=0
-python main.py --env BreakoutNoFrameskip-v4 --case rubik --opr test --seed 0 --num_gpus 1 --num_cpus 1 --force \
+python main.py --env BreakoutNoFrameskip-v4 --case atari --opr test --seed 0 --num_gpus 1 --num_cpus 20 --force \
--test_episodes 32 \
--load_model \
--amp_type 'torch_amp' \
diff --git a/EZ/train.sh b/EZ/train.sh
index 5d34c40..b0c984d 100644
--- a/EZ/train.sh
+++ b/EZ/train.sh
@@ -3,8 +3,7 @@ export CUDA_DEVICE_ORDER='PCI_BUS_ID'
export CUDA_VISIBLE_DEVICES=0,1,2,3
python main.py --env BreakoutNoFrameskip-v4 --case atari --opr train --force \
- --num_gpus 1 --num_cpus 1 --cpu_actor 1 --gpu_actor 1 \
- --object_store_memory 1610612736 \
+ --num_gpus 4 --num_cpus 96 --cpu_actor 14 --gpu_actor 20 \
--seed 0 \
--p_mcts_num 4 \
--use_priority \