# The code is written this way to avoid multi-GPU inference errors caused by tensors not being on the same device: by keeping the first and last layers of the large language model (LLM) on the same GPU, such cross-device mismatches are prevented.
```python
import torch


def split_model(model_name, num_layers=None):
    device_map = {}
    world_size = torch.cuda.device_count()
    if num_layers is None:
        num_layers = {
            "InternVL2_5-1B": 24,
            "InternVL2_5-2B": 24,
            "InternVL2_5-4B": 36,
            "InternVL2_5-8B": 32,
            "InternVL2_5-26B": 48,
            "InternVL2_5-38B": 64,
            "InternVL2_5-78B": 80,
            "InternVL2-1B": 24,
            "InternVL2-2B": 24,
            "InternVL2-4B": 32,
            "InternVL2-8B": 32,
            "InternVL2-26B": 48,
            "InternVL2-40B": 60,
            "InternVL2-Llama3-76B": 80,
        }[model_name]
    # Since the first GPU will be used for ViT, treat it as half a GPU.
```
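The hunk stops at the comment above. For context, here is a minimal sketch of how the device map could be completed under the scheme the comment and the description imply: GPU 0 counts as half a GPU because it also hosts the ViT, and the first and last LLM layers are pinned to GPU 0. The module names (`language_model.model.layers`, `vision_model`, `mlp1`, and so on) are assumptions modeled on the InternVL codebase, not taken from this diff:

```python
import math

# Hypothetical continuation (not in the hunk): spread the LLM layers across
# GPUs, counting GPU 0 as half a GPU, then pin first/last layers to GPU 0.
num_layers, world_size = 32, 4  # e.g. an 8B model on 4 GPUs
device_map = {}

num_layers_per_gpu = math.ceil(num_layers / (world_size - 0.5))
num_layers_per_gpu = [num_layers_per_gpu] * world_size
num_layers_per_gpu[0] = math.ceil(num_layers_per_gpu[0] * 0.5)  # half share for GPU 0

layer_cnt = 0
for gpu, n in enumerate(num_layers_per_gpu):
    for _ in range(n):
        device_map[f"language_model.model.layers.{layer_cnt}"] = gpu
        layer_cnt += 1

# Modules that must share a device with the first/last LLM layers stay on GPU 0.
device_map["vision_model"] = 0
device_map["mlp1"] = 0
device_map["language_model.model.embed_tokens"] = 0
device_map["language_model.model.norm"] = 0
device_map["language_model.lm_head"] = 0
device_map[f"language_model.model.layers.{num_layers - 1}"] = 0
```

The resulting `device_map` would then be passed to `from_pretrained(..., device_map=device_map)` so that Hugging Face `accelerate` places each submodule on its assigned GPU.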
```python
# If you want to use DistributedType.DEEPSPEED, you have to run `accelerate config` before using the model.
assert accelerator.distributed_type in [
    DistributedType.FSDP,
    DistributedType.MULTI_GPU,
    DistributedType.DEEPSPEED,
], "Unsupported distributed type provided. Only DDP, FSDP, and DeepSpeed are supported."
```
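A minimal, self-contained sketch of how this check is typically reached; the `Accelerator()` construction is an illustrative assumption, and only the assert itself comes from the diff:

```python
from accelerate import Accelerator, DistributedType

# For DeepSpeed, run `accelerate config` first; Accelerator() then reads the
# generated config and reports DEEPSPEED as the distributed type.
accelerator = Accelerator()

assert accelerator.distributed_type in [
    DistributedType.FSDP,
    DistributedType.MULTI_GPU,
    DistributedType.DEEPSPEED,
], "Unsupported distributed type provided. Only DDP, FSDP, and DeepSpeed are supported."
```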