|
| 1 | +import os |
| 2 | +import shutil |
| 3 | +import sys |
| 4 | + |
| 5 | +from swift.llm import git_clone_github |
| 6 | +from swift.utils import is_megatron_available, safe_ddp_context, subprocess_run |
| 7 | + |
| 8 | + |
| 9 | +def _rename_files(): |
| 10 | + megatron_patch_path = os.environ['PAI_MEGATRON_PATCH_PATH'] |
| 11 | + qwen_folders = ['toolkits/model_checkpoints_convertor/qwen'] |
| 12 | + for folder in qwen_folders: |
| 13 | + dir_path = os.path.join(megatron_patch_path, folder) |
| 14 | + for fname in os.listdir(dir_path): |
| 15 | + old_path = os.path.join(dir_path, fname) |
| 16 | + fname = fname.replace('qwen1.', 'qwen1_') |
| 17 | + fname = fname.replace('qwen2.', 'qwen2_') |
| 18 | + new_path = os.path.join(dir_path, fname) |
| 19 | + if old_path != new_path and os.path.exists(old_path): |
| 20 | + shutil.move(old_path, new_path) |
| 21 | + |
| 22 | + |
def init_megatron_env() -> None:
    """Make Megatron-LM and Pai-Megatron-Patch importable in this process.

    For each repository the checkout location is read from an environment
    variable (``MEGATRON_LM_PATH`` / ``PAI_MEGATRON_PATCH_PATH``). When the
    variable is unset, the repository is fetched with ``git_clone_github``
    and the value it returns is stored in the variable. Both locations are
    appended to ``sys.path`` (once), Megatron-LM is installed in editable
    mode when ``is_megatron_available()`` reports it missing, and finally the
    Qwen converter files in the patch checkout are renamed.
    """
    if 'MEGATRON_LM_PATH' not in os.environ:
        os.environ['MEGATRON_LM_PATH'] = git_clone_github(
            'https://github.com/NVIDIA/Megatron-LM', branch='core_r0.10.0')
    megatron_lm_path = os.environ['MEGATRON_LM_PATH']
    if not is_megatron_available():
        # Invoke pip through the running interpreter so the package is
        # installed into the environment this process actually uses; a bare
        # `pip` on PATH may belong to a different Python.
        subprocess_run([sys.executable, '-m', 'pip', 'install', '-e', megatron_lm_path])
    # Guard against duplicate entries when this function is called again.
    if megatron_lm_path not in sys.path:
        sys.path.append(megatron_lm_path)

    if 'PAI_MEGATRON_PATCH_PATH' not in os.environ:
        os.environ['PAI_MEGATRON_PATCH_PATH'] = git_clone_github(
            'https://github.com/alibaba/Pai-Megatron-Patch', commit_hash='v0.10.1')
    megatron_patch_path = os.environ['PAI_MEGATRON_PATCH_PATH']
    if megatron_patch_path not in sys.path:
        sys.path.append(megatron_patch_path)

    # rename qwen1.5/2.5->qwen1_5/2_5 files
    # NOTE(review): safe_ddp_context presumably coordinates the rename across
    # distributed ranks so they do not race on the same files -- confirm.
    with safe_ddp_context('rename_files'):
        _rename_files()
0 commit comments