From 9a8c458c5e7c57d272cf8f7f88f6e33c23fabb80 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 9 Jan 2025 14:17:12 +0800 Subject: [PATCH] update --- swift/megatron/__init__.py | 10 +++++++--- swift/megatron/init.py | 38 ++++++++++++++++++++++++++++++++++++++ swift/megatron/utils.py | 32 -------------------------------- 3 files changed, 45 insertions(+), 35 deletions(-) create mode 100644 swift/megatron/init.py diff --git a/swift/megatron/__init__.py b/swift/megatron/__init__.py index 418fba95b..0a75b056d 100644 --- a/swift/megatron/__init__.py +++ b/swift/megatron/__init__.py @@ -1,6 +1,10 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -from .convert import convert_hf2megatron, convert_megatron2hf -from .utils import init_megatron_env +try: + from .init import init_megatron_env + init_megatron_env() +except Exception: + # allows lint pass. + raise -init_megatron_env() +from .convert import convert_hf2megatron, convert_megatron2hf diff --git a/swift/megatron/init.py b/swift/megatron/init.py new file mode 100644 index 000000000..4291ce62a --- /dev/null +++ b/swift/megatron/init.py @@ -0,0 +1,38 @@ +import os +import shutil +import sys + +from swift.llm import git_clone_github +from swift.utils import is_megatron_available, safe_ddp_context, subprocess_run + + +def _rename_files(): + megatron_patch_path = os.environ['PAI_MEGATRON_PATCH_PATH'] + qwen_folders = ['toolkits/model_checkpoints_convertor/qwen'] + for folder in qwen_folders: + dir_path = os.path.join(megatron_patch_path, folder) + for fname in os.listdir(dir_path): + old_path = os.path.join(dir_path, fname) + fname = fname.replace('qwen1.', 'qwen1_') + fname = fname.replace('qwen2.', 'qwen2_') + new_path = os.path.join(dir_path, fname) + if old_path != new_path and os.path.exists(old_path): + shutil.move(old_path, new_path) + + +def init_megatron_env() -> None: + if 'MEGATRON_LM_PATH' not in os.environ: + os.environ['MEGATRON_LM_PATH'] = git_clone_github( + 'https://github.com/NVIDIA/Megatron-LM', branch='core_r0.10.0') + if not is_megatron_available(): + subprocess_run(['pip', 'install', '-e', os.environ['MEGATRON_LM_PATH']]) + sys.path.append(os.environ['MEGATRON_LM_PATH']) + + if 'PAI_MEGATRON_PATCH_PATH' not in os.environ: + os.environ['PAI_MEGATRON_PATCH_PATH'] = git_clone_github( + 'https://github.com/alibaba/Pai-Megatron-Patch', commit_hash='v0.10.1') + sys.path.append(os.environ['PAI_MEGATRON_PATCH_PATH']) + + # rename qwen1.5/2.5->qwen1_5/2_5 files + with safe_ddp_context('rename_files'): + _rename_files() diff --git a/swift/megatron/utils.py b/swift/megatron/utils.py index 0d99f1951..6568974a4 100644 --- a/swift/megatron/utils.py +++ b/swift/megatron/utils.py @@ -19,38 +19,6 @@ logger = get_logger() -def _rename_files(): - megatron_patch_path = os.environ['PAI_MEGATRON_PATCH_PATH'] - qwen_folders = ['toolkits/model_checkpoints_convertor/qwen'] - for folder in qwen_folders: - dir_path = os.path.join(megatron_patch_path, folder) - for fname in os.listdir(dir_path): - old_path = os.path.join(dir_path, fname) - fname = fname.replace('qwen1.', 'qwen1_') - fname = fname.replace('qwen2.', 'qwen2_') - new_path = os.path.join(dir_path, fname) - if old_path != new_path and os.path.exists(old_path): - shutil.move(old_path, new_path) - - -def init_megatron_env() -> None: - if 'MEGATRON_LM_PATH' not in os.environ: - os.environ['MEGATRON_LM_PATH'] = git_clone_github( - 'https://github.com/NVIDIA/Megatron-LM', branch='core_r0.10.0') - if not is_megatron_available(): - subprocess_run(['pip', 'install', '-e', os.environ['MEGATRON_LM_PATH']]) - sys.path.append(os.environ['MEGATRON_LM_PATH']) - - if 'PAI_MEGATRON_PATCH_PATH' not in os.environ: - os.environ['PAI_MEGATRON_PATCH_PATH'] = git_clone_github( - 'https://github.com/alibaba/Pai-Megatron-Patch', commit_hash='v0.10.1') - sys.path.append(os.environ['PAI_MEGATRON_PATCH_PATH']) - - # rename qwen1.5/2.5->qwen1_5/2_5 files - with safe_ddp_context('rename_files'): - _rename_files() - - def patch_megatron(tokenizer): def build_tokenizer(args):