
Commit 38644dd

Refactor everything outside of core out of the top-level megatron namespace.

1 parent: dc7fa88

159 files changed: +478 additions, -605 deletions
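Every hunk below follows the same pattern: modules that used to live at the top of the megatron package move into one of three subpackages. Training utilities go to megatron.training, the pre-core model code goes to megatron.legacy, and serving/quantization code goes to megatron.inference (the examples/deploy directory is renamed to examples/inference to match). As a rough aid for downstream scripts, here is a minimal, hypothetical migration helper; it is not part of this commit, and its rewrite table covers only the moves visible in the hunks below:

    import re

    # Prefix moves taken from the hunks in this commit; specific submodules
    # first, the bare top-level "from megatron import ..." form last.
    IMPORT_MOVES = [
        (r"\bmegatron\.checkpointing\b", "megatron.training.checkpointing"),
        (r"\bmegatron\.initialize\b", "megatron.training.initialize"),
        (r"\bmegatron\.arguments\b", "megatron.training.arguments"),
        (r"\bmegatron\.utils\b", "megatron.training.utils"),
        (r"\bmegatron\.text_generation\b", "megatron.inference.text_generation"),
        (r"\bmegatron\.deploy\b", "megatron.inference"),
        (r"\bmegatron\.model\b", "megatron.legacy.model"),
        # get_args/get_timers/get_tokenizer/print_rank_0 all move to .training.
        (r"^(\s*)from megatron import ", r"\1from megatron.training import "),
    ]

    def migrate_imports(source: str) -> str:
        """Rewrite pre-refactor Megatron import lines to the new namespaces."""
        out = []
        for line in source.splitlines():
            for pattern, repl in IMPORT_MOVES:
                line = re.sub(pattern, repl, line)
            out.append(line)
        return "\n".join(out)

    assert migrate_imports("from megatron import get_args") == \
        "from megatron.training import get_args"
    assert migrate_imports("from megatron.model import GPTModel") == \
        "from megatron.legacy.model import GPTModel"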


README.md

Lines changed: 6 additions & 6 deletions

examples/detxoify_lm/finetune_gpt.py

Lines changed: 7 additions & 7 deletions
@@ -10,19 +10,19 @@
 import sys
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__),
                                              os.path.pardir, os.path.pardir)))
-from megatron import get_args
-from megatron import get_timers
-from megatron import get_tokenizer
-from megatron import print_rank_0
+from megatron.training import get_args
+from megatron.training import get_timers
+from megatron.training import get_tokenizer
+from megatron.training import print_rank_0
 from megatron.core import mpu
 from megatron.core.datasets.blended_megatron_dataset_builder import BlendedMegatronDatasetBuilder
 from megatron.core.datasets.blended_megatron_dataset_config import GPTDatasetConfig
 from megatron.core.datasets.gpt_dataset import GPTDataset
-from megatron.model import GPTModel
+from megatron.legacy.model import GPTModel
 from megatron.core.enums import ModelType
 from megatron.training import pretrain
-from megatron.utils import get_ltor_masks_and_position_ids
-from megatron.utils import average_losses_across_data_parallel_group
+from megatron.training.utils import get_ltor_masks_and_position_ids
+from megatron.training.utils import average_losses_across_data_parallel_group
 
 def model_provider(pre_process=True, post_process=True):
     """Build the model."""

examples/detxoify_lm/generate_samples_gpt.py

Lines changed: 13 additions & 13 deletions
@@ -9,24 +9,24 @@
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__),
                                              os.path.pardir, os.path.pardir)))
 import torch
-from megatron import get_args
-from megatron import get_tokenizer
-from megatron import print_rank_0
-from megatron.checkpointing import load_checkpoint
+from megatron.training import get_args
+from megatron.training import get_tokenizer
+from megatron.training import print_rank_0
+from megatron.training.checkpointing import load_checkpoint
 from megatron.core import mpu
-from megatron.initialize import initialize_megatron
-from megatron.model import GPTModel
+from megatron.training.initialize import initialize_megatron
+from megatron.legacy.model import GPTModel
 from megatron.training import get_model
-from megatron.text_generation import generate_and_post_process
-from megatron.arguments import core_transformer_config_from_args
+from megatron.inference.text_generation import generate_and_post_process
+from megatron.training.arguments import core_transformer_config_from_args
 from megatron.core.models.gpt import GPTModel
 from typing import Union
-import megatron.model
+import megatron.legacy.model
 from megatron.core.transformer.spec_utils import import_module
-from megatron.arguments import core_transformer_config_from_args
+from megatron.training.arguments import core_transformer_config_from_args
 from megatron.core.models.gpt.gpt_layer_specs import get_gpt_layer_with_transformer_engine_spec, get_gpt_layer_local_spec
 
-def model_provider(pre_process=True, post_process=True) -> Union[GPTModel, megatron.model.GPTModel]:
+def model_provider(pre_process=True, post_process=True) -> Union[GPTModel, megatron.legacy.model.GPTModel]:
     """Builds the model.
 
     If you set the use_mcore_models to True, it will return the mcore GPT model and if not the legacy GPT model.
@@ -37,7 +37,7 @@ def model_provider(pre_process=True, post_process=True) -> Union[GPTModel, megat
 
 
     Returns:
-        Union[GPTModel, megatron.model.GPTModel]: The returned model
+        Union[GPTModel, megatron.legacy.model.GPTModel]: The returned model
     """
     args = get_args()
 
@@ -83,7 +83,7 @@ def model_provider(pre_process=True, post_process=True) -> Union[GPTModel, megat
     else:
         assert(args.context_parallel_size == 1), "Context parallelism is only supported with Megatron Core!"
 
-        model = megatron.model.GPTModel(
+        model = megatron.legacy.model.GPTModel(
             config,
             num_tokentypes=0,
             parallel_output=True,
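The Union return type above exists because the script keeps two construction paths: the MCore GPTModel from megatron.core.models.gpt when use_mcore_models is set, and the legacy model that this commit relocates to megatron.legacy.model. A condensed sketch of that branch follows; only the pieces shown in the hunks above are taken from the diff, and the MCore keyword arguments and the pre/post-process wiring are illustrative:

    from typing import Union

    import megatron.legacy.model
    from megatron.training import get_args
    from megatron.training.arguments import core_transformer_config_from_args
    from megatron.core.models.gpt import GPTModel
    from megatron.core.models.gpt.gpt_layer_specs import (
        get_gpt_layer_with_transformer_engine_spec,
    )

    def model_provider(pre_process=True, post_process=True
                       ) -> Union[GPTModel, megatron.legacy.model.GPTModel]:
        args = get_args()
        config = core_transformer_config_from_args(args)
        if args.use_mcore_models:
            # MCore path: build the core GPTModel from a transformer layer spec.
            spec = get_gpt_layer_with_transformer_engine_spec()
            model = GPTModel(
                config=config,
                transformer_layer_spec=spec,
                vocab_size=args.padded_vocab_size,                # illustrative
                max_sequence_length=args.max_position_embeddings, # illustrative
                pre_process=pre_process,
                post_process=post_process,
            )
        else:
            assert args.context_parallel_size == 1, \
                "Context parallelism is only supported with Megatron Core!"
            # Legacy path: the model class relocated in this commit.
            model = megatron.legacy.model.GPTModel(
                config,
                num_tokentypes=0,
                parallel_output=True,
                pre_process=pre_process,   # illustrative; truncated in the hunk
                post_process=post_process,
            )
        return model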

examples/deploy/README.md renamed to examples/inference/README.md

Lines changed: 3 additions & 3 deletions

examples/deploy/ptq_trtllm_llama_7b.sh renamed to examples/inference/ptq_trtllm_llama_7b.sh

Lines changed: 2 additions & 2 deletions
@@ -73,7 +73,7 @@ python -c "import ammo.torch.quantization.extensions as ext; print(ext.cuda_ext)
 launch_config="--nproc_per_node=${TP}"
 
 # Launch multi-process with torchrun
-torchrun ${launch_config} examples/deploy/text_generation_ptq.py ${options} ${additional_options} --load ${CHECKPOINT_LOAD_DIR}
+torchrun ${launch_config} examples/inference/text_generation_ptq.py ${options} ${additional_options} --load ${CHECKPOINT_LOAD_DIR}
 
 # This script is using mpi4py which will fork multiple processes.
-python examples/deploy/trtllm_text_generation.py ${trtllm_options}
+python examples/inference/trtllm_text_generation.py ${trtllm_options}

examples/deploy/ptq_trtllm_nemotron3_8b.sh renamed to examples/inference/ptq_trtllm_nemotron3_8b.sh

Lines changed: 2 additions & 2 deletions
@@ -68,8 +68,8 @@ python -c "import ammo.torch.quantization.extensions as ext; print(ext.cuda_ext)
 launch_config="--nproc_per_node=${TP}"
 
 # Launch multi-process with torchrun
-torchrun ${launch_config} examples/deploy/text_generation_ptq.py ${options} ${additional_options} --load ${CHECKPOINT_LOAD_DIR}
+torchrun ${launch_config} examples/inference/text_generation_ptq.py ${options} ${additional_options} --load ${CHECKPOINT_LOAD_DIR}
 
 # This script is using mpi4py which will fork multiple processes.
-python examples/deploy/trtllm_text_generation.py ${trtllm_options}
+python examples/inference/trtllm_text_generation.py ${trtllm_options}
 
examples/deploy/text_generation_ptq.py renamed to examples/inference/text_generation_ptq.py

Lines changed: 7 additions & 7 deletions
@@ -13,16 +13,16 @@
 from datasets import load_dataset
 
 # [ModelOpt]: changing the default model provider to the AMMO version
-from megatron import get_args, print_rank_0
-from megatron.checkpointing import load_checkpoint, save_checkpoint
+from megatron.training import get_args, print_rank_0
+from megatron.training.checkpointing import load_checkpoint, save_checkpoint
 from megatron.core import mpu
 from megatron.core.dist_checkpointing import load
-from megatron.deploy.arguments import add_ammo_args
-from megatron.deploy.gpt.model_provider import model_provider
-from megatron.initialize import initialize_megatron
-from megatron.text_generation import generate_and_post_process
+from megatron.inference.arguments import add_ammo_args
+from megatron.inference.gpt.model_provider import model_provider
+from megatron.training.initialize import initialize_megatron
+from megatron.inference.text_generation import generate_and_post_process
 from megatron.training import get_model
-from megatron.utils import unwrap_model
+from megatron.training.utils import unwrap_model
 
 QUANT_CFG_CHOICES = {
     "int8": atq.INT8_DEFAULT_CFG,