8 changes: 4 additions & 4 deletions examples/train/rft/rft.py
@@ -22,7 +22,7 @@ def do_sample(model: str, model_type: str, dataset: List[str], iter: int):
for device in range(device_count):
sample_cmd = (f'{conda_prefix} USE_OPENCOMPASS_EVALUATOR=True CUDA_VISIBLE_DEVICES={device} swift sample '
f'--model {model} --model_type {model_type} '
f'--dataset {" ".join(dataset)} '
f'--dataset {' '.join(dataset)} '
f'--data_range {device} {device_count} '
f'--max_length 2048 '
f'--system "You are a math model, you should **think step by step** carefully, '
@@ -61,7 +61,7 @@ def do_sample(model: str, model_type: str, dataset: List[str], iter: int):
sample_cmd = (
f'{conda_prefix} USE_OPENCOMPASS_EVALUATOR=True CUDA_VISIBLE_DEVICES={device} swift sample '
f'--model {model} --model_type {model_type} ' # change to --resume_from_checkpoint to use the latest optimizer state # noqa
f'--dataset {" ".join(dataset)} '
f'--dataset {' '.join(dataset)} '
f'--data_range {device} {device_count} '
f'--max_length 2048 '
f'--system "You are a math model, you should **think step by step** carefully, '
@@ -91,7 +91,7 @@ def do_sample(model: str, model_type: str, dataset: List[str], iter: int):
for proc, handler in enumerate(handlers):
handler.wait()
assert os.path.exists(os.path.join('sample_output', f'iter_{iter}_proc_{proc}_sampling.jsonl')), (
f'{os.path.join("sample_output", f"iter_{iter}_proc_{proc}_sampling.jsonl")} not exists, '
f'{os.path.join('sample_output', f"iter_{iter}_proc_{proc}_sampling.jsonl")} not exists, '
'please check the sample logs to get the detail error.')
datasets.append(os.path.join('sample_output', f'iter_{iter}_proc_{proc}_sampling.jsonl'))
print(f'Sampling done, files:{datasets}', flush=True)
@@ -110,7 +110,7 @@ def do_train(model: str, model_type: str, datasets: List[str], iter, cmd='sft'):
ga = 128 // get_device_count() // 2
train_cmd = (f'{conda_prefix} {gpu_prefix} swift {cmd} '
f'--model {model} --model_type {model_type} '
f'--dataset {" ".join(datasets)} '
f'--dataset {' '.join(datasets)} '
f'--max_length 2048 '
f'--num_train_epochs 1 '
f'--load_args false '
10 changes: 5 additions & 5 deletions scripts/benchmark/exp_utils.py
@@ -122,7 +122,7 @@ def run(self, exp: Experiment):
exp.runtime = runtime
envs = deepcopy(runtime.get('env', {}))
envs.update(os.environ)
logger.info(f'Running cmd: {runtime["running_cmd"]}, env: {runtime.get("env", {})}')
logger.info(f'Running cmd: {runtime['running_cmd']}, env: {runtime.get('env', {})}')
os.makedirs('exp', exist_ok=True)
log_file = os.path.join('exp', f'{exp.name}.eval.log')
exp.handler = subprocess.Popen(runtime['running_cmd'] + f' > {log_file} 2>&1', env=envs, shell=True)
@@ -140,7 +140,7 @@ def run(self, exp: Experiment):
exp.runtime = runtime
envs = deepcopy(runtime.get('env', {}))
envs.update(os.environ)
logger.info(f'Running cmd: {runtime["running_cmd"]}, env: {runtime.get("env", {})}')
logger.info(f'Running cmd: {runtime['running_cmd']}, env: {runtime.get('env', {})}')
os.makedirs('exp', exist_ok=True)
log_file = os.path.join('exp', f'{exp.name}.{exp.cmd}.log')
exp.handler = subprocess.Popen(runtime['running_cmd'] + f' > {log_file} 2>&1', env=envs, shell=True)
@@ -162,10 +162,10 @@ def _build_eval_cmd(self, exp: Experiment):
if best_model_checkpoint is not None:
if not os.path.exists(os.path.join(best_model_checkpoint, 'args.json')):
cmd = f'swift eval --ckpt_dir {best_model_checkpoint} ' \
- + f'--infer_backend pt --train_type full --eval_dataset {" ".join(eval_dataset)}'
+ + f'--infer_backend pt --train_type full --eval_dataset {' '.join(eval_dataset)}'
else:
cmd = f'swift eval --model {exp.args.get("model")} --infer_backend pt ' \
f'--eval_dataset {" ".join(eval_dataset)}'
cmd = f'swift eval --model {exp.args.get('model')} --infer_backend pt ' \
f'--eval_dataset {' '.join(eval_dataset)}'

return {
'running_cmd': cmd,
44 changes: 22 additions & 22 deletions scripts/benchmark/generate_report.py
@@ -69,23 +69,23 @@ def tuner_hyper_params(self):
return ''
if args['sft_type'] in ('lora', 'adalora', 'longlora'):
if 'lora_rank' in args:
hyper_params += f'rank={args["lora_rank"]}/' \
f'target={args["lora_target_modules"]}/' \
f'alpha={args["lora_alpha"]}/' \
f'lr_ratio={args.get("lora_lr_ratio", None)}/' \
f'use_rslora={args.get("use_rslora", False)}/' \
f'use_dora={args.get("use_dora", False)}'
hyper_params += f'rank={args['lora_rank']}/' \
f'target={args['lora_target_modules']}/' \
f'alpha={args['lora_alpha']}/' \
f'lr_ratio={args.get('lora_lr_ratio', None)}/' \
f'use_rslora={args.get('use_rslora', False)}/' \
f'use_dora={args.get('use_dora', False)}'
else:
hyper_params = ''
if args['sft_type'] == 'full':
if 'use_galore' in args and args['use_galore'] == 'true':
- hyper_params += f'galore_rank={args["galore_rank"]}/' \
- f'galore_per_parameter={args["galore_optim_per_parameter"]}/' \
- f'galore_with_embedding={args["galore_with_embedding"]}/'
+ hyper_params += f'galore_rank={args['galore_rank']}/' \
+ f'galore_per_parameter={args['galore_optim_per_parameter']}/' \
+ f'galore_with_embedding={args['galore_with_embedding']}/'
if args['sft_type'] == 'llamapro':
- hyper_params += f'num_blocks={args["llamapro_num_new_blocks"]}/'
+ hyper_params += f'num_blocks={args['llamapro_num_new_blocks']}/'
if 'neftune_noise_alpha' in args and args['neftune_noise_alpha']:
- hyper_params += f'neftune_noise_alpha={args["neftune_noise_alpha"]}/'
+ hyper_params += f'neftune_noise_alpha={args['neftune_noise_alpha']}/'

if hyper_params.endswith('/'):
hyper_params = hyper_params[:-1]
@@ -95,8 +95,8 @@ def tuner_hyper_params(self):
def hyper_parameters(self):
if 'learning_rate' not in self.args:
return ''
return f'lr={self.args["learning_rate"]}/' \
f'epoch={self.args["num_train_epochs"]}'
return f'lr={self.args['learning_rate']}/' \
f'epoch={self.args['num_train_epochs']}'

@property
def train_speed(self):
@@ -190,10 +190,10 @@ def generate_sft_report(outputs: List[ModelOutput]):
ceval_acc = '' if not ceval_acc else f'**{ceval_acc:.3f}**'

line = f'|{output.name}|' \
f'{output.args["model_type"]}|' \
f'{output.args.get("dataset")}|' \
f'{output.args.get("train_dataset_mix_ratio", 0.)}|' \
f'{output.args.get("sft_type")}|' \
f'{output.args['model_type']}|' \
f'{output.args.get('dataset')}|' \
f'{output.args.get('train_dataset_mix_ratio', 0.)}|' \
f'{output.args.get('sft_type')}|' \
f'{output.tuner_hyper_params}|' \
f'{output.num_trainable_parameters}({output.trainable_parameters_percentage})|' \
f'{use_flash_attn}|' \
@@ -267,14 +267,14 @@ def generate_export_report(outputs: List[ModelOutput]):
ceval_acc = '' if not ceval_acc else f'**{ceval_acc:.3f}**'

if output.train_dataset_info:
dataset_info = f'{output.args["dataset"]}/{output.train_dataset_info}'
dataset_info = f'{output.args['dataset']}/{output.train_dataset_info}'
else:
dataset_info = f'{output.args["dataset"]}'
dataset_info = f'{output.args['dataset']}'
line = f'|{output.name}|' \
f'{output.args["model_type"]}|' \
f'{output.args['model_type']}|' \
f'{dataset_info}|' \
f'{output.args["quant_method"]}|' \
f'{output.args["quant_bits"]}|' \
f'{output.args['quant_method']}|' \
f'{output.args['quant_bits']}|' \
f'{infer_speed}|' \
f'{gsm8k_acc}|' \
f'{arc_acc}|' \
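
Every hunk in this diff makes the same mechanical change: an expression inside a single-quoted f-string that previously used double quotes (e.g. {" ".join(dataset)} or {args["model_type"]}) is rewritten to reuse single quotes. A minimal sketch of the compatibility difference follows; it is illustrative code, not taken from this PR, and assumes standard CPython parsing rules: reusing the outer quote character inside an f-string replacement field is only accepted from Python 3.12 onwards (PEP 701), while earlier interpreters reject it at parse time.

# Illustrative sketch, not code from this PR; the whole snippet parses only on Python 3.12+.
parts = ['a', 'b']

# Removed style: the inner double quotes differ from the outer single quotes,
# so this line parses on every currently supported Python version.
print(f'--dataset {" ".join(parts)} ')

# Added style: the outer single quote is reused inside the replacement field.
# Valid under PEP 701 (Python 3.12+); a SyntaxError on Python 3.11 and earlier.
print(f'--dataset {' '.join(parts)} ')

If the project still needs to run on interpreters older than Python 3.12, the removed (-) double-quoted form remains the portable choice.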