8 changes: 4 additions & 4 deletions examples/train/rft/rft.py
@@ -22,7 +22,7 @@ def do_sample(model: str, model_type: str, dataset: List[str], iter: int):
for device in range(device_count):
sample_cmd = (f'{conda_prefix} USE_OPENCOMPASS_EVALUATOR=True CUDA_VISIBLE_DEVICES={device} swift sample '
f'--model {model} --model_type {model_type} '
f'--dataset {" ".join(dataset)} '
f'--dataset {' '.join(dataset)} '
f'--data_range {device} {device_count} '
f'--max_length 2048 '
f'--system "You are a math model, you should **think step by step** carefully, '
@@ -61,7 +61,7 @@ def do_sample(model: str, model_type: str, dataset: List[str], iter: int):
sample_cmd = (
f'{conda_prefix} USE_OPENCOMPASS_EVALUATOR=True CUDA_VISIBLE_DEVICES={device} swift sample '
f'--model {model} --model_type {model_type} ' # change to --resume_from_checkpoint to use the latest optimizer state # noqa
f'--dataset {" ".join(dataset)} '
f'--dataset {' '.join(dataset)} '
f'--data_range {device} {device_count} '
f'--max_length 2048 '
f'--system "You are a math model, you should **think step by step** carefully, '
@@ -91,7 +91,7 @@ def do_sample(model: str, model_type: str, dataset: List[str], iter: int):
for proc, handler in enumerate(handlers):
handler.wait()
assert os.path.exists(os.path.join('sample_output', f'iter_{iter}_proc_{proc}_sampling.jsonl')), (
f'{os.path.join("sample_output", f"iter_{iter}_proc_{proc}_sampling.jsonl")} not exists, '
f'{os.path.join('sample_output', f"iter_{iter}_proc_{proc}_sampling.jsonl")} not exists, '
'please check the sample logs to get the detail error.')
datasets.append(os.path.join('sample_output', f'iter_{iter}_proc_{proc}_sampling.jsonl'))
print(f'Sampling done, files:{datasets}', flush=True)
@@ -110,7 +110,7 @@ def do_train(model: str, model_type: str, datasets: List[str], iter, cmd='sft'):
ga = 128 // get_device_count() // 2
train_cmd = (f'{conda_prefix} {gpu_prefix} swift {cmd} '
f'--model {model} --model_type {model_type} '
f'--dataset {" ".join(datasets)} '
f'--dataset {' '.join(datasets)} '
f'--max_length 2048 '
f'--num_train_epochs 1 '
f'--load_args false '
10 changes: 5 additions & 5 deletions scripts/benchmark/exp_utils.py
@@ -122,7 +122,7 @@ def run(self, exp: Experiment):
exp.runtime = runtime
envs = deepcopy(runtime.get('env', {}))
envs.update(os.environ)
logger.info(f'Running cmd: {runtime["running_cmd"]}, env: {runtime.get("env", {})}')
logger.info(f'Running cmd: {runtime['running_cmd']}, env: {runtime.get('env', {})}')
os.makedirs('exp', exist_ok=True)
log_file = os.path.join('exp', f'{exp.name}.eval.log')
exp.handler = subprocess.Popen(runtime['running_cmd'] + f' > {log_file} 2>&1', env=envs, shell=True)
@@ -140,7 +140,7 @@ def run(self, exp: Experiment):
exp.runtime = runtime
envs = deepcopy(runtime.get('env', {}))
envs.update(os.environ)
logger.info(f'Running cmd: {runtime["running_cmd"]}, env: {runtime.get("env", {})}')
logger.info(f'Running cmd: {runtime['running_cmd']}, env: {runtime.get('env', {})}')
os.makedirs('exp', exist_ok=True)
log_file = os.path.join('exp', f'{exp.name}.{exp.cmd}.log')
exp.handler = subprocess.Popen(runtime['running_cmd'] + f' > {log_file} 2>&1', env=envs, shell=True)
@@ -162,10 +162,10 @@ def _build_eval_cmd(self, exp: Experiment):
if best_model_checkpoint is not None:
if not os.path.exists(os.path.join(best_model_checkpoint, 'args.json')):
cmd = f'swift eval --ckpt_dir {best_model_checkpoint} ' \
- + f'--infer_backend pt --train_type full --eval_dataset {" ".join(eval_dataset)}'
+ + f'--infer_backend pt --train_type full --eval_dataset {' '.join(eval_dataset)}'
else:
cmd = f'swift eval --model {exp.args.get("model")} --infer_backend pt ' \
f'--eval_dataset {" ".join(eval_dataset)}'
cmd = f'swift eval --model {exp.args.get('model')} --infer_backend pt ' \
f'--eval_dataset {' '.join(eval_dataset)}'

return {
'running_cmd': cmd,
44 changes: 22 additions & 22 deletions scripts/benchmark/generate_report.py
@@ -69,23 +69,23 @@ def tuner_hyper_params(self):
return ''
if args['sft_type'] in ('lora', 'adalora', 'longlora'):
if 'lora_rank' in args:
hyper_params += f'rank={args["lora_rank"]}/' \
f'target={args["lora_target_modules"]}/' \
f'alpha={args["lora_alpha"]}/' \
f'lr_ratio={args.get("lora_lr_ratio", None)}/' \
f'use_rslora={args.get("use_rslora", False)}/' \
f'use_dora={args.get("use_dora", False)}'
hyper_params += f'rank={args['lora_rank']}/' \
f'target={args['lora_target_modules']}/' \
f'alpha={args['lora_alpha']}/' \
f'lr_ratio={args.get('lora_lr_ratio', None)}/' \
f'use_rslora={args.get('use_rslora', False)}/' \
f'use_dora={args.get('use_dora', False)}'
else:
hyper_params = ''
if args['sft_type'] == 'full':
if 'use_galore' in args and args['use_galore'] == 'true':
- hyper_params += f'galore_rank={args["galore_rank"]}/' \
- f'galore_per_parameter={args["galore_optim_per_parameter"]}/' \
- f'galore_with_embedding={args["galore_with_embedding"]}/'
+ hyper_params += f'galore_rank={args['galore_rank']}/' \
+ f'galore_per_parameter={args['galore_optim_per_parameter']}/' \
+ f'galore_with_embedding={args['galore_with_embedding']}/'
if args['sft_type'] == 'llamapro':
- hyper_params += f'num_blocks={args["llamapro_num_new_blocks"]}/'
+ hyper_params += f'num_blocks={args['llamapro_num_new_blocks']}/'
if 'neftune_noise_alpha' in args and args['neftune_noise_alpha']:
- hyper_params += f'neftune_noise_alpha={args["neftune_noise_alpha"]}/'
+ hyper_params += f'neftune_noise_alpha={args['neftune_noise_alpha']}/'

if hyper_params.endswith('/'):
hyper_params = hyper_params[:-1]
@@ -95,8 +95,8 @@ def tuner_hyper_params(self):
def hyper_parameters(self):
if 'learning_rate' not in self.args:
return ''
return f'lr={self.args["learning_rate"]}/' \
f'epoch={self.args["num_train_epochs"]}'
return f'lr={self.args['learning_rate']}/' \
f'epoch={self.args['num_train_epochs']}'

@property
def train_speed(self):
@@ -190,10 +190,10 @@ def generate_sft_report(outputs: List[ModelOutput]):
ceval_acc = '' if not ceval_acc else f'**{ceval_acc:.3f}**'

line = f'|{output.name}|' \
f'{output.args["model_type"]}|' \
f'{output.args.get("dataset")}|' \
f'{output.args.get("train_dataset_mix_ratio", 0.)}|' \
f'{output.args.get("sft_type")}|' \
f'{output.args['model_type']}|' \
f'{output.args.get('dataset')}|' \
f'{output.args.get('train_dataset_mix_ratio', 0.)}|' \
f'{output.args.get('sft_type')}|' \
f'{output.tuner_hyper_params}|' \
f'{output.num_trainable_parameters}({output.trainable_parameters_percentage})|' \
f'{use_flash_attn}|' \
@@ -267,14 +267,14 @@ def generate_export_report(outputs: List[ModelOutput]):
ceval_acc = '' if not ceval_acc else f'**{ceval_acc:.3f}**'

if output.train_dataset_info:
dataset_info = f'{output.args["dataset"]}/{output.train_dataset_info}'
dataset_info = f'{output.args['dataset']}/{output.train_dataset_info}'
else:
dataset_info = f'{output.args["dataset"]}'
dataset_info = f'{output.args['dataset']}'
line = f'|{output.name}|' \
f'{output.args["model_type"]}|' \
f'{output.args['model_type']}|' \
f'{dataset_info}|' \
f'{output.args["quant_method"]}|' \
f'{output.args["quant_bits"]}|' \
f'{output.args['quant_method']}|' \
f'{output.args['quant_bits']}|' \
f'{infer_speed}|' \
f'{gsm8k_acc}|' \
f'{arc_acc}|' \
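
Every hunk in this diff makes the same mechanical change: an expression inside a single-quoted f-string that previously used double quotes (e.g. {" ".join(dataset)} or {args["model_type"]}) is rewritten to reuse single quotes. A minimal sketch of the compatibility difference follows; it is illustrative code, not taken from this PR, and assumes standard CPython parsing rules: reusing the outer quote character inside an f-string replacement field is only accepted from Python 3.12 onwards (PEP 701), while earlier interpreters reject it at parse time.

# Illustrative sketch, not code from this PR; the whole snippet parses only on Python 3.12+.
parts = ['a', 'b']

# Removed style: the inner double quotes differ from the outer single quotes,
# so this line parses on every currently supported Python version.
print(f'--dataset {" ".join(parts)} ')

# Added style: the outer single quote is reused inside the replacement field.
# Valid under PEP 701 (Python 3.12+); a SyntaxError on Python 3.11 and earlier.
print(f'--dataset {' '.join(parts)} ')

If the project still needs to run on interpreters older than Python 3.12, the removed (-) double-quoted form remains the portable choice.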