Requesting compute resources
Instance
source /opt/app/anaconda3/bin/activate
conda activate llama_etuning
cp -a /opt/app/LLaMA-Efficient-Tuning ./
cd LLaMA-Efficient-Tuning/
Public cluster
salloc -p gpu-a800 --gres=gpu:4
ssh gpu1
source /opt/app/anaconda3/bin/activate
conda activate llama_etuning
cp -a /opt/app/LLaMA-Efficient-Tuning ./
cd ~/LLaMA-Efficient-Tuning/
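Before configuring accelerate, it is worth confirming that the allocation is active and that the GPUs are visible on the node (generic Slurm and NVIDIA utilities, not specific to this cluster):
squeue -u $USER
nvidia-smi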
Configure a personal accelerate config (mainly to set the number of GPUs)
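Running accelerate config once answers a series of prompts and writes the result to ~/.cache/huggingface/accelerate/default_config.yaml. A minimal sketch of that file for a single node with the 4 GPUs requested above (the values are assumptions; answer the prompts to match your own allocation):
compute_environment: LOCAL_MACHINE
distributed_type: MULTI_GPU
machine_rank: 0
main_training_function: main
mixed_precision: fp16
num_machines: 1
num_processes: 4
gpu_ids: all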
Supervised fine-tuning (SFT)
accelerate launch src/train_bash.py \
--stage sft \
--model_name_or_path /opt/app/LLM/Baichuan-13B-Chat \
--do_train \
--dataset alpaca_gpt4_zh \
--template default \
--finetuning_type lora \
--lora_target W_pack \
--output_dir sft_checkpoint \
--overwrite_cache \
--per_device_train_batch_size 4 \
--gradient_accumulation_steps 4 \
--lr_scheduler_type cosine \
--logging_steps 10 \
--save_steps 1000 \
--learning_rate 5e-5 \
--num_train_epochs 3.0 \
--plot_loss \
--fp16 \
--overwrite_output_dir
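After training, the same entry point can run batch prediction on a dataset with the LoRA checkpoint loaded, which is useful as a quick sanity check. A sketch following the repo README (the output directory name is arbitrary; verify flag names against the installed version):
accelerate launch src/train_bash.py \
--stage sft \
--model_name_or_path /opt/app/LLM/Baichuan-13B-Chat \
--do_predict \
--dataset alpaca_gpt4_zh \
--template default \
--finetuning_type lora \
--checkpoint_dir sft_checkpoint \
--output_dir sft_predict \
--per_device_eval_batch_size 4 \
--max_samples 100 \
--predict_with_generate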
Reward model training
accelerate launch src/train_bash.py \
--stage rm \
--model_name_or_path /opt/app/LLM/Baichuan-13B-Chat \
--do_train \
--dataset comparison_gpt4_zh \
--template default \
--finetuning_type lora \
--lora_target W_pack \
--resume_lora_training False \
--checkpoint_dir sft_checkpoint \
--output_dir rm_checkpoint \
--per_device_train_batch_size 2 \
--gradient_accumulation_steps 4 \
--lr_scheduler_type cosine \
--logging_steps 10 \
--save_steps 1000 \
--learning_rate 1e-6 \
--num_train_epochs 1.0 \
--plot_loss \
--fp16 \
--overwrite_output_dir
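The reward model is only an intermediate artifact: it is consumed by the PPO stage, which continues from the SFT adapter. A rough sketch in the same style (the --reward_model flag and the hyperparameters follow the repo README and should be verified against the installed version):
accelerate launch src/train_bash.py \
--stage ppo \
--model_name_or_path /opt/app/LLM/Baichuan-13B-Chat \
--do_train \
--dataset alpaca_gpt4_zh \
--template default \
--finetuning_type lora \
--lora_target W_pack \
--resume_lora_training False \
--checkpoint_dir sft_checkpoint \
--reward_model rm_checkpoint \
--output_dir ppo_checkpoint \
--per_device_train_batch_size 2 \
--gradient_accumulation_steps 4 \
--lr_scheduler_type cosine \
--logging_steps 10 \
--save_steps 1000 \
--learning_rate 1e-5 \
--num_train_epochs 1.0 \
--plot_loss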
Command-line testing
Test the base model
python src/cli_demo.py \
--model_name_or_path baichuan-inc/Baichuan-13B-Chat \
--template default \
--finetuning_type lora
Test the fine-tuned model
python src/cli_demo.py \
--model_name_or_path /opt/app/LLM/Baichuan-13B-Chat \
--template default \
--finetuning_type lora \
--checkpoint_dir sft_checkpoint
Test the reward model
python src/cli_demo.py \
--model_name_or_path /opt/app/LLM/Baichuan-13B-Chat \
--template default \
--finetuning_type lora \
--checkpoint_dir rm_checkpoint
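The same arguments also work with the browser-based demo shipped in the repo, which is more convenient for side-by-side comparison (a sketch; swap checkpoint_dir for whichever checkpoint is being inspected):
python src/web_demo.py \
--model_name_or_path /opt/app/LLM/Baichuan-13B-Chat \
--template default \
--finetuning_type lora \
--checkpoint_dir sft_checkpoint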
References
https://github.com/hiyouga/LLaMA-Efficient-Tuning/blob/main/README_zh.md
https://juejin.cn/post/7232091653065015355