Skip to content

Commit

Permalink
Add phi4 (#2895)
Browse files Browse the repository at this point in the history
  • Loading branch information
tastelikefeet authored Jan 9, 2025
1 parent c133e41 commit c93be11
Show file tree
Hide file tree
Showing 7 changed files with 43 additions and 1 deletion.
1 change: 1 addition & 0 deletions docs/source/Instruction/支持的模型和数据集.md
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,7 @@
|[LLM-Research/Phi-3-medium-128k-instruct](https://modelscope.cn/models/LLM-Research/Phi-3-medium-128k-instruct)|phi3|phi3|transformers>=4.36|-|[microsoft/Phi-3-medium-128k-instruct](https://huggingface.co/microsoft/Phi-3-medium-128k-instruct)|
|[LLM-Research/Phi-3.5-mini-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-mini-instruct)|phi3|phi3|transformers>=4.36|-|[microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct)|
|[LLM-Research/Phi-3.5-MoE-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-MoE-instruct)|phi3_moe|phi3|transformers>=4.36|-|[microsoft/Phi-3.5-MoE-instruct](https://huggingface.co/microsoft/Phi-3.5-MoE-instruct)|
|[LLM-Research/phi-4](https://modelscope.cn/models/LLM-Research/phi-4)|phi4|phi4|transformers>=4.36|-|[microsoft/phi-4](https://huggingface.co/microsoft/phi-4)|
|[AI-ModelScope/gemma-2b-it](https://modelscope.cn/models/AI-ModelScope/gemma-2b-it)|gemma|gemma|transformers>=4.38|-|[google/gemma-2b-it](https://huggingface.co/google/gemma-2b-it)|
|[AI-ModelScope/gemma-2b](https://modelscope.cn/models/AI-ModelScope/gemma-2b)|gemma|gemma|transformers>=4.38|-|[google/gemma-2b](https://huggingface.co/google/gemma-2b)|
|[AI-ModelScope/gemma-7b](https://modelscope.cn/models/AI-ModelScope/gemma-7b)|gemma|gemma|transformers>=4.38|-|[google/gemma-7b](https://huggingface.co/google/gemma-7b)|
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,7 @@ The table below introduces the models integrated with ms-swift:
|[LLM-Research/Phi-3-medium-128k-instruct](https://modelscope.cn/models/LLM-Research/Phi-3-medium-128k-instruct)|phi3|phi3|transformers>=4.36|-|[microsoft/Phi-3-medium-128k-instruct](https://huggingface.co/microsoft/Phi-3-medium-128k-instruct)|
|[LLM-Research/Phi-3.5-mini-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-mini-instruct)|phi3|phi3|transformers>=4.36|-|[microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct)|
|[LLM-Research/Phi-3.5-MoE-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-MoE-instruct)|phi3_moe|phi3|transformers>=4.36|-|[microsoft/Phi-3.5-MoE-instruct](https://huggingface.co/microsoft/Phi-3.5-MoE-instruct)|
|[LLM-Research/phi-4](https://modelscope.cn/models/LLM-Research/phi-4)|phi4|phi4|transformers>=4.36|-|[microsoft/phi-4](https://huggingface.co/microsoft/phi-4)|
|[AI-ModelScope/gemma-2b-it](https://modelscope.cn/models/AI-ModelScope/gemma-2b-it)|gemma|gemma|transformers>=4.38|-|[google/gemma-2b-it](https://huggingface.co/google/gemma-2b-it)|
|[AI-ModelScope/gemma-2b](https://modelscope.cn/models/AI-ModelScope/gemma-2b)|gemma|gemma|transformers>=4.38|-|[google/gemma-2b](https://huggingface.co/google/gemma-2b)|
|[AI-ModelScope/gemma-7b](https://modelscope.cn/models/AI-ModelScope/gemma-7b)|gemma|gemma|transformers>=4.38|-|[google/gemma-7b](https://huggingface.co/google/gemma-7b)|
Expand Down
1 change: 1 addition & 0 deletions swift/llm/model/constant.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ class LLMModelType:
phi3_small = 'phi3_small'
phi3 = 'phi3'
phi3_moe = 'phi3_moe'
phi4 = 'phi4'

gemma = 'gemma'
gemma2 = 'gemma2'
Expand Down
15 changes: 15 additions & 0 deletions swift/llm/model/model/microsoft.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,21 @@ def get_model_tokenizer_phi(model_dir: str,
model_arch=ModelArch.phi3,
))

# Register phi-4 with the model registry. The checkpoint declares the
# Phi3ForCausalLM architecture, so it reuses ModelArch.phi3 and the generic
# flash-attention loader, but it is paired with its own chat template
# (TemplateType.phi4).
register_model(
    ModelMeta(
        LLMModelType.phi4,
        # Presumably (ModelScope id, Hugging Face id) -- confirm against Model's signature.
        [ModelGroup([Model('LLM-Research/phi-4', 'microsoft/phi-4')])],
        TemplateType.phi4,
        get_model_tokenizer_with_flash_attn,
        architectures=['Phi3ForCausalLM'],
        requires=['transformers>=4.36'],
        model_arch=ModelArch.phi3,
    ))

register_model(
ModelMeta(
LLMModelType.phi3_moe,
Expand Down
1 change: 1 addition & 0 deletions swift/llm/template/constant.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ class LLMTemplateType:
wizardlm2_moe = 'wizardlm2_moe'
gemma = 'gemma'
phi3 = 'phi3'
phi4 = 'phi4'

yuan = 'yuan'
xverse = 'xverse'
Expand Down
15 changes: 15 additions & 0 deletions swift/llm/template/template/microsoft.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,21 @@ class Phi3TemplateMeta(TemplateMeta):
register_template(Phi3TemplateMeta(LLMTemplateType.phi3))


@dataclass
class Phi4TemplateMeta(TemplateMeta):
    """Template metadata for the phi-4 chat format.

    Each turn in the strings below has the shape
    ``<|im_start|>{role}<|im_sep|>{content}<|im_end|>``.
    """
    # Empty by default: no fixed text is placed before the first turn.
    prefix: Prompt = field(default_factory=list)
    # One user turn followed by the opening of the assistant turn.
    prompt: Prompt = field(default_factory=lambda: [
        '<|im_start|>user<|im_sep|>{{QUERY}}<|im_end|><|im_start|>assistant<|im_sep|>',
    ])
    # Both the separator and the suffix are the end-of-turn marker.
    chat_sep: Optional[Prompt] = field(default_factory=lambda: ['<|im_end|>'])
    suffix: Prompt = field(default_factory=lambda: ['<|im_end|>'])
    # System turn, with {{SYSTEM}} as the placeholder for the system text.
    system_prefix: Optional[Prompt] = field(default_factory=lambda: [
        '<|im_start|>system<|im_sep|>{{SYSTEM}}<|im_end|>',
    ])
    auto_add_bos: bool = True


# Register a Phi4TemplateMeta instance created for LLMTemplateType.phi4.
register_template(Phi4TemplateMeta(LLMTemplateType.phi4))


class Phi3VisionTemplate(Template):
image_placeholder = ['<|image|><s>\n'] # <|image|>\n

Expand Down
10 changes: 9 additions & 1 deletion tests/test_align/test_template/test_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ def test_qwen2_5():
_infer_model(pt_engine)


def test_phi4():
    """Run the shared inference check on phi-4 twice: once with the engine's
    default template backend, then again after switching it to 'jinja'."""
    engine = PtEngine('LLM-Research/phi-4')
    # First pass: template backend left at whatever the engine defaults to.
    _infer_model(engine)
    # Second pass: same engine, template backend switched to 'jinja'.
    engine.default_template.template_backend = 'jinja'
    _infer_model(engine)


def test_qwen1half():
pt_engine = PtEngine('Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4')
_infer_model(pt_engine)
Expand Down Expand Up @@ -267,4 +274,5 @@ def test_skywork_reward():
# test_internlm2_reward()
# test_qwen2_reward()
# test_qwen2_5_math()
test_skywork_reward()
# test_skywork_reward()
test_phi4()

0 comments on commit c93be11

Please sign in to comment.