Add phi4 (#2895)

modelscope · Jan 9, 2025 · c93be11 · c93be11
1 parent c133e41
commit c93be11
Show file tree

Hide file tree

Showing 7 changed files with 43 additions and 1 deletion.
diff --git a/docs/source/Instruction/支持的模型和数据集.md b/docs/source/Instruction/支持的模型和数据集.md
@@ -413,6 +413,7 @@
 |[LLM-Research/Phi-3-medium-128k-instruct](https://modelscope.cn/models/LLM-Research/Phi-3-medium-128k-instruct)|phi3|phi3|transformers>=4.36|-|[microsoft/Phi-3-medium-128k-instruct](https://huggingface.co/microsoft/Phi-3-medium-128k-instruct)|
 |[LLM-Research/Phi-3.5-mini-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-mini-instruct)|phi3|phi3|transformers>=4.36|-|[microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct)|
 |[LLM-Research/Phi-3.5-MoE-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-MoE-instruct)|phi3_moe|phi3|transformers>=4.36|-|[microsoft/Phi-3.5-MoE-instruct](https://huggingface.co/microsoft/Phi-3.5-MoE-instruct)|
+|[LLM-Research/phi-4](https://modelscope.cn/models/LLM-Research/phi-4)|phi4|phi4|transformers>=4.36|-|[microsoft/phi-4](https://huggingface.co/microsoft/phi-4)|
 |[AI-ModelScope/gemma-2b-it](https://modelscope.cn/models/AI-ModelScope/gemma-2b-it)|gemma|gemma|transformers>=4.38|-|[google/gemma-2b-it](https://huggingface.co/google/gemma-2b-it)|
 |[AI-ModelScope/gemma-2b](https://modelscope.cn/models/AI-ModelScope/gemma-2b)|gemma|gemma|transformers>=4.38|-|[google/gemma-2b](https://huggingface.co/google/gemma-2b)|
 |[AI-ModelScope/gemma-7b](https://modelscope.cn/models/AI-ModelScope/gemma-7b)|gemma|gemma|transformers>=4.38|-|[google/gemma-7b](https://huggingface.co/google/gemma-7b)|

diff --git a/docs/source_en/Instruction/Supported-models-and-datasets.md b/docs/source_en/Instruction/Supported-models-and-datasets.md
@@ -413,6 +413,7 @@ The table below introduces the models integrated with ms-swift:
 |[LLM-Research/Phi-3-medium-128k-instruct](https://modelscope.cn/models/LLM-Research/Phi-3-medium-128k-instruct)|phi3|phi3|transformers>=4.36|-|[microsoft/Phi-3-medium-128k-instruct](https://huggingface.co/microsoft/Phi-3-medium-128k-instruct)|
 |[LLM-Research/Phi-3.5-mini-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-mini-instruct)|phi3|phi3|transformers>=4.36|-|[microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct)|
 |[LLM-Research/Phi-3.5-MoE-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-MoE-instruct)|phi3_moe|phi3|transformers>=4.36|-|[microsoft/Phi-3.5-MoE-instruct](https://huggingface.co/microsoft/Phi-3.5-MoE-instruct)|
+|[LLM-Research/phi-4](https://modelscope.cn/models/LLM-Research/phi-4)|phi4|phi4|transformers>=4.36|-|[microsoft/phi-4](https://huggingface.co/microsoft/phi-4)|
 |[AI-ModelScope/gemma-2b-it](https://modelscope.cn/models/AI-ModelScope/gemma-2b-it)|gemma|gemma|transformers>=4.38|-|[google/gemma-2b-it](https://huggingface.co/google/gemma-2b-it)|
 |[AI-ModelScope/gemma-2b](https://modelscope.cn/models/AI-ModelScope/gemma-2b)|gemma|gemma|transformers>=4.38|-|[google/gemma-2b](https://huggingface.co/google/gemma-2b)|
 |[AI-ModelScope/gemma-7b](https://modelscope.cn/models/AI-ModelScope/gemma-7b)|gemma|gemma|transformers>=4.38|-|[google/gemma-7b](https://huggingface.co/google/gemma-7b)|

diff --git a/swift/llm/model/constant.py b/swift/llm/model/constant.py
@@ -75,6 +75,7 @@ class LLMModelType:
     phi3_small = 'phi3_small'
     phi3 = 'phi3'
     phi3_moe = 'phi3_moe'
+    phi4 = 'phi4'
 
     gemma = 'gemma'
     gemma2 = 'gemma2'

diff --git a/swift/llm/model/model/microsoft.py b/swift/llm/model/model/microsoft.py
@@ -173,6 +173,21 @@ def get_model_tokenizer_phi(model_dir: str,
         model_arch=ModelArch.phi3,
     ))
 
+register_model(
+    ModelMeta(
+        LLMModelType.phi4,
+        [
+            ModelGroup([
+                Model('LLM-Research/phi-4', 'microsoft/phi-4'),
+            ]),
+        ],
+        TemplateType.phi4,
+        get_model_tokenizer_with_flash_attn,
+        architectures=['Phi3ForCausalLM'],
+        requires=['transformers>=4.36'],
+        model_arch=ModelArch.phi3,
+    ))
+
 register_model(
     ModelMeta(
         LLMModelType.phi3_moe,

diff --git a/swift/llm/template/constant.py b/swift/llm/template/constant.py
@@ -59,6 +59,7 @@ class LLMTemplateType:
     wizardlm2_moe = 'wizardlm2_moe'
     gemma = 'gemma'
     phi3 = 'phi3'
+    phi4 = 'phi4'
 
     yuan = 'yuan'
     xverse = 'xverse'

diff --git a/swift/llm/template/template/microsoft.py b/swift/llm/template/template/microsoft.py
@@ -103,6 +103,21 @@ class Phi3TemplateMeta(TemplateMeta):
 register_template(Phi3TemplateMeta(LLMTemplateType.phi3))
 
 
+@dataclass
+class Phi4TemplateMeta(TemplateMeta):
+    prefix: Prompt = field(default_factory=list)
+    prompt: Prompt = field(
+        default_factory=lambda: ['<|im_start|>user<|im_sep|>{{QUERY}}<|im_end|><|im_start|>assistant<|im_sep|>'])
+    chat_sep: Optional[Prompt] = field(default_factory=lambda: ['<|im_end|>'])
+    suffix: Prompt = field(default_factory=lambda: ['<|im_end|>'])
+    system_prefix: Optional[Prompt] = field(
+        default_factory=lambda: ['<|im_start|>system<|im_sep|>{{SYSTEM}}<|im_end|>'])
+    auto_add_bos: bool = True
+
+
+register_template(Phi4TemplateMeta(LLMTemplateType.phi4))
+
+
 class Phi3VisionTemplate(Template):
     image_placeholder = ['<|image|><s>\n']  # <|image|>\n
 

diff --git a/tests/test_align/test_template/test_llm.py b/tests/test_align/test_template/test_llm.py
@@ -35,6 +35,13 @@ def test_qwen2_5():
     _infer_model(pt_engine)
 
 
+def test_phi4():
+    pt_engine = PtEngine('LLM-Research/phi-4')
+    _infer_model(pt_engine)
+    pt_engine.default_template.template_backend = 'jinja'
+    _infer_model(pt_engine)
+
+
 def test_qwen1half():
     pt_engine = PtEngine('Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4')
     _infer_model(pt_engine)
@@ -267,4 +274,5 @@ def test_skywork_reward():
     # test_internlm2_reward()
     # test_qwen2_reward()
     # test_qwen2_5_math()
-    test_skywork_reward()
+    # test_skywork_reward()
+    test_phi4()