
Update for model examples and remove test code
SarahByrneIntel committed Jul 18, 2024
1 parent 7f2faf9 commit 4b5f857
Showing 6 changed files with 19 additions and 11 deletions.
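All five example files move from passing a dtype directly to from_pretrained to building a CompilerConfig and handing it over via the config keyword. As a quick orientation before the per-file diffs, here is a minimal sketch of the new pattern, assembled from the llama.py changes below (the model id and int4 dtype are simply the values that example uses):

from transformers import AutoTokenizer
from intel_npu_acceleration_library import NPUModelForCausalLM, int4
from intel_npu_acceleration_library.compiler import CompilerConfig

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Compiler options such as the quantization dtype now live in a CompilerConfig
# object passed through `config=`, instead of a bare `dtype=` argument.
compiler_conf = CompilerConfig(dtype=int4)
model = NPUModelForCausalLM.from_pretrained(
    model_id, use_cache=True, config=compiler_conf
).eval()
tokenizer = AutoTokenizer.from_pretrained(model_id)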
examples/llama.py: 4 changes (3 additions & 1 deletion)
@@ -5,11 +5,13 @@

 from transformers import AutoTokenizer, TextStreamer
 from intel_npu_acceleration_library import NPUModelForCausalLM, int4
+from intel_npu_acceleration_library.compiler import CompilerConfig

 model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

+compiler_conf = CompilerConfig(dtype=int4)
 model = NPUModelForCausalLM.from_pretrained(
-    model_id, use_cache=True, dtype=int4, attn_implementation="sdpa"
+    model_id, use_cache=True, config=compiler_conf, attn_implementation="sdpa"
 ).eval()
 tokenizer = AutoTokenizer.from_pretrained(model_id, use_default_system_prompt=True)
 tokenizer.pad_token_id = tokenizer.eos_token_id
examples/llama3.py: 6 changes (5 additions & 1 deletion)
@@ -5,10 +5,14 @@

 from transformers import AutoTokenizer, TextStreamer
 from intel_npu_acceleration_library import NPUModelForCausalLM, int4
+from intel_npu_acceleration_library.compiler import CompilerConfig

 model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

-model = NPUModelForCausalLM.from_pretrained(model_id, dtype=int4, use_cache=True).eval()
+compiler_conf = CompilerConfig(dtype=int4)
+model = NPUModelForCausalLM.from_pretrained(
+    model_id, use_cache=True, config=compiler_conf
+).eval()
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 streamer = TextStreamer(tokenizer, skip_special_tokens=True, skip_prompt=True)

examples/phi-2.py: 4 changes (3 additions & 1 deletion)
@@ -7,12 +7,14 @@
 from langchain.chains import LLMChain
 from langchain.llms import HuggingFacePipeline
 from transformers import AutoTokenizer, pipeline, TextStreamer
+from intel_npu_acceleration_library.compiler import CompilerConfig
 import intel_npu_acceleration_library as npu_lib

 model_id = "microsoft/Phi-2"

+compiler_conf = CompilerConfig(dtype=npu_lib.int4)
 model = npu_lib.NPUModelForCausalLM.from_pretrained(
-    model_id, use_cache=True, dtype=npu_lib.int4
+    model_id, use_cache=True, config=compiler_conf
 ).eval()
 tokenizer = AutoTokenizer.from_pretrained(model_id, use_default_system_prompt=True)
 streamer = TextStreamer(tokenizer, skip_special_tokens=True, skip_prompt=True)
examples/phi-3.py: 4 changes (3 additions & 1 deletion)
@@ -5,15 +5,17 @@

 import torch
 from transformers import AutoTokenizer, pipeline, TextStreamer
+from intel_npu_acceleration_library.compiler import CompilerConfig
 import intel_npu_acceleration_library as npu_lib
 import warnings

 torch.random.manual_seed(0)

+compiler_conf = CompilerConfig(dtype=npu_lib.int4)
 model = npu_lib.NPUModelForCausalLM.from_pretrained(
     "microsoft/Phi-3-mini-4k-instruct",
+    config=compiler_conf,
     torch_dtype="auto",
-    dtype=npu_lib.int4,
 )

 tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
examples/t5.py: 6 changes (5 additions & 1 deletion)
@@ -5,10 +5,14 @@

 from transformers import AutoTokenizer, TextStreamer
 from intel_npu_acceleration_library import NPUModelForSeq2SeqLM
+from intel_npu_acceleration_library.compiler import CompilerConfig

 model_id = "google/flan-t5-small"

-model = NPUModelForSeq2SeqLM.from_pretrained(model_id, use_cache=True).eval()
+compiler_conf = CompilerConfig()
+model = NPUModelForSeq2SeqLM.from_pretrained(
+    model_id, use_cache=True, config=compiler_conf
+).eval()
 tokenizer = AutoTokenizer.from_pretrained(model_id, use_default_system_prompt=True)
 tokenizer.pad_token_id = tokenizer.eos_token_id
 streamer = TextStreamer(tokenizer, skip_special_tokens=True)
intel_npu_acceleration_library/compiler.py: 6 changes (0 additions & 6 deletions)
@@ -112,7 +112,6 @@ def module_optimization(func: Callable) -> torch.nn.Module:
     Returns:
         torch.nn.Module: optimized module
     """
-    module_optimization.counter = 0  # type: ignore[attr-defined]

     def wrapper(model: torch.nn.Module, *args: Any, **kwargs: Any):
         """Recursively apply the optimization function.
@@ -126,12 +125,7 @@ def wrapper(model: torch.nn.Module, *args: Any, **kwargs: Any):
         if not isinstance(model, NPUModuleWrapper):
             for name, layer in model.named_children():
                 new_layer = func(name, layer, *args, **kwargs)
-                if (func.__name__ == "optimize_phi3_MLP") and (
-                    module_optimization.counter >= 5  # type: ignore[attr-defined]
-                ):
-                    new_layer = None
                 if new_layer:
-                    module_optimization.counter += 1  # type: ignore[attr-defined]
                     model.add_module(name, new_layer)
                     if not isinstance(new_layer, NPUModuleWrapper):
                         wrapper(new_layer, *args, **kwargs)
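The compiler.py hunks above delete a test hook: a counter on module_optimization that capped how many layers the optimize_phi3_MLP pass would replace. With the counter gone, the wrapper just walks the module tree and installs whatever the optimization function returns. A simplified sketch of the remaining recursion, reconstructed from the context lines in this diff (func is the decorated optimization function and NPUModuleWrapper comes from library code not shown here):

def wrapper(model, *args, **kwargs):
    # Modules already wrapped for the NPU are left untouched.
    if not isinstance(model, NPUModuleWrapper):
        for name, layer in model.named_children():
            new_layer = func(name, layer, *args, **kwargs)
            if new_layer:
                # Swap in the optimized layer and keep descending,
                # unless the replacement is itself an NPU wrapper.
                model.add_module(name, new_layer)
                if not isinstance(new_layer, NPUModuleWrapper):
                    wrapper(new_layer, *args, **kwargs)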
