
Update for model examples and remove test code
SarahByrneIntel committed Jul 18, 2024
1 parent 7f2faf9 commit 4b5f857
Showing 6 changed files with 19 additions and 11 deletions.
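All five example files move from passing a dtype directly to from_pretrained to building a CompilerConfig and handing it over via the config keyword. As a quick orientation before the per-file diffs, here is a minimal sketch of the new pattern, assembled from the llama.py changes below (the model id and int4 dtype are simply the values that example uses):

from transformers import AutoTokenizer
from intel_npu_acceleration_library import NPUModelForCausalLM, int4
from intel_npu_acceleration_library.compiler import CompilerConfig

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Compiler options such as the quantization dtype now live in a CompilerConfig
# object passed through `config=`, instead of a bare `dtype=` argument.
compiler_conf = CompilerConfig(dtype=int4)
model = NPUModelForCausalLM.from_pretrained(
    model_id, use_cache=True, config=compiler_conf
).eval()
tokenizer = AutoTokenizer.from_pretrained(model_id)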
examples/llama.py: 4 changes (3 additions & 1 deletion)
@@ -5,11 +5,13 @@

 from transformers import AutoTokenizer, TextStreamer
 from intel_npu_acceleration_library import NPUModelForCausalLM, int4
+from intel_npu_acceleration_library.compiler import CompilerConfig

 model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

+compiler_conf = CompilerConfig(dtype=int4)
 model = NPUModelForCausalLM.from_pretrained(
-    model_id, use_cache=True, dtype=int4, attn_implementation="sdpa"
+    model_id, use_cache=True, config=compiler_conf, attn_implementation="sdpa"
 ).eval()
 tokenizer = AutoTokenizer.from_pretrained(model_id, use_default_system_prompt=True)
 tokenizer.pad_token_id = tokenizer.eos_token_id
examples/llama3.py: 6 changes (5 additions & 1 deletion)
@@ -5,10 +5,14 @@

 from transformers import AutoTokenizer, TextStreamer
 from intel_npu_acceleration_library import NPUModelForCausalLM, int4
+from intel_npu_acceleration_library.compiler import CompilerConfig

 model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

-model = NPUModelForCausalLM.from_pretrained(model_id, dtype=int4, use_cache=True).eval()
+compiler_conf = CompilerConfig(dtype=int4)
+model = NPUModelForCausalLM.from_pretrained(
+    model_id, use_cache=True, config=compiler_conf
+).eval()
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 streamer = TextStreamer(tokenizer, skip_special_tokens=True, skip_prompt=True)

examples/phi-2.py: 4 changes (3 additions & 1 deletion)
@@ -7,12 +7,14 @@
 from langchain.chains import LLMChain
 from langchain.llms import HuggingFacePipeline
 from transformers import AutoTokenizer, pipeline, TextStreamer
+from intel_npu_acceleration_library.compiler import CompilerConfig
 import intel_npu_acceleration_library as npu_lib

 model_id = "microsoft/Phi-2"

+compiler_conf = CompilerConfig(dtype=npu_lib.int4)
 model = npu_lib.NPUModelForCausalLM.from_pretrained(
-    model_id, use_cache=True, dtype=npu_lib.int4
+    model_id, use_cache=True, config=compiler_conf
 ).eval()
 tokenizer = AutoTokenizer.from_pretrained(model_id, use_default_system_prompt=True)
 streamer = TextStreamer(tokenizer, skip_special_tokens=True, skip_prompt=True)
examples/phi-3.py: 4 changes (3 additions & 1 deletion)
@@ -5,15 +5,17 @@

 import torch
 from transformers import AutoTokenizer, pipeline, TextStreamer
+from intel_npu_acceleration_library.compiler import CompilerConfig
 import intel_npu_acceleration_library as npu_lib
 import warnings

 torch.random.manual_seed(0)

+compiler_conf = CompilerConfig(dtype=npu_lib.int4)
 model = npu_lib.NPUModelForCausalLM.from_pretrained(
     "microsoft/Phi-3-mini-4k-instruct",
+    config=compiler_conf,
     torch_dtype="auto",
-    dtype=npu_lib.int4,
 )

 tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
examples/t5.py: 6 changes (5 additions & 1 deletion)
@@ -5,10 +5,14 @@

 from transformers import AutoTokenizer, TextStreamer
 from intel_npu_acceleration_library import NPUModelForSeq2SeqLM
+from intel_npu_acceleration_library.compiler import CompilerConfig

 model_id = "google/flan-t5-small"

-model = NPUModelForSeq2SeqLM.from_pretrained(model_id, use_cache=True).eval()
+compiler_conf = CompilerConfig()
+model = NPUModelForSeq2SeqLM.from_pretrained(
+    model_id, use_cache=True, config=compiler_conf
+).eval()
 tokenizer = AutoTokenizer.from_pretrained(model_id, use_default_system_prompt=True)
 tokenizer.pad_token_id = tokenizer.eos_token_id
 streamer = TextStreamer(tokenizer, skip_special_tokens=True)
intel_npu_acceleration_library/compiler.py: 6 changes (0 additions & 6 deletions)
@@ -112,7 +112,6 @@ def module_optimization(func: Callable) -> torch.nn.Module:
     Returns:
         torch.nn.Module: optimized module
     """
-    module_optimization.counter = 0  # type: ignore[attr-defined]

     def wrapper(model: torch.nn.Module, *args: Any, **kwargs: Any):
         """Recursively apply the optimization function.
@@ -126,12 +125,7 @@ def wrapper(model: torch.nn.Module, *args: Any, **kwargs: Any):
         if not isinstance(model, NPUModuleWrapper):
             for name, layer in model.named_children():
                 new_layer = func(name, layer, *args, **kwargs)
-                if (func.__name__ == "optimize_phi3_MLP") and (
-                    module_optimization.counter >= 5  # type: ignore[attr-defined]
-                ):
-                    new_layer = None
                 if new_layer:
-                    module_optimization.counter += 1  # type: ignore[attr-defined]
                     model.add_module(name, new_layer)
                     if not isinstance(new_layer, NPUModuleWrapper):
                         wrapper(new_layer, *args, **kwargs)
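The compiler.py hunks above delete a test hook: a counter on module_optimization that capped how many layers the optimize_phi3_MLP pass would replace. With the counter gone, the wrapper just walks the module tree and installs whatever the optimization function returns. A simplified sketch of the remaining recursion, reconstructed from the context lines in this diff (func is the decorated optimization function and NPUModuleWrapper comes from library code not shown here):

def wrapper(model, *args, **kwargs):
    # Modules already wrapped for the NPU are left untouched.
    if not isinstance(model, NPUModuleWrapper):
        for name, layer in model.named_children():
            new_layer = func(name, layer, *args, **kwargs)
            if new_layer:
                # Swap in the optimized layer and keep descending,
                # unless the replacement is itself an NPU wrapper.
                model.add_module(name, new_layer)
                if not isinstance(new_layer, NPUModuleWrapper):
                    wrapper(new_layer, *args, **kwargs)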
