Fix for compiler, updates for tests and examples, doc update
SarahByrneIntel committed Jul 18, 2024
1 parent e652eaa commit 7f2faf9
Showing 7 changed files with 39 additions and 13 deletions.
18 changes: 16 additions & 2 deletions docs/source/usage.md
@@ -38,19 +38,33 @@ optimized_model = torch.compile(model, backend="npu")

 On Windows, torch.compile is not supported yet, so you might want to use the explicit function `intel_npu_acceleration_library.compile`. This also applies if you use a `pytorch` version < 2.0.0.
 
+To do this, you just need to call the `compile` function with your model and the compiler configuration `CompilerConfig` to compile and optimize the model for the NPU.
 ```python
 import intel_npu_acceleration_library
-optimized_model = intel_npu_acceleration_library.compile(model, dtype=torch.int8)
+from intel_npu_acceleration_library.compiler import CompilerConfig
+compiler_conf = CompilerConfig(dtype=torch.int8)
+optimized_model = intel_npu_acceleration_library.compile(model, compiler_conf)
 
 # Use the model as usual
 
 ```

To compile and optimize a single layer of a model to be pushed to the NPU as one block, you can set `use_to=True` in the the compiler configuration `CompilerConfig`.
```python
import intel_npu_acceleration_library
from intel_npu_acceleration_library.compiler import CompilerConfig
compiler_conf = CompilerConfig(use_to=True, dtype=torch.int8)
optimized_block = intel_npu_acceleration_library.compile(single_block, compiler_conf)

```

 ## Training (**Experimental!**)
 
 It is possible to use Intel® NPU Acceleration Library to train a model. As before, you just need to call the `compile` function, this time with a `CompilerConfig` that sets `training=True`. This lets you use the same training script you use on other devices with minimal modifications.
 
 ```python
 import intel_npu_acceleration_library
-compiled_model = intel_npu_acceleration_library.compile(model, dtype=torch.float32, training=True)
+from intel_npu_acceleration_library.compiler import CompilerConfig
+compiler_conf = CompilerConfig(dtype=torch.float32, training=True)
+compiled_model = intel_npu_acceleration_library.compile(model, compiler_conf)
 ```
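Taken together, the new API documented here is: build a `CompilerConfig`, then pass it to `compile`. A minimal self-contained sketch of that flow (the toy `TinyNet` model and the shapes below are hypothetical, added only to make the snippet runnable):

```python
import torch
from torch import nn
import intel_npu_acceleration_library
from intel_npu_acceleration_library.compiler import CompilerConfig

# Hypothetical toy model, only to make the sketch self-contained
class TinyNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(128, 64)

    def forward(self, x):
        return torch.relu(self.fc(x))

model = TinyNet()

# New API: build the configuration object first, then compile
compiler_conf = CompilerConfig(dtype=torch.int8)
optimized_model = intel_npu_acceleration_library.compile(model, compiler_conf)

with torch.no_grad():
    y = optimized_model(torch.rand(8, 128))
```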
6 changes: 4 additions & 2 deletions examples/compile_model.py
@@ -5,6 +5,7 @@


 from intel_npu_acceleration_library import compile
+from intel_npu_acceleration_library.compiler import CompilerConfig
 from sklearn.metrics import r2_score
 import intel_npu_acceleration_library
 import pytest
@@ -41,12 +42,13 @@ def forward(self, x):
     print(
         "Windows do not support torch.compile, fallback to intel_npu_acceleration_library.compile"
     )
-    compiled_model = intel_npu_acceleration_library.compile(model)
+    compiler_conf = CompilerConfig()
+    compiled_model = intel_npu_acceleration_library.compile(model, compiler_conf)
 else:
     compiled_model = torch.compile(model, backend="npu")
 
 # Get the NPU output
 with torch.no_grad():
     y = compiled_model(x)
 
-print(f"Reference vs actual R2 score: {r2_score(y_ref, y):.2f}")
+print(f"Reference vs actual R2 score: {r2_score(y_ref.numpy(), y.numpy()):.2f}")
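The last change above converts tensors to NumPy arrays before scoring: scikit-learn metrics are defined over array-likes, so the explicit `.numpy()` call avoids relying on implicit tensor-to-array coercion. A minimal sketch with hypothetical data:

```python
import torch
from sklearn.metrics import r2_score

# Hypothetical reference and model outputs
y_ref = torch.rand(100)
y = y_ref + 0.01 * torch.randn(100)

# Convert explicitly to NumPy before calling scikit-learn
print(f"R2 score: {r2_score(y_ref.numpy(), y.numpy()):.2f}")
```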
4 changes: 3 additions & 1 deletion examples/llava.py
@@ -12,6 +12,7 @@
     TextStreamer,
 )
 from transformers.feature_extraction_utils import BatchFeature
+from intel_npu_acceleration_library.compiler import CompilerConfig
 import intel_npu_acceleration_library
 import torch
 
@@ -21,7 +22,8 @@
 # Load model
 model = LlavaForConditionalGeneration.from_pretrained(checkpoint)
 
-model = intel_npu_acceleration_library.compile(model)
+compiler_conf = CompilerConfig()
+model = intel_npu_acceleration_library.compile(model, compiler_conf)
 
 image_processor = CLIPImageProcessor.from_pretrained(checkpoint)
 tokenizer = AutoTokenizer.from_pretrained(checkpoint)
4 changes: 3 additions & 1 deletion examples/tiny_llama_chat.py
@@ -4,6 +4,7 @@
 #
 
 from transformers import pipeline, TextStreamer, set_seed
+from intel_npu_acceleration_library.compiler import CompilerConfig
 import intel_npu_acceleration_library
 import torch
 import os
@@ -15,7 +16,8 @@
"text-generation", model=model_id, torch_dtype=torch.bfloat16, device_map="auto"
)
print("Compiling the model for NPU...")
pipe.model = intel_npu_acceleration_library.compile(pipe.model, dtype=torch.int8)
compiler_conf = CompilerConfig(dtype=torch.int8)
pipe.model = intel_npu_acceleration_library.compile(pipe.model, compiler_conf)

streamer = TextStreamer(pipe.tokenizer, skip_special_tokens=True, skip_prompt=True)

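For context, a sketch of the full pipeline flow this example uses, under the assumption of a TinyLlama chat checkpoint (the model id and prompt are illustrative, not taken from the diff):

```python
from transformers import pipeline, TextStreamer
from intel_npu_acceleration_library.compiler import CompilerConfig
import intel_npu_acceleration_library
import torch

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # assumed checkpoint
pipe = pipeline("text-generation", model=model_id, torch_dtype=torch.bfloat16)

# Quantize to int8 and compile the pipeline's underlying model for the NPU
compiler_conf = CompilerConfig(dtype=torch.int8)
pipe.model = intel_npu_acceleration_library.compile(pipe.model, compiler_conf)

streamer = TextStreamer(pipe.tokenizer, skip_special_tokens=True, skip_prompt=True)
pipe("What is an NPU?", streamer=streamer, max_new_tokens=64)
```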
5 changes: 3 additions & 2 deletions examples/train_mnist.py
@@ -7,6 +7,7 @@
 import torch
 from torch import nn
 import intel_npu_acceleration_library
+from intel_npu_acceleration_library.compiler import CompilerConfig
 from torch.utils.data import DataLoader
 from torchvision import datasets
 from torchvision.transforms import ToTensor
@@ -90,8 +91,8 @@ def test_loop(dataloader, model, loss_fn):


 model = NeuralNetwork()
-
-model = intel_npu_acceleration_library.compile(model, torch.float32, training=True)
+compiler_conf = CompilerConfig(dtype=torch.float32, training=True)
+model = intel_npu_acceleration_library.compile(model, compiler_conf)
 
 learning_rate = 1e-3
 batch_size = 64
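Since this example exercises the experimental training path, here is a minimal sketch of a single training step against a compiled model, assuming the standard loop used elsewhere in this example (the stand-in model, optimizer choice, and shapes are illustrative):

```python
import torch
from torch import nn
import intel_npu_acceleration_library
from intel_npu_acceleration_library.compiler import CompilerConfig

# Hypothetical stand-in for the example's NeuralNetwork
model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
compiler_conf = CompilerConfig(dtype=torch.float32, training=True)
model = intel_npu_acceleration_library.compile(model, compiler_conf)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

# One illustrative step on random data in MNIST-like shapes
x = torch.rand(64, 1, 28, 28)
target = torch.randint(0, 10, (64,))
loss = loss_fn(model(x), target)
optimizer.zero_grad()
loss.backward()
optimizer.step()
```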
3 changes: 1 addition & 2 deletions intel_npu_acceleration_library/compiler.py
@@ -288,14 +288,12 @@ def forward(self, input):
 @register_backend
 def npu(
     gm: Union[torch.nn.Module, torch.fx.GraphModule],
-    config: CompilerConfig,
     example_inputs: List[torch.Tensor],
 ) -> Union[torch.nn.Module, torch.fx.GraphModule]:
     """Implement the custom torch 2.0 compile backend for the NPU.
 
     Args:
         gm (Union[torch.nn.Module, torch.fx.GraphModule]): The torch fx Module
-        config (CompilerConfig): The compiler configuration
         example_inputs (List[torch.Tensor]): A list of example inputs
 
     Returns:
@@ -305,4 +303,5 @@ def npu(
     gm = horizontal_fusion_linear(gm)
 
     # For now compile in fp16
+    config = CompilerConfig()
     return compile(gm, config)
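This hunk is the compiler fix named in the commit title: torch.compile hands a backend only the graph module and example inputs, so `npu` cannot accept a `CompilerConfig` parameter and now builds a default (fp16) one internally. Call sites therefore stay unchanged; a sketch with a hypothetical toy model:

```python
import torch
from torch import nn

model = nn.Linear(32, 32)  # hypothetical toy model

# No configuration is passed: the "npu" backend constructs
# its own default CompilerConfig internally.
optimized_model = torch.compile(model, backend="npu")
```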
12 changes: 9 additions & 3 deletions script/export.py
@@ -5,6 +5,8 @@

 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
 from intel_npu_acceleration_library.compiler import compile
+from intel_npu_acceleration_library.compiler import CompilerConfig
+from intel_npu_acceleration_library.dtypes import int8, int4
 import argparse
 import torch
 import os
@@ -41,15 +43,19 @@ def export(model_id, dtype, output):

     if dtype == "fp16":
         print(f"Compiling model {model_id}")
-        torch_dtype = torch.float16
+        dtype = torch.float16
     elif dtype == "int8":
         print(f"Quantizing & Compiling model {model_id}")
-        torch_dtype = torch.int8
+        dtype = int8
+    elif dtype == "int4":
+        print(f"Quantizing & Compiling model {model_id}")
+        dtype = int4
     else:
         raise RuntimeError(f"Invalid dtype {dtype}")
 
     with torch.no_grad():
-        compile(model, dtype=torch_dtype)
+        compiler_conf = CompilerConfig(dtype=dtype)
+        compile(model, compiler_conf)
 
     filename = os.path.join(PATH, "model.pth")
     os.makedirs(PATH, exist_ok=True)
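A hedged sketch of driving the updated script by calling its `export` helper directly, per the signature shown in the hunk header (the model id and output directory are hypothetical placeholders, and the function is assumed importable from script/export.py):

```python
# Assumes script/export.py is on the import path
from export import export

export(
    model_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # hypothetical model id
    dtype="int8",  # one of "fp16", "int8", or the newly added "int4"
    output="exported_model",  # hypothetical output directory
)
```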
