This repository was archived by the owner on Apr 24, 2025. It is now read-only.

Commit 7f2faf9

Author: SarahByrneIntel
Parent: e652eaa

Fix for compiler, updates for tests and examples, doc update

File tree

7 files changed: +39 −13 lines changed

- docs/source/usage.md
- examples/compile_model.py
- examples/llava.py
- examples/tiny_llama_chat.py
- examples/train_mnist.py
- intel_npu_acceleration_library/compiler.py
- script/export.py
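
The change common to every file below is an API migration: `intel_npu_acceleration_library.compile` now takes a `CompilerConfig` object instead of loose keyword arguments such as `dtype=` and `training=`. A minimal before/after sketch of the migration, using a stand-in `torch.nn.Linear` for the model:

```python
import torch
import intel_npu_acceleration_library
from intel_npu_acceleration_library.compiler import CompilerConfig

model = torch.nn.Linear(128, 64)  # stand-in for any torch.nn.Module

# Before this commit: options were passed as keyword arguments
# optimized_model = intel_npu_acceleration_library.compile(model, dtype=torch.int8)

# After this commit: options are bundled into a CompilerConfig
compiler_conf = CompilerConfig(dtype=torch.int8)
optimized_model = intel_npu_acceleration_library.compile(model, compiler_conf)
```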

docs/source/usage.md

Lines changed: 16 additions & 2 deletions

````diff
@@ -38,19 +38,33 @@ optimized_model = torch.compile(model, backend="npu")
 
 In Windows torch.compile is not supported yet, so you might want to use the explicit function `intel_npu_acceleration_library.compile`. This is also true if you use a `pytorch` version < 2.0.0
 
+To do this, you just need to call the `compile` function with your model and the compiler configuration `CompilerConfig` to compile and optimize the model for the NPU.
 ```python
 import intel_npu_acceleration_library
-optimized_model = intel_npu_acceleration_library.compile(model, dtype=torch.int8)
+from intel_npu_acceleration_library.compiler import CompilerConfig
+compiler_conf = CompilerConfig(dtype=torch.int8)
+optimized_model = intel_npu_acceleration_library.compile(model, compiler_conf)
 
 # Use the model as usual
 
 ```
 
+To compile and optimize a single layer of a model to be pushed to the NPU as one block, you can set `use_to=True` in the compiler configuration `CompilerConfig`.
+```python
+import intel_npu_acceleration_library
+from intel_npu_acceleration_library.compiler import CompilerConfig
+compiler_conf = CompilerConfig(use_to=True, dtype=torch.int8)
+optimized_block = intel_npu_acceleration_library.compile(single_block, compiler_conf)
+
+```
+
 ## Training (**Experimental!**)
 
 It is possible to use Intel® NPU Acceleration Library to train a model. As before, you just need to call the `compile` function, this time with `training=True`. This allows you to use the same training script you use on other devices with minimal modifications.
 
 ```python
 import intel_npu_acceleration_library
-compiled_model = intel_npu_acceleration_library.compile(model, dtype=torch.float32, training=True)
+from intel_npu_acceleration_library.compiler import CompilerConfig
+compiler_conf = CompilerConfig(dtype=torch.float32, training=True)
+compiled_model = intel_npu_acceleration_library.compile(model, compiler_conf)
 ```
````
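
The new `use_to=True` snippet in this hunk references `single_block` without defining it. A self-contained sketch, assuming any single `torch.nn.Module` layer can serve as the block (the `Linear` layer here is a hypothetical stand-in):

```python
import torch
import intel_npu_acceleration_library
from intel_npu_acceleration_library.compiler import CompilerConfig

# Hypothetical single layer to be pushed to the NPU as one block
single_block = torch.nn.Linear(256, 256)

compiler_conf = CompilerConfig(use_to=True, dtype=torch.int8)
optimized_block = intel_npu_acceleration_library.compile(single_block, compiler_conf)
```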

examples/compile_model.py

Lines changed: 4 additions & 2 deletions

```diff
@@ -5,6 +5,7 @@
 
 
 from intel_npu_acceleration_library import compile
+from intel_npu_acceleration_library.compiler import CompilerConfig
 from sklearn.metrics import r2_score
 import intel_npu_acceleration_library
 import pytest
@@ -41,12 +42,13 @@ def forward(self, x):
     print(
         "Windows do not support torch.compile, fallback to intel_npu_acceleration_library.compile"
     )
-    compiled_model = intel_npu_acceleration_library.compile(model)
+    compiler_conf = CompilerConfig()
+    compiled_model = intel_npu_acceleration_library.compile(model, compiler_conf)
 else:
     compiled_model = torch.compile(model, backend="npu")
 
 # Get the NPU output
 with torch.no_grad():
     y = compiled_model(x)
 
-print(f"Reference vs actual R2 score: {r2_score(y_ref, y):.2f}")
+print(f"Reference vs actual R2 score: {r2_score(y_ref.numpy(), y.numpy()):.2f}")
```

examples/llava.py

Lines changed: 3 additions & 1 deletion

```diff
@@ -12,6 +12,7 @@
     TextStreamer,
 )
 from transformers.feature_extraction_utils import BatchFeature
+from intel_npu_acceleration_library.compiler import CompilerConfig
 import intel_npu_acceleration_library
 import torch
 
@@ -21,7 +22,8 @@
 # Load model
 model = LlavaForConditionalGeneration.from_pretrained(checkpoint)
 
-model = intel_npu_acceleration_library.compile(model)
+compiler_conf = CompilerConfig()
+model = intel_npu_acceleration_library.compile(model, compiler_conf)
 
 image_processor = CLIPImageProcessor.from_pretrained(checkpoint)
 tokenizer = AutoTokenizer.from_pretrained(checkpoint)
```

examples/tiny_llama_chat.py

Lines changed: 3 additions & 1 deletion

```diff
@@ -4,6 +4,7 @@
 #
 
 from transformers import pipeline, TextStreamer, set_seed
+from intel_npu_acceleration_library.compiler import CompilerConfig
 import intel_npu_acceleration_library
 import torch
 import os
@@ -15,7 +16,8 @@
     "text-generation", model=model_id, torch_dtype=torch.bfloat16, device_map="auto"
 )
 print("Compiling the model for NPU...")
-pipe.model = intel_npu_acceleration_library.compile(pipe.model, dtype=torch.int8)
+compiler_conf = CompilerConfig(dtype=torch.int8)
+pipe.model = intel_npu_acceleration_library.compile(pipe.model, compiler_conf)
 
 streamer = TextStreamer(pipe.tokenizer, skip_special_tokens=True, skip_prompt=True)
 
```

examples/train_mnist.py

Lines changed: 3 additions & 2 deletions

```diff
@@ -7,6 +7,7 @@
 import torch
 from torch import nn
 import intel_npu_acceleration_library
+from intel_npu_acceleration_library.compiler import CompilerConfig
 from torch.utils.data import DataLoader
 from torchvision import datasets
 from torchvision.transforms import ToTensor
@@ -90,8 +91,8 @@ def test_loop(dataloader, model, loss_fn):
 
 
 model = NeuralNetwork()
-
-model = intel_npu_acceleration_library.compile(model, torch.float32, training=True)
+compiler_conf = CompilerConfig(dtype=torch.float32, training=True)
+model = intel_npu_acceleration_library.compile(model, compiler_conf)
 
 learning_rate = 1e-3
 batch_size = 64
```

intel_npu_acceleration_library/compiler.py

Lines changed: 1 addition & 2 deletions

```diff
@@ -288,14 +288,12 @@ def forward(self, input):
 @register_backend
 def npu(
     gm: Union[torch.nn.Module, torch.fx.GraphModule],
-    config: CompilerConfig,
     example_inputs: List[torch.Tensor],
 ) -> Union[torch.nn.Module, torch.fx.GraphModule]:
     """Implement the custom torch 2.0 compile backend for the NPU.
 
     Args:
         gm (Union[torch.nn.Module, torch.fx.GraphModule]): The torch fx Module
-        config (CompilerConfig): The compiler configuration
         example_inputs (List[torch.Tensor]): A list of example inputs
 
     Returns:
@@ -305,4 +303,5 @@ def npu(
     gm = horizontal_fusion_linear(gm)
 
     # For now compile in fp16
+    config = CompilerConfig()
     return compile(gm, config)
```
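
This hunk is the compiler fix named in the commit message: torch invokes a custom `torch.compile` backend as `backend(gm, example_inputs)`, so the extra `config` parameter could never be supplied through that interface. Dropping it and building a default `CompilerConfig()` inside the function (fp16 for now, per the comment) restores the call path sketched below; that importing the library registers the `"npu"` backend is an assumption based on the `@register_backend` decorator:

```python
import torch
import intel_npu_acceleration_library  # assumed to register the "npu" backend on import

model = torch.nn.Sequential(torch.nn.Linear(64, 64), torch.nn.ReLU())

# torch.compile hands the backend only (gm, example_inputs);
# the backend now falls back to a default CompilerConfig() internally
optimized_model = torch.compile(model, backend="npu")
```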

script/export.py

Lines changed: 9 additions & 3 deletions

```diff
@@ -5,6 +5,8 @@
 
 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
 from intel_npu_acceleration_library.compiler import compile
+from intel_npu_acceleration_library.compiler import CompilerConfig
+from intel_npu_acceleration_library.dtypes import int8, int4
 import argparse
 import torch
 import os
@@ -41,15 +43,19 @@ def export(model_id, dtype, output):
 
     if dtype == "fp16":
         print(f"Compiling model {model_id}")
-        torch_dtype = torch.float16
+        dtype = torch.float16
     elif dtype == "int8":
         print(f"Quantizing & Compiling model {model_id}")
-        torch_dtype = torch.int8
+        dtype = int8
+    elif dtype == "int4":
+        print(f"Quantizing & Compiling model {model_id}")
+        dtype = int4
     else:
         raise RuntimeError(f"Invalid dtype {dtype}")
 
     with torch.no_grad():
-        compile(model, dtype=torch_dtype)
+        compiler_conf = CompilerConfig(dtype=dtype)
+        compile(model, compiler_conf)
 
     filename = os.path.join(PATH, "model.pth")
     os.makedirs(PATH, exist_ok=True)
```
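
The new `int4` branch uses the library's own dtype objects rather than torch dtypes, since torch has no 4-bit integer type (`int8` is likewise switched to the library's version). A sketch of the updated export flow under that reading, with a small `Linear` standing in for the loaded checkpoint:

```python
import torch
from intel_npu_acceleration_library.compiler import compile, CompilerConfig
from intel_npu_acceleration_library.dtypes import int4

model = torch.nn.Linear(128, 128)  # stand-in for the AutoModelForCausalLM checkpoint

with torch.no_grad():
    # dtype here is the library's int4 object, not a torch.dtype
    compiler_conf = CompilerConfig(dtype=int4)
    compile(model, compiler_conf)
```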
