diff --git a/docs/source/examples.md b/docs/source/examples.md
index f52b4d9e1..9a0e129f5 100644
--- a/docs/source/examples.md
+++ b/docs/source/examples.md
@@ -16,7 +16,7 @@
 |Vision|stable diffusion<br/>stable diffusion XL|[Link](https://github.com/microsoft/Olive/tree/main/examples/stable_diffusion)|`GPU`: with ONNX Runtime optimization for DirectML EP<br/>`GPU`: with ONNX Runtime optimization for CUDA EP<br/>`Intel CPU`: with OpenVINO toolkit
 ||squeezenet|[Link](https://github.com/microsoft/Olive/tree/main/examples/directml/squeezenet)|`GPU`: with ONNX Runtime optimizations with DirectML EP
 ||mobilenet|[Link](https://github.com/microsoft/Olive/tree/main/examples/mobilenet)|`Qualcomm NPU`: with ONNX Runtime static QDQ quantization for ONNX Runtime QNN EP
-||resnet|[Link](https://github.com/microsoft/Olive/tree/main/examples/resnet)|`CPU`: with ONNX Runtime static/dynamic Quantization for ONNX INT8 model<br/>`CPU`: with PyTorch QAT Default Training Loop and ONNX Runtime optimizations for ONNX INT8 model<br/>`CPU`: with PyTorch QAT Lightning Module and ONNX Runtime optimizations for ONNX INT8 model<br/>`AMD DPU`: with AMD Vitis-AI Quantization<br/>`Intel GPU`: with ONNX Runtime optimizations with multiple EPs
+||resnet|[Link](https://github.com/microsoft/Olive/tree/main/examples/resnet)|`CPU`: with ONNX Runtime static/dynamic Quantization for ONNX INT8 model<br/>`CPU`: with PyTorch QAT Default Training Loop and ONNX Runtime optimizations for ONNX INT8 model<br/>`CPU`: with PyTorch QAT Lightning Module and ONNX Runtime optimizations for ONNX INT8 model<br/>`AMD DPU`: with AMD Vitis-AI Quantization<br/>`Intel GPU`: with ONNX Runtime optimizations with multiple EPs<br/>`Qualcomm NPU`: with ONNX Runtime static QDQ quantization for ONNX Runtime QNN EP
 ||VGG|[Link](https://github.com/microsoft/Olive/tree/main/examples/vgg)|`Qualcomm NPU`: with SNPE toolkit
 ||inception|[Link](https://github.com/microsoft/Olive/tree/main/examples/inception)|`Qualcomm NPU`: with SNPE toolkit
 ||super resolution|[Link](https://github.com/microsoft/Olive/tree/main/examples/super_resolution)|`CPU`: with ONNX Runtime pre/post processing integration for a single ONNX model
diff --git a/examples/resnet/README.md b/examples/resnet/README.md
index 43022d1c3..9d7f8e60f 100644
--- a/examples/resnet/README.md
+++ b/examples/resnet/README.md
@@ -5,7 +5,8 @@ This folder contains examples of ResNet optimization using different workflows.
 - CPU: [with PyTorch QAT PyTorch Lightning Module and ORT optimizations](#resnet-optimization-with-qat-pytorch-lightning-module-on-cpu)
 - AMD DPU: [with AMD Vitis-AI Quantization](#resnet-optimization-with-vitis-ai-ptq-on-amd-dpu)
 - Intel GPU: [with OpenVINO and DirectML execution providers in ONNX Runtime](#resnet-optimization-with-openvino-and-dml-execution-providers)
-
+- Qualcomm NPU: [with QNN execution provider in ONNX Runtime](#resnet-optimization-with-qnn-execution-provider)
+
 Go to [How to run](#how-to-run)
 
 ## Optimization Workflows
@@ -45,6 +45,19 @@ This example performs ResNet optimization with OpenVINO and DML execution provi
 - *ONNX Model -> ONNX Runtime performance tuning on multiple ep*
 
 Config file: [resnet_multiple_ep.json](resnet_multiple_ep.json)
+
+### ResNet optimization with QNN execution provider
+This example performs ResNet optimization with the QNN execution provider in ONNX Runtime. It performs the optimization pipeline:
+- *PyTorch Model -> ONNX Model -> QNN Quantized ONNX Model*
+
+Config file: [resnet_ptq_qnn.json](resnet_ptq_qnn.json)
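+
+To try the pipeline above end to end, a minimal invocation (assuming the pip requirements from [How to run](#how-to-run) are installed and a QNN-capable NPU is available) is Olive's workflow runner:
+
+```bash
+python -m olive.workflows.run --config resnet_ptq_qnn.json
+```
+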
 ## How to run
 ### Pip requirements
 Install the necessary python packages:
diff --git a/examples/resnet/imagenet.py b/examples/resnet/imagenet.py
new file mode 100644
index 000000000..2f11acea1
--- /dev/null
+++ b/examples/resnet/imagenet.py
@@ -0,0 +1,80 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+from logging import getLogger
+from pathlib import Path
+
+import numpy as np
+import torchvision.transforms as transforms
+from torch import from_numpy
+from torch.utils.data import Dataset
+
+from olive.data.registry import Registry
+
+logger = getLogger(__name__)
+
+
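+# Torch dataset over preprocessed image/label arrays; yields ({"input": image}, label) pairs.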
+class ImagenetDataset(Dataset):
+    def __init__(self, data):
+        self.images = from_numpy(data["images"])
+        self.labels = from_numpy(data["labels"])
+
+    def __len__(self):
+        return min(len(self.images), len(self.labels))
+
+    def __getitem__(self, idx):
+        return {"input": self.images[idx]}, self.labels[idx]
+
+
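+# Registered post-process: reduce model logits to predicted class indices for the accuracy metric.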
+@Registry.register_post_process()
+def imagenet_post_fun(output):
+    return output.argmax(axis=1)
+
+
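+# Standard ImageNet evaluation preprocessing: resize, 224x224 center crop, and normalization.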
+preprocess = transforms.Compose(
+    [
+        transforms.Resize(256),
+        transforms.CenterCrop(224),
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+    ]
+)
+
+
+@Registry.register_pre_process()
+def dataset_pre_process(output_data, **kwargs):
+    cache_key = kwargs.get("cache_key")
+    cache_file = None
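+    # Reuse a previously cached, preprocessed subset when one exists for this cache key.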
+    if cache_key:
+        cache_file = Path(f"./cache/data/{cache_key}.npz")
+        if cache_file.exists():
+            with np.load(cache_file) as data:
+                return ImagenetDataset(data)
+
+    size = kwargs.get("size", 256)
+    labels = []
+    images = []
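+    # Stream at most `size` samples, preprocessing each image into a normalized tensor.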
+    for i, sample in enumerate(output_data):
+        if i >= size:
+            break
+        image = sample["image"]
+        label = sample["label"]
+        image = image.convert("RGB")
+        image = preprocess(image)
+        images.append(image)
+        labels.append(label)
+
+    result_data = ImagenetDataset({"images": np.array(images), "labels": np.array(labels)})
+
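+    # Cache the preprocessed arrays so later runs skip streaming the dataset again.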
+    if cache_file:
+        cache_file.parent.resolve().mkdir(parents=True, exist_ok=True)
+        np.savez(cache_file, images=np.array(images), labels=np.array(labels))
+
+    return result_data
diff --git a/examples/resnet/resnet_ptq_qnn.json b/examples/resnet/resnet_ptq_qnn.json
new file mode 100644
index 000000000..5378a993c
--- /dev/null
+++ b/examples/resnet/resnet_ptq_qnn.json
@@ -0,0 +1,126 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "microsoft/resnet-50",
+        "task": "image-classification",
+        "io_config": {
+            "input_names": [ "input" ],
+            "input_shapes": [ [ 1, 3, 224, 224 ] ],
+            "output_names": [ "output" ]
+        }
+    },
+    "systems": {
+        "qnn_system": {
+            "type": "LocalSystem",
+            "accelerators": [ { "device": "npu", "execution_providers": [ "QNNExecutionProvider" ] } ]
+        }
+    },
+    "data_configs": [
+        {
+            "name": "quantize_data_config",
+            "type": "HuggingfaceContainer",
+            "user_script": "imagenet.py",
+            "load_dataset_config": {
+                "data_name": "imagenet-1k",
+                "split": "validation",
+                "streaming": true,
+                "trust_remote_code": true
+            },
+            "pre_process_data_config": { "type": "dataset_pre_process", "size": 256, "cache_key": "imagenet256" },
+            "post_process_data_config": { "type": "imagenet_post_fun" }
+        }
+    ],
+    "evaluators": {
+        "common_evaluator": {
+            "metrics": [
+                {
+                    "name": "accuracy",
+                    "type": "accuracy",
+                    "data_config": "quantize_data_config",
+                    "sub_types": [
+                        {
+                            "name": "accuracy_score",
+                            "priority": 1,
+                            "metric_config": { "task": "multiclass", "num_classes": 1001 }
+                        }
+                    ],
+                    "user_config": {
+                        "inference_settings": {
+                            "onnx": {
+                                "session_options": {
+                                    "extra_session_config": { "session.disable_cpu_ep_fallback": "1" }
+                                },
+                                "execution_provider": "QNNExecutionProvider",
+                                "provider_options": [ { "backend_path": "QnnHtp.dll" } ]
+                            }
+                        }
+                    }
+                },
+                {
+                    "name": "latency_qnn",
+                    "type": "latency",
+                    "data_config": "quantize_data_config",
+                    "sub_types": [
+                        { "name": "avg", "priority": 2 },
+                        { "name": "p75" },
+                        { "name": "p90" },
+                        { "name": "p99" }
+                    ],
+                    "user_config": {
+                        "inference_settings": {
+                            "onnx": {
+                                "session_options": {
+                                    "extra_session_config": { "session.disable_cpu_ep_fallback": "1" }
+                                },
+                                "execution_provider": "QNNExecutionProvider",
+                                "provider_options": [ { "backend_path": "QnnHtp.dll" } ]
+                            }
+                        }
+                    }
+                },
+                {
+                    "name": "latency_cpu",
+                    "type": "latency",
+                    "data_config": "quantize_data_config",
+                    "sub_types": [
+                        { "name": "avg", "priority": 3 },
+                        { "name": "p75" },
+                        { "name": "p90" },
+                        { "name": "p99" }
+                    ],
+                    "user_config": {
+                        "inference_settings": { "onnx": { "execution_provider": "CPUExecutionProvider" } }
+                    }
+                }
+            ]
+        }
+    },
+    "passes": {
+        "conversion": {
+            "device": "cpu",
+            "type": "OnnxConversion",
+            "target_opset": 17,
+            "save_as_external_data": true,
+            "all_tensors_to_one_file": true,
+            "dynamic": false,
+            "use_dynamo_exporter": false
+        },
+        "QNNPreprocess": { "type": "QNNPreprocess", "fuse_layernorm": true },
+        "OnnxQuantization": {
+            "type": "OnnxQuantization",
+            "data_config": "quantize_data_config",
+            "activation_type": "QUInt16",
+            "weight_type": "QUInt8",
+            "calibrate_method": "MinMax",
+            "quant_preprocess": true,
+            "prepare_qnn_config": true
+        }
+    },
+    "host": "qnn_system",
+    "target": "qnn_system",
+    "evaluator": "common_evaluator",
+    "cache_dir": "cache",
+    "clean_cache": true,
+    "output_dir": "models/resnet_ptq_qnn",
+    "evaluate_input_model": false
+}