diff --git a/docs/source/examples.md b/docs/source/examples.md
index f52b4d9e1..9a0e129f5 100644
--- a/docs/source/examples.md
+++ b/docs/source/examples.md
@@ -16,7 +16,7 @@
 |Vision|stable diffusion<br/>stable diffusion XL|[Link](https://github.com/microsoft/Olive/tree/main/examples/stable_diffusion)|`GPU`: with ONNX Runtime optimization for DirectML EP<br/>`GPU`: with ONNX Runtime optimization for CUDA EP<br/>`Intel CPU`: with OpenVINO toolkit
 ||squeezenet|[Link](https://github.com/microsoft/Olive/tree/main/examples/directml/squeezenet)|`GPU`: with ONNX Runtime optimizations with DirectML EP
 ||mobilenet|[Link](https://github.com/microsoft/Olive/tree/main/examples/mobilenet)|`Qualcomm NPU`: with ONNX Runtime static QDQ quantization for ONNX Runtime QNN EP
-||resnet|[Link](https://github.com/microsoft/Olive/tree/main/examples/resnet)|`CPU`: with ONNX Runtime static/dynamic Quantization for ONNX INT8 model<br/>`CPU`: with PyTorch QAT Default Training Loop and ONNX Runtime optimizations for ONNX INT8 model<br/>`CPU`: with PyTorch QAT Lightning Module and ONNX Runtime optimizations for ONNX INT8 model<br/>`AMD DPU`: with AMD Vitis-AI Quantization<br/>`Intel GPU`: with ONNX Runtime optimizations with multiple EPs
+||resnet|[Link](https://github.com/microsoft/Olive/tree/main/examples/resnet)|`CPU`: with ONNX Runtime static/dynamic Quantization for ONNX INT8 model<br/>`CPU`: with PyTorch QAT Default Training Loop and ONNX Runtime optimizations for ONNX INT8 model<br/>`CPU`: with PyTorch QAT Lightning Module and ONNX Runtime optimizations for ONNX INT8 model<br/>`AMD DPU`: with AMD Vitis-AI Quantization<br/>`Intel GPU`: with ONNX Runtime optimizations with multiple EPs<br/>`Qualcomm NPU`: with ONNX Runtime static QDQ quantization for ONNX Runtime QNN EP
 ||VGG|[Link](https://github.com/microsoft/Olive/tree/main/examples/vgg)|`Qualcomm NPU`: with SNPE toolkit
 ||inception|[Link](https://github.com/microsoft/Olive/tree/main/examples/inception)|`Qualcomm NPU`: with SNPE toolkit
 ||super resolution|[Link](https://github.com/microsoft/Olive/tree/main/examples/super_resolution)|`CPU`: with ONNX Runtime pre/post processing integration for a single ONNX model
diff --git a/examples/resnet/README.md b/examples/resnet/README.md
index 43022d1c3..9d7f8e60f 100644
--- a/examples/resnet/README.md
+++ b/examples/resnet/README.md
@@ -5,7 +5,7 @@ This folder contains examples of ResNet optimization using different workflows.
 - CPU: [with PyTorch QAT PyTorch Lightning Module and ORT optimizations](#resnet-optimization-with-qat-pytorch-lightning-module-on-cpu)
 - AMD DPU: [with AMD Vitis-AI Quantization](#resnet-optimization-with-vitis-ai-ptq-on-amd-dpu)
 - Intel GPU: [with OpenVINO and DirectML execution providers in ONNX Runtime](#resnet-optimization-with-openvino-and-dml-execution-providers)
-
+- Qualcomm NPU: [with QNN execution provider in ONNX Runtime](#resnet-optimization-with-qnn-execution-provider)
 Go to [How to run](#how-to-run)
 
 ## Optimization Workflows
@@ -45,6 +45,22 @@ This example performs ResNet optimization with OpenVINO and DML execution provid
 - *ONNX Model -> ONNX Runtime performance tuning on multiple ep*
 
 Config file: [resnet_multiple_ep.json](resnet_multiple_ep.json)
+
+### ResNet optimization with QNN execution provider
+This example performs ResNet optimization with the QNN execution provider in one workflow. It performs the optimization pipeline:
+- *PyTorch Model -> ONNX Model -> QNN Quantized ONNX Model*
+
+Config file: [resnet_ptq_qnn.json](resnet_ptq_qnn.json)
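+
+With the [pip requirements](#pip-requirements) installed, the workflow can be launched through Olive's Python entry point; a minimal sketch (it assumes this folder is the working directory):
+
+```python
+from olive.workflows import run as olive_run
+
+# Runs the passes from the config in order:
+# OnnxConversion -> QNNPreprocess -> OnnxQuantization
+olive_run("resnet_ptq_qnn.json")
+```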
 ## How to run
 ### Pip requirements
 Install the necessary python packages:
diff --git a/examples/resnet/imagenet.py b/examples/resnet/imagenet.py
new file mode 100644
index 000000000..2f11acea1
--- /dev/null
+++ b/examples/resnet/imagenet.py
@@ -0,0 +1,78 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+from logging import getLogger
+from pathlib import Path
+
+import numpy as np
+import torchvision.transforms as transforms
+from torch import from_numpy
+from torch.utils.data import Dataset
+
+from olive.data.registry import Registry
+
+logger = getLogger(__name__)
+
+
+class ImagenetDataset(Dataset):
+    def __init__(self, data):
+        self.images = from_numpy(data["images"])
+        self.labels = from_numpy(data["labels"])
+
+    def __len__(self):
+        return min(len(self.images), len(self.labels))
+
+    def __getitem__(self, idx):
+        return {"input": self.images[idx]}, self.labels[idx]
+
+
+@Registry.register_post_process()
+def imagenet_post_fun(output):
+    return output.argmax(axis=1)
+
+
+preprocess = transforms.Compose(
+    [
+        transforms.Resize(256),
+        transforms.CenterCrop(224),
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+    ]
+)
+
+
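+# `dataset_pre_process` is registered with Olive's data Registry and is referenced by name
+# from the "pre_process_data_config" entry in resnet_ptq_qnn.json. It materializes `size`
+# samples from the streaming imagenet-1k split, applies the torchvision transform above, and
+# caches the tensors under ./cache/data/<cache_key>.npz so later runs skip the preprocessing.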
"sub_types": [ + { "name": "avg", "priority": 2 }, + { "name": "p75" }, + { "name": "p90" }, + { "name": "p99" } + ], + "user_config": { + "inference_settings": { + "onnx": { + "session_options": { + "extra_session_config": { "session.disable_cpu_ep_fallback": "1" } + }, + "execution_provider": "QNNExecutionProvider", + "provider_options": [ { "backend_path": "QnnHtp.dll" } ] + } + } + } + }, + { + "name": "latency_cpu", + "type": "latency", + "data_config": "quantize_data_config", + "sub_types": [ + { "name": "avg", "priority": 3 }, + { "name": "p75" }, + { "name": "p90" }, + { "name": "p99" } + ], + "user_config": { + "inference_settings": { "onnx": { "execution_provider": "CPUExecutionProvider" } } + } + } + ] + } + }, + "passes": { + "conversion": { + "device": "cpu", + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true, + "all_tensors_to_one_file": true, + "dynamic": false, + "use_dynamo_exporter": false + }, + "QNNPreprocess": { "type": "QNNPreprocess", "fuse_layernorm": true }, + "OnnxQuantization": { + "type": "OnnxQuantization", + "data_config": "quantize_data_config", + "activation_type": "QUInt16", + "weight_type": "QUInt8", + "calibrate_method": "MinMax", + "quant_preprocess": true, + "prepare_qnn_config": true + } + }, + "host": "qnn_system", + "target": "qnn_system", + "evaluator": "common_evaluator", + "cache_dir": "cache", + "clean_cache": true, + "output_dir": "models/resnet_ptq_qnn", + "evaluate_input_model": false +}