Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add stable diffusion qnn version #1572

Draft
wants to merge 12 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/stable_diffusion/.gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
/footprints/
/result_*.png
/quantize_data/
32 changes: 32 additions & 0 deletions examples/stable_diffusion/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -179,3 +179,35 @@ Inference will loop until the generated image. The result will be saved as `resu
Run `python stable_diffusion.py --help` for additional options. A few particularly relevant ones:
- `--image_path <str>`: the input image path for image to image inference.
- `--img_to_img_example`: image to image example. The default input image is `assets/dog.png`, the default prompt is `amazing watercolor painting`.

## Stable Diffusion Optimization with QDQ for QNN EP

How to optimize

`python stable_diffusion.py --model_id stabilityai/stable-diffusion-2-1-base --provider qnn --optimize`

How to test

`python stable_diffusion.py --model_id stabilityai/stable-diffusion-2-1-base --provider qnn --num_inference_steps 5 --guidance_scale 1 --prompt "hamburger swims in the river" --seed 0`

Unoptimized: assets/hamburger.png

`python stable_diffusion.py --model_id stabilityai/stable-diffusion-2-1-base --provider qnn --num_inference_steps 5 --guidance_scale 7.5 --prompt "cat and dog" --seed 0`

Unoptimized: assets/cat.png

Note that with QNN we must use static dimensions (batch is fixed to 1), so when `guidance_scale > 1` we need to update the `__call__` method in `diffusers\pipelines\stable_diffusion\pipeline_onnx_stable_diffusion.py` as follows:

```
if do_classifier_free_guidance:
neg_input, text_input = np.split(latent_model_input, 2)
neg_embeds, text_embeds = np.split(prompt_embeds, 2)
noise_pred_uncond = self.unet(sample=neg_input, timestep=timestep, encoder_hidden_states=neg_embeds)
noise_pred_uncond = noise_pred_uncond[0]
noise_pred_text = self.unet(sample=text_input, timestep=timestep, encoder_hidden_states=text_embeds)
noise_pred_text = noise_pred_text[0]
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
else:
noise_pred = self.unet(sample=latent_model_input, timestep=timestep, encoder_hidden_states=prompt_embeds)
noise_pred = noise_pred[0]
```
Binary file added examples/stable_diffusion/assets/cat.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added examples/stable_diffusion/assets/hamburger.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
29 changes: 28 additions & 1 deletion examples/stable_diffusion/config_text_encoder.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@
"user_script": "user_script.py",
"load_dataset_config": { "type": "local_dataset" },
"dataloader_config": { "type": "text_encoder_data_loader", "batch_size": 1 }
},
{
"name": "quantize_data_config",
"user_script": "user_script.py",
"load_dataset_config": { "type": "local_dataset" },
"dataloader_config": { "type": "text_encoder_quantize_data_loader", "batch_size": 1 }
}
],
"evaluators": {
Expand All @@ -38,7 +44,7 @@
}
},
"passes": {
"convert": { "type": "OnnxConversion", "target_opset": 14 },
"convert": { "type": "OnnxConversion", "target_opset": 17 },
"ov_convert": {
"type": "OpenVINOConversion",
"user_script": "user_script.py",
Expand Down Expand Up @@ -83,6 +89,27 @@
"float16": true,
"use_gpu": true,
"keep_io_types": false
},
"dynamic_shape_to_fixed": {
"type": "DynamicToFixedShape",
"dim_param": [ "batch", "sequence" ],
"dim_value": [ 1, 77 ]
},
"qnn_preprocess": {
"type": "QNNPreprocess",
"fuse_layernorm": true
},
"quantization": {
"type": "OnnxStaticQuantization",
"data_config": "quantize_data_config",
"activation_type": "QUInt16",
"weight_type": "QUInt8",
"calibrate_method": "MinMax",
"quant_preprocess": true,
"prepare_qnn_config": true,
"op_types_to_quantize": [ "MatMul", "LayerNormalization", "Reshape", "Transpose", "Mul", "Gather", "Gelu", "Flatten", "ArgMax" ],
"append_first_op_types_to_quantize_list": false,
"nodes_to_exclude": [ "Add", "Softmax" ]
}
},
"pass_flows": [ [ "convert", "optimize" ] ],
Expand Down
26 changes: 25 additions & 1 deletion examples/stable_diffusion/config_unet.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@
"user_script": "user_script.py",
"load_dataset_config": { "type": "local_dataset" },
"dataloader_config": { "type": "unet_data_loader", "batch_size": 1 }
},
{
"name": "quantize_data_config",
"user_script": "user_script.py",
"load_dataset_config": { "type": "local_dataset" },
"dataloader_config": { "type": "unet_quantize_data_loader", "batch_size": 1 }
}
],
"evaluators": {
Expand All @@ -49,7 +55,7 @@
"passes": {
"convert": {
"type": "OnnxConversion",
"target_opset": 14,
"target_opset": 17,
"save_as_external_data": true,
"all_tensors_to_one_file": true,
"external_data_name": "weights.pb"
Expand Down Expand Up @@ -98,6 +104,24 @@
"float16": true,
"use_gpu": true,
"keep_io_types": false
},
"dynamic_shape_to_fixed": {
"type": "DynamicToFixedShape",
"dim_param": [ "unet_sample_batch", "unet_sample_channels", "unet_sample_height", "unet_sample_width", "unet_time_batch", "unet_hidden_batch", "unet_hidden_sequence" ],
"dim_value": [ 1, 4, 64, 64, 1, 1, 77 ]
},
"qnn_preprocess": {
"type": "QNNPreprocess",
"fuse_layernorm": true
},
"quantization": {
"type": "OnnxStaticQuantization",
"data_config": "quantize_data_config",
"activation_type": "QUInt16",
"weight_type": "QUInt8",
"calibrate_method": "MinMax",
"quant_preprocess": true,
"prepare_qnn_config": true
}
},
"pass_flows": [ [ "convert", "optimize" ] ],
Expand Down
26 changes: 25 additions & 1 deletion examples/stable_diffusion/config_vae_decoder.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@
"user_script": "user_script.py",
"load_dataset_config": { "type": "local_dataset" },
"dataloader_config": { "type": "vae_decoder_data_loader", "batch_size": 1 }
},
{
"name": "quantize_data_config",
"user_script": "user_script.py",
"load_dataset_config": { "type": "local_dataset" },
"dataloader_config": { "type": "vae_decoder_quantize_data_loader", "batch_size": 1 }
}
],
"evaluators": {
Expand All @@ -45,7 +51,7 @@
}
},
"passes": {
"convert": { "type": "OnnxConversion", "target_opset": 14 },
"convert": { "type": "OnnxConversion", "target_opset": 17 },
"ov_convert": {
"type": "OpenVINOConversion",
"user_script": "user_script.py",
Expand Down Expand Up @@ -90,6 +96,24 @@
"float16": true,
"use_gpu": true,
"keep_io_types": false
},
"dynamic_shape_to_fixed": {
"type": "DynamicToFixedShape",
"dim_param": [ "decoder_batch", "decoder_channels", "decoder_height", "decoder_width" ],
"dim_value": [ 1, 4, 64, 64 ]
},
"qnn_preprocess": {
"type": "QNNPreprocess",
"fuse_layernorm": true
},
"quantization": {
"type": "OnnxStaticQuantization",
"data_config": "quantize_data_config",
"activation_type": "QUInt16",
"weight_type": "QUInt8",
"calibrate_method": "MinMax",
"quant_preprocess": true,
"prepare_qnn_config": true
}
},
"pass_flows": [ [ "convert", "optimize" ] ],
Expand Down
26 changes: 25 additions & 1 deletion examples/stable_diffusion/config_vae_encoder.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@
"user_script": "user_script.py",
"load_dataset_config": { "type": "local_dataset" },
"dataloader_config": { "type": "vae_encoder_data_loader", "batch_size": 1 }
},
{
"name": "quantize_data_config",
"user_script": "user_script.py",
"load_dataset_config": { "type": "local_dataset" },
"dataloader_config": { "type": "vae_encoder_quantize_data_loader", "batch_size": 1 }
}
],
"evaluators": {
Expand All @@ -40,7 +46,7 @@
}
},
"passes": {
"convert": { "type": "OnnxConversion", "target_opset": 14 },
"convert": { "type": "OnnxConversion", "target_opset": 17 },
"ov_convert": {
"type": "OpenVINOConversion",
"user_script": "user_script.py",
Expand Down Expand Up @@ -85,6 +91,24 @@
"float16": true,
"use_gpu": true,
"keep_io_types": false
},
"dynamic_shape_to_fixed": {
"type": "DynamicToFixedShape",
"dim_param": [ "encoder_batch", "encoder_channels", "encoder_height", "encoder_width", "Addlatent_sample_dim_0", "Addlatent_sample_dim_1", "Addlatent_sample_dim_2", "Addlatent_sample_dim_3" ],
"dim_value": [ 1, 3, 512, 512, 1, 4, 64, 64 ]
},
"qnn_preprocess": {
"type": "QNNPreprocess",
"fuse_layernorm": true
},
"quantization": {
"type": "OnnxStaticQuantization",
"data_config": "quantize_data_config",
"activation_type": "QUInt16",
"weight_type": "QUInt8",
"calibrate_method": "MinMax",
"quant_preprocess": true,
"prepare_qnn_config": true
}
},
"pass_flows": [ [ "convert", "optimize" ] ],
Expand Down
xieofxie marked this conversation as resolved.
Show resolved Hide resolved
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
3 changes: 3 additions & 0 deletions examples/stable_diffusion/sd_utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,6 @@
vae_sample_size = 512
unet_sample_size = 64
cross_attention_dim = 768
rand_data = True
xieofxie marked this conversation as resolved.
Show resolved Hide resolved
data_dir = "quantize_data"
data_num = 10
7 changes: 4 additions & 3 deletions examples/stable_diffusion/sd_utils/ort.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------

import json
import shutil
import sys
Expand Down Expand Up @@ -63,7 +64,7 @@
for footprint in footprints.values():
if footprint["from_pass"] == "OnnxConversion":
conversion_footprint = footprint
elif footprint["from_pass"] == "OrtTransformersOptimization":
elif footprint["from_pass"] == "OrtTransformersOptimization" or footprint["from_pass"] == "OnnxStaticQuantization":
optimizer_footprint = footprint

assert conversion_footprint
Expand All @@ -75,7 +76,7 @@
model_info[submodel_name] = {
"unoptimized": {
"path": Path(unoptimized_olive_model.model_path),
},

Check notice

Code scanning / lintrunner

RUFF/E501 Note

Line too long (127 > 120).
See https://docs.astral.sh/ruff/rules/line-too-long
"optimized": {
"path": Path(optimized_olive_model.model_path),
},
Expand Down Expand Up @@ -138,7 +139,7 @@
unet_sample_size = config.unet_sample_size

if static_dims:
hidden_batch_size = batch_size if (guidance_scale == 0.0) else batch_size * 2
hidden_batch_size = batch_size if (guidance_scale <= 1.0) else batch_size * 2
# Not necessary, but helps DML EP further optimize runtime performance.
# batch_size is doubled for sample & hidden state because of classifier free guidance:
# https://github.com/huggingface/diffusers/blob/46c52f9b9607e6ecb29c782c052aea313e6487b7/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py#L672
Expand All @@ -162,7 +163,7 @@

provider_map = {
"dml": "DmlExecutionProvider",
"cuda": "CUDAExecutionProvider",
"cuda": "CUDAExecutionProvider"
}
assert provider in provider_map, f"Unsupported provider: {provider}"
return OnnxStableDiffusionPipeline.from_pretrained(
Expand Down
Loading
Loading