add stable diffusion qnn version #1572

Draft · wants to merge 12 commits into base: main

Changes from 8 commits
32 changes: 32 additions & 0 deletions examples/stable_diffusion/README.md
@@ -179,3 +179,35 @@ Inference will loop until the generated image. The result will be saved as `resu
Run `python stable_diffusion.py --help` for additional options. A few particularly relevant ones:
- `--image_path <str>`: the input image path for image-to-image inference.
- `--img_to_img_example`: runs an image-to-image example. The default input image is `assets/dog.png` and the default prompt is `amazing watercolor painting`.

## Stable Diffusion Optimization with QDQ for QNN EP

How to optimize:

`python stable_diffusion.py --model_id stabilityai/stable-diffusion-2-1-base --provider qnn --optimize`

How to test:

`python stable_diffusion.py --model_id stabilityai/stable-diffusion-2-1-base --provider qnn --num_inference_steps 5 --guidance_scale 1 --prompt "hamburger swims in the river" --seed 0`

Unoptimized output (for comparison): `assets/hamburger.png`

`python stable_diffusion.py --model_id stabilityai/stable-diffusion-2-1-base --provider qnn --num_inference_steps 5 --guidance_scale 7.5 --prompt "cat and dog" --seed 0`

Unoptimized output (for comparison): `assets/cat.png`

Note that the QNN EP requires static dimensions (the batch size is fixed to 1), while the standard pipeline batches the negative and text embeddings together for classifier-free guidance. When `guidance_scale > 1`, update `__call__` in `diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py` to split the batch and run the UNet twice:

```python
if do_classifier_free_guidance:
    neg_input, text_input = np.split(latent_model_input, 2)
    neg_embeds, text_embeds = np.split(prompt_embeds, 2)
    noise_pred_uncond = self.unet(sample=neg_input, timestep=timestep, encoder_hidden_states=neg_embeds)
    noise_pred_uncond = noise_pred_uncond[0]
    noise_pred_text = self.unet(sample=text_input, timestep=timestep, encoder_hidden_states=text_embeds)
    noise_pred_text = noise_pred_text[0]
    noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
else:
    noise_pred = self.unet(sample=latent_model_input, timestep=timestep, encoder_hidden_states=prompt_embeds)
    noise_pred = noise_pred[0]
```
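
The patch runs the UNet twice with batch size 1 instead of once with a batch of 2, which keeps the graph compatible with the fixed shapes above. To locate the installed copy of the pipeline file to edit, a quick check like the following can help (a minimal sketch; the exact path depends on your environment, and newer diffusers releases may no longer ship the ONNX pipeline module):

```python
# Print where the installed ONNX Stable Diffusion pipeline lives so it can be patched by hand.
import diffusers.pipelines.stable_diffusion.pipeline_onnx_stable_diffusion as onnx_sd_pipeline

print(onnx_sd_pipeline.__file__)
```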
Binary file added examples/stable_diffusion/assets/cat.png
Binary file added examples/stable_diffusion/assets/hamburger.png
29 changes: 28 additions & 1 deletion examples/stable_diffusion/config_text_encoder.json
@@ -23,6 +23,12 @@
"user_script": "user_script.py",
"load_dataset_config": { "type": "local_dataset" },
"dataloader_config": { "type": "text_encoder_data_loader", "batch_size": 1 }
},
{
"name": "quantize_data_config",
"user_script": "user_script.py",
"load_dataset_config": { "type": "local_dataset" },
"dataloader_config": { "type": "text_encoder_quantize_data_loader", "batch_size": 1 }
}
],
"evaluators": {
@@ -38,7 +44,7 @@
}
},
"passes": {
"convert": { "type": "OnnxConversion", "target_opset": 14 },
"convert": { "type": "OnnxConversion", "target_opset": 17 },
"ov_convert": {
"type": "OpenVINOConversion",
"user_script": "user_script.py",
@@ -83,6 +89,27 @@
"float16": true,
"use_gpu": true,
"keep_io_types": false
},
"dynamic_shape_to_fixed": {
"type": "DynamicToFixedShape",
"dim_param": [ "batch", "sequence" ],
"dim_value": [ 1, 77 ]
},
"qnn_preprocess": {
"type": "QNNPreprocess",
"fuse_layernorm": true
},
"quantization": {
"type": "OnnxStaticQuantization",
"data_config": "quantize_data_config",
"activation_type": "QUInt16",
"weight_type": "QUInt8",
"calibrate_method": "MinMax",
"quant_preprocess": true,
"prepare_qnn_config": true,
"op_types_to_quantize": [ "MatMul", "LayerNormalization", "Reshape", "Transpose", "Mul", "Gather", "Gelu", "Flatten", "ArgMax" ],
"append_first_op_types_to_quantize_list": false,
"nodes_to_exclude": [ "Add", "Softmax" ]
}
},
"pass_flows": [ [ "convert", "optimize" ] ],
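
The three added passes mirror the usual QNN workflow: fix the symbolic `batch`/`sequence` dims, run QNN graph preprocessing, then apply static QDQ quantization with 16-bit activations and 8-bit weights. For readers unfamiliar with what the `quantization` pass ends up doing, here is a rough standalone sketch using onnxruntime's quantization API directly; the model paths and the random calibration reader are illustrative only (not part of this PR), and `QuantType.QUInt16` needs a reasonably recent onnxruntime:

```python
import numpy as np
from onnxruntime.quantization import CalibrationDataReader, QuantType, quantize_static


class RandomTextEncoderReader(CalibrationDataReader):
    """Illustrative calibration reader: feeds a few random token-id batches shaped (1, 77)."""

    def __init__(self, num_samples: int = 8):
        # Real calibration should use tokenized prompts; dtype may need to be int64 depending on the export.
        self._samples = iter(
            {"input_ids": np.random.randint(0, 49408, size=(1, 77), dtype=np.int32)}
            for _ in range(num_samples)
        )

    def get_next(self):
        return next(self._samples, None)


# Static QDQ quantization roughly matching the config above (only a subset of op types shown).
quantize_static(
    "text_encoder.onnx",       # illustrative input path
    "text_encoder_qdq.onnx",   # illustrative output path
    RandomTextEncoderReader(),
    activation_type=QuantType.QUInt16,
    weight_type=QuantType.QUInt8,
    op_types_to_quantize=["MatMul", "LayerNormalization", "Gelu"],
)
```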
26 changes: 25 additions & 1 deletion examples/stable_diffusion/config_unet.json
@@ -32,6 +32,12 @@
"user_script": "user_script.py",
"load_dataset_config": { "type": "local_dataset" },
"dataloader_config": { "type": "unet_data_loader", "batch_size": 1 }
},
{
"name": "quantize_data_config",
"user_script": "user_script.py",
"load_dataset_config": { "type": "local_dataset" },
"dataloader_config": { "type": "unet_quantize_data_loader", "batch_size": 1 }
}
],
"evaluators": {
@@ -49,7 +55,7 @@
"passes": {
"convert": {
"type": "OnnxConversion",
"target_opset": 14,
"target_opset": 17,
"save_as_external_data": true,
"all_tensors_to_one_file": true,
"external_data_name": "weights.pb"
@@ -98,6 +104,24 @@
"float16": true,
"use_gpu": true,
"keep_io_types": false
},
"dynamic_shape_to_fixed": {
"type": "DynamicToFixedShape",
"dim_param": [ "unet_sample_batch", "unet_sample_channels", "unet_sample_height", "unet_sample_width", "unet_time_batch", "unet_hidden_batch", "unet_hidden_sequence" ],
"dim_value": [ 1, 4, 64, 64, 1, 1, 77 ]
},
"qnn_preprocess": {
"type": "QNNPreprocess",
"fuse_layernorm": true
},
"quantization": {
"type": "OnnxStaticQuantization",
"data_config": "quantize_data_config",
"activation_type": "QUInt16",
"weight_type": "QUInt8",
"calibrate_method": "MinMax",
"quant_preprocess": true,
"prepare_qnn_config": true
}
},
"pass_flows": [ [ "convert", "optimize" ] ],
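
The UNet config pins all seven symbolic dimensions (sample 1x4x64x64, timestep batch 1, hidden states 1x77). Note that `update_qnn_config` in `sd_utils/ort.py` (later in this diff) drops `dynamic_shape_to_fixed` for the UNet and clears `dynamic_axes` at conversion time instead. Either way, a quick sanity check that the produced model really has static inputs can look like this (the model path is illustrative):

```python
import onnx

# Point this at the UNet ONNX model produced by the Olive run (path is illustrative).
model = onnx.load("unet_qdq.onnx")
for graph_input in model.graph.input:
    shape = graph_input.type.tensor_type.shape
    dims = [d.dim_value if d.HasField("dim_value") else d.dim_param for d in shape.dim]
    print(graph_input.name, dims)  # for QNN every entry should be a concrete integer
```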
26 changes: 25 additions & 1 deletion examples/stable_diffusion/config_vae_decoder.json
@@ -30,6 +30,12 @@
"user_script": "user_script.py",
"load_dataset_config": { "type": "local_dataset" },
"dataloader_config": { "type": "vae_decoder_data_loader", "batch_size": 1 }
},
{
"name": "quantize_data_config",
"user_script": "user_script.py",
"load_dataset_config": { "type": "local_dataset" },
"dataloader_config": { "type": "vae_decoder_quantize_data_loader", "batch_size": 1 }
}
],
"evaluators": {
@@ -45,7 +51,7 @@
}
},
"passes": {
"convert": { "type": "OnnxConversion", "target_opset": 14 },
"convert": { "type": "OnnxConversion", "target_opset": 17 },
"ov_convert": {
"type": "OpenVINOConversion",
"user_script": "user_script.py",
@@ -90,6 +96,24 @@
"float16": true,
"use_gpu": true,
"keep_io_types": false
},
"dynamic_shape_to_fixed": {
"type": "DynamicToFixedShape",
"dim_param": [ "decoder_batch", "decoder_channels", "decoder_height", "decoder_width" ],
"dim_value": [ 1, 4, 64, 64 ]
},
"qnn_preprocess": {
"type": "QNNPreprocess",
"fuse_layernorm": true
},
"quantization": {
"type": "OnnxStaticQuantization",
"data_config": "quantize_data_config",
"activation_type": "QUInt16",
"weight_type": "QUInt8",
"calibrate_method": "MinMax",
"quant_preprocess": true,
"prepare_qnn_config": true
}
},
"pass_flows": [ [ "convert", "optimize" ] ],
26 changes: 25 additions & 1 deletion examples/stable_diffusion/config_vae_encoder.json
@@ -25,6 +25,12 @@
"user_script": "user_script.py",
"load_dataset_config": { "type": "local_dataset" },
"dataloader_config": { "type": "vae_encoder_data_loader", "batch_size": 1 }
},
{
"name": "quantize_data_config",
"user_script": "user_script.py",
"load_dataset_config": { "type": "local_dataset" },
"dataloader_config": { "type": "vae_encoder_quantize_data_loader", "batch_size": 1 }
}
],
"evaluators": {
@@ -40,7 +46,7 @@
}
},
"passes": {
"convert": { "type": "OnnxConversion", "target_opset": 14 },
"convert": { "type": "OnnxConversion", "target_opset": 17 },
"ov_convert": {
"type": "OpenVINOConversion",
"user_script": "user_script.py",
@@ -85,6 +91,24 @@
"float16": true,
"use_gpu": true,
"keep_io_types": false
},
"dynamic_shape_to_fixed": {
"type": "DynamicToFixedShape",
"dim_param": [ "encoder_batch", "encoder_channels", "encoder_height", "encoder_width", "Addlatent_sample_dim_0", "Addlatent_sample_dim_1", "Addlatent_sample_dim_2", "Addlatent_sample_dim_3" ],
"dim_value": [ 1, 3, 512, 512, 1, 4, 64, 64 ]
},
"qnn_preprocess": {
"type": "QNNPreprocess",
"fuse_layernorm": true
},
"quantization": {
"type": "OnnxStaticQuantization",
"data_config": "quantize_data_config",
"activation_type": "QUInt16",
"weight_type": "QUInt8",
"calibrate_method": "MinMax",
"quant_preprocess": true,
"prepare_qnn_config": true
}
},
"pass_flows": [ [ "convert", "optimize" ] ],
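
In `DynamicToFixedShape`, each `dim_param` entry is paired positionally with the `dim_value` at the same index; the `Addlatent_sample_dim_*` names appear to be auto-generated symbolic dims on the latent output path of the exported VAE encoder. A minimal sketch of doing the same fixing by hand with onnxruntime's model utilities (helper names assume `onnxruntime.tools.onnx_model_utils`; verify against your onnxruntime version, and the paths are illustrative):

```python
import onnx
from onnxruntime.tools.onnx_model_utils import fix_output_shapes, make_dim_param_fixed

model = onnx.load("vae_encoder.onnx")  # illustrative path
for name, value in [("encoder_batch", 1), ("encoder_channels", 3), ("encoder_height", 512), ("encoder_width", 512)]:
    make_dim_param_fixed(model.graph, name, value)
fix_output_shapes(model)  # propagate the now-fixed input dims to the graph outputs
onnx.save(model, "vae_encoder_fixed.onnx")
```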
72 binary files not shown.
1 change: 1 addition & 0 deletions examples/stable_diffusion/sd_utils/config.py
@@ -6,3 +6,4 @@
vae_sample_size = 512
unet_sample_size = 64
cross_attention_dim = 768
rand_data = True
18 changes: 16 additions & 2 deletions examples/stable_diffusion/sd_utils/ort.py
@@ -28,6 +28,19 @@
    return config_cuda


def update_qnn_config(config: Dict, submodel_name: str):
    # TODO onnx or onnxruntime needs to fix this
    if submodel_name == "unet":
        config["input_model"]["io_config"]["dynamic_axes"] = None
        config["pass_flows"] = [["convert", "qnn_preprocess", "quantization"]]
    else:
        config["pass_flows"] = [["convert", "dynamic_shape_to_fixed", "qnn_preprocess", "quantization"]]
    config["systems"]["local_system"]["accelerators"][0]["device"] = "npu"
    config["systems"]["local_system"]["accelerators"][0]["execution_providers"] = ["QNNExecutionProvider"]
    config["evaluator"] = None
    return config


def validate_args(args, provider):
    ort.set_default_logger_severity(4)
    if args.static_dims:
@@ -63,7 +76,7 @@
    for footprint in footprints.values():
        if footprint["from_pass"] == "OnnxConversion":
            conversion_footprint = footprint
        elif footprint["from_pass"] == "OrtTransformersOptimization":
        elif footprint["from_pass"] in ("OrtTransformersOptimization", "OnnxStaticQuantization"):
            optimizer_footprint = footprint

    assert conversion_footprint
@@ -138,7 +151,7 @@
    unet_sample_size = config.unet_sample_size

    if static_dims:
        hidden_batch_size = batch_size if (guidance_scale == 0.0) else batch_size * 2
        hidden_batch_size = batch_size if (guidance_scale <= 1.0) else batch_size * 2
        # Not necessary, but helps DML EP further optimize runtime performance.
        # batch_size is doubled for sample & hidden state because of classifier free guidance:
        # https://github.com/huggingface/diffusers/blob/46c52f9b9607e6ecb29c782c052aea313e6487b7/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py#L672
@@ -163,6 +176,7 @@
    provider_map = {
        "dml": "DmlExecutionProvider",
        "cuda": "CUDAExecutionProvider",
        "qnn": "CPUExecutionProvider",
    }
    assert provider in provider_map, f"Unsupported provider: {provider}"
    return OnnxStableDiffusionPipeline.from_pretrained(
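
For reference, `update_qnn_config` only rewrites an already-loaded config dict. A small usage sketch (run from the example directory; the printed values follow from the function body above):

```python
import json

from sd_utils.ort import update_qnn_config

# Load one of the submodel configs from this example and retarget it at the QNN EP.
with open("config_vae_decoder.json") as f:
    cfg = json.load(f)

cfg = update_qnn_config(cfg, "vae_decoder")
print(cfg["pass_flows"])
# [['convert', 'dynamic_shape_to_fixed', 'qnn_preprocess', 'quantization']]
print(cfg["systems"]["local_system"]["accelerators"][0]["execution_providers"])
# ['QNNExecutionProvider']
```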
10 changes: 7 additions & 3 deletions examples/stable_diffusion/stable_diffusion.py
@@ -177,7 +177,7 @@
    window.mainloop()


def update_config_with_provider(config: Dict, provider: str):
def update_config_with_provider(config: Dict, provider: str, submodel_name: str):
    if provider == "dml":
        # DirectML EP is the default, so no need to update config.
        return config
@@ -189,6 +189,10 @@
        from sd_utils.ov import update_ov_config

        return update_ov_config(config)
    elif provider == "qnn":
        from sd_utils.ort import update_qnn_config

        return update_qnn_config(config, submodel_name)
    else:
        raise ValueError(f"Unsupported provider: {provider}")

@@ -244,7 +248,7 @@
    olive_config = None
    with (script_dir / f"config_{submodel_name}.json").open() as fin:
        olive_config = json.load(fin)
    olive_config = update_config_with_provider(olive_config, provider)
    olive_config = update_config_with_provider(olive_config, provider, submodel_name)

    if submodel_name in ("unet", "text_encoder"):
        olive_config["input_model"]["model_path"] = model_id
@@ -284,7 +288,7 @@

parser.add_argument("--model_id", default="CompVis/stable-diffusion-v1-4", type=str)
parser.add_argument(
"--provider", default="dml", type=str, choices=["dml", "cuda", "openvino"], help="Execution provider to use"
"--provider", default="dml", type=str, choices=["dml", "cuda", "openvino", "qnn"], help="Execution provider to use"

Check notice

Code scanning / lintrunner

RUFF/E501 Note

Line too long (123 > 120).
See https://docs.astral.sh/ruff/rules/line-too-long
    )
    parser.add_argument("--optimize", action="store_true", help="Runs the optimization step")
    parser.add_argument("--clean_cache", action="store_true", help="Deletes the Olive cache")