diff --git a/examples/clip/README.md b/examples/clip/README.md
index 30f60d4e0..12505ef60 100644
--- a/examples/clip/README.md
+++ b/examples/clip/README.md
@@ -14,9 +14,11 @@ This workflow performs CLIP VIT optimization on Qualcomm NPU with ONNX Runtime P
 
 It requires x86 python environment on a Windows ARM machine with `onnxruntime-qnn` installed.
 
-Config file: [openai_clip-vit-base-patch16_ptq_qnn.json](openai_clip-vit-base-patch16_ptq_qnn.json)
-Config file: [openai_clip-vit-base-patch32_ptq_qnn.json](openai_clip-vit-base-patch32_ptq_qnn.json)
-Config file: [openai_clip-vit-large-patch14_ptq_qnn.json](openai_clip-vit-large-patch14_ptq_qnn.json)
+OpenAI CLIP model config files: [openai_clip-vit-base-patch16_ptq_qnn.json](openai_clip-vit-base-patch16_ptq_qnn.json),
+[openai_clip-vit-base-patch32_ptq_qnn.json](openai_clip-vit-base-patch32_ptq_qnn.json),
+[openai_clip-vit-large-patch14_ptq_qnn.json](openai_clip-vit-large-patch14_ptq_qnn.json)
+
+OpenCLIP model config file: [laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qnn.json](laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qnn.json)
 
 **NOTE:** The model optimization part of the workflow can also be done on a Linux/Windows machine with a different onnxruntime package installed. Remove the `"evaluators"` and `"evaluator"` sections from the configuration file to skip the evaluation step.
 
diff --git a/examples/clip/laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qnn.json b/examples/clip/laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qnn.json
new file mode 100644
index 000000000..e60d974e1
--- /dev/null
+++ b/examples/clip/laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qnn.json
@@ -0,0 +1,112 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
+        "task": "zero-shot-image-classification",
+        "load_kwargs": { "attn_implementation": "eager" },
+        "io_config": {
+            "input_names": [ "input_ids", "pixel_values", "attention_mask" ],
+            "input_shapes": [ [ 10, 77 ], [ 1, 3, 224, 224 ], [ 10, 77 ] ],
+            "input_types": [ "int64", "float32", "int64" ],
+            "output_names": [ "logits_per_image" ],
+            "output_shapes": [ [ 1, 2 ] ]
+        }
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [ { "device": "npu", "execution_providers": [ "NPUExecutionProvider" ] } ]
+        }
+    },
+    "data_configs": [
+        {
+            "name": "quant_data_config",
+            "user_script": "user_script.py",
+            "load_dataset_config": {
+                "type": "clip_dataset",
+                "model_name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
+                "dataset_name": "nlphuji/flickr30k",
+                "start": 0,
+                "end": 10
+            },
+            "dataloader_config": { "type": "no_auto_batch_dataloader" }
+        },
+        {
+            "name": "metric_data_config",
+            "user_script": "user_script.py",
+            "load_dataset_config": {
+                "type": "clip_dataset",
+                "model_name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
+                "dataset_name": "nlphuji/flickr30k",
+                "start": 10,
+                "end": 20
+            },
+            "dataloader_config": { "type": "no_auto_batch_dataloader" },
+            "post_process_data_config": { "type": "clip_post_process" }
+        }
+    ],
+    "evaluators": {
+        "common_evaluator": {
+            "metrics": [
+                {
+                    "name": "accuracy",
+                    "type": "accuracy",
+                    "backend": "huggingface_metrics",
+                    "data_config": "metric_data_config",
+                    "sub_types": [
+                        { "name": "accuracy", "priority": 1, "goal": { "type": "max-degradation", "value": 0.05 } }
+                    ]
+                },
+                {
+                    "name": "latency_qnn",
+                    "type": "latency",
+                    "data_config": "metric_data_config",
+                    "sub_types": [
+                        { "name": "avg", "priority": 2, "goal": { "type": "percent-min-improvement", "value": 0.1 } },
+                        { "name": "max" },
+                        { "name": "min" }
+                    ]
+                },
+                {
+                    "name": "latency_cpu",
+                    "type": "latency",
+                    "data_config": "metric_data_config",
+                    "sub_types": [
+                        { "name": "avg", "priority": 3, "goal": { "type": "percent-min-improvement", "value": 0.1 } },
+                        { "name": "max" },
+                        { "name": "min" }
+                    ],
+                    "user_config": {
+                        "inference_settings": { "onnx": { "execution_provider": "CPUExecutionProvider" } }
+                    }
+                },
+                {
+                    "name": "throughput",
+                    "type": "throughput",
+                    "data_config": "metric_data_config",
+                    "sub_types": [ { "name": "avg" }, { "name": "max" }, { "name": "min" } ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "conversion": { "type": "OnnxConversion", "target_opset": 17 },
+        "qnn_preprocess": { "type": "QNNPreprocess" },
+        "quantization": {
+            "type": "OnnxStaticQuantization",
+            "quant_preprocess": true,
+            "data_config": "quant_data_config",
+            "op_types_to_quantize": [ "MatMul", "LayerNormalization", "Gemm", "Sigmoid", "Gelu" ],
+            "activation_type": "QUInt16",
+            "weight_type": "QUInt8",
+            "calibrate_method": "MinMax"
+        }
+    },
+    "evaluator": "common_evaluator",
+    "evaluate_input_model": false,
+    "host": "local_system",
+    "target": "local_system",
+    "cache_dir": "cache",
+    "clean_cache": true,
+    "output_dir": "models/CLIP-ViT-B-32-laion2B-s34B-b79K"
+}
diff --git a/examples/clip/openai_clip-vit-base-patch16_ptq_qnn.json b/examples/clip/openai_clip-vit-base-patch16_ptq_qnn.json
index f9f141aca..d7533d320 100644
--- a/examples/clip/openai_clip-vit-base-patch16_ptq_qnn.json
+++ b/examples/clip/openai_clip-vit-base-patch16_ptq_qnn.json
@@ -55,15 +55,7 @@
                     "data_config": "metric_data_config",
                     "sub_types": [
                         { "name": "accuracy", "priority": 1, "goal": { "type": "max-degradation", "value": 0.05 } }
-                    ],
-                    "user_config": {
-                        "inference_settings": {
-                            "onnx": {
-                                "execution_provider": "QNNExecutionProvider",
-                                "provider_options": [ { "backend_path": "QnnHtp.dll" } ]
-                            }
-                        }
-                    }
+                    ]
                 },
                 {
                     "name": "latency_qnn",
@@ -73,15 +65,7 @@
                         { "name": "avg", "priority": 2, "goal": { "type": "percent-min-improvement", "value": 0.1 } },
                         { "name": "max" },
                         { "name": "min" }
-                    ],
-                    "user_config": {
-                        "inference_settings": {
-                            "onnx": {
-                                "execution_provider": "QNNExecutionProvider",
-                                "provider_options": [ { "backend_path": "QnnHtp.dll" } ]
-                            }
-                        }
-                    }
+                    ]
                 },
                 {
                     "name": "latency_cpu",
diff --git a/examples/clip/openai_clip-vit-base-patch32_ptq_qnn.json b/examples/clip/openai_clip-vit-base-patch32_ptq_qnn.json
index c71141c74..74fd39921 100644
--- a/examples/clip/openai_clip-vit-base-patch32_ptq_qnn.json
+++ b/examples/clip/openai_clip-vit-base-patch32_ptq_qnn.json
@@ -55,15 +55,7 @@
                     "data_config": "metric_data_config",
                     "sub_types": [
                         { "name": "accuracy", "priority": 1, "goal": { "type": "max-degradation", "value": 0.05 } }
-                    ],
-                    "user_config": {
-                        "inference_settings": {
-                            "onnx": {
-                                "execution_provider": "QNNExecutionProvider",
-                                "provider_options": [ { "backend_path": "QnnHtp.dll" } ]
-                            }
-                        }
-                    }
+                    ]
                 },
                 {
                     "name": "latency_qnn",
@@ -73,15 +65,7 @@
                         { "name": "avg", "priority": 2, "goal": { "type": "percent-min-improvement", "value": 0.1 } },
                         { "name": "max" },
                         { "name": "min" }
-                    ],
-                    "user_config": {
-                        "inference_settings": {
-                            "onnx": {
-                                "execution_provider": "QNNExecutionProvider",
-                                "provider_options": [ { "backend_path": "QnnHtp.dll" } ]
-                            }
-                        }
-                    }
+                    ]
                 },
                 {
                     "name": "latency_cpu",
diff --git a/examples/clip/openai_clip-vit-large-patch14_ptq_qnn.json b/examples/clip/openai_clip-vit-large-patch14_ptq_qnn.json
index 85c50f87c..77adca3f3 100644
--- a/examples/clip/openai_clip-vit-large-patch14_ptq_qnn.json
+++ b/examples/clip/openai_clip-vit-large-patch14_ptq_qnn.json
@@ -55,15 +55,7 @@
                     "data_config": "metric_data_config",
                     "sub_types": [
                         { "name": "accuracy", "priority": 1, "goal": { "type": "max-degradation", "value": 0.05 } }
-                    ],
-                    "user_config": {
-                        "inference_settings": {
-                            "onnx": {
-                                "execution_provider": "QNNExecutionProvider",
-                                "provider_options": [ { "backend_path": "QnnHtp.dll" } ]
-                            }
-                        }
-                    }
+                    ]
                 },
                 {
                     "name": "latency_qnn",
@@ -73,15 +65,7 @@
                         { "name": "avg", "priority": 2, "goal": { "type": "percent-min-improvement", "value": 0.1 } },
                         { "name": "max" },
                         { "name": "min" }
-                    ],
-                    "user_config": {
-                        "inference_settings": {
-                            "onnx": {
-                                "execution_provider": "QNNExecutionProvider",
-                                "provider_options": [ { "backend_path": "QnnHtp.dll" } ]
-                            }
-                        }
-                    }
+                    ]
                 },
                 {
                     "name": "latency_cpu",
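
With the new config in place, the added workflow is launched the same way as the existing CLIP examples. A minimal sketch using Olive's Python entry point, assuming `olive-ai` and `onnxruntime-qnn` are installed and the working directory is `examples/clip` so that `user_script.py` and the config path resolve:

```python
# Minimal sketch: run the new OpenCLIP PTQ workflow via Olive's Python API.
# Assumes olive-ai and onnxruntime-qnn are installed and the current working
# directory is examples/clip, so user_script.py and the config file resolve.
from olive.workflows import run as olive_run

olive_run("laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qnn.json")
```

The same workflow can be started from the command line with `olive run --config laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qnn.json`; optimized models are written to the `output_dir` set in the config (`models/CLIP-ViT-B-32-laion2B-s34B-b79K`).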