diff --git a/reference/cli.html b/reference/cli.html index 0ac4c6eb0..a4932c05c 100644 --- a/reference/cli.html +++ b/reference/cli.html @@ -479,7 +479,7 @@

Finetune [--train_subset TRAIN_SUBSET] [--train_split TRAIN_SPLIT] [--eval_subset EVAL_SUBSET] [--eval_split EVAL_SPLIT] [--data_files DATA_FILES] - [--text_field TEXT_FIELD | --text_template TEXT_TEMPLATE] + [--text_field TEXT_FIELD | --text_template TEXT_TEMPLATE | --use_chat_template] [--max_seq_len MAX_SEQ_LEN] [--add_special_tokens ADD_SPECIAL_TOKENS] [--max_samples MAX_SAMPLES] [--batch_size BATCH_SIZE] @@ -544,6 +544,10 @@

Named Arguments[--implementation {awq,bnb4,gptq,inc_dynamic,matmul4,mnb_to_qdq,nvmo,onnx_dynamic}] [--enable-qdq-encoding] [-d DATA_NAME] [--subset SUBSET] [--split SPLIT] [--data_files DATA_FILES] - [--text_field TEXT_FIELD | --text_template TEXT_TEMPLATE] + [--text_field TEXT_FIELD | --text_template TEXT_TEMPLATE | --use_chat_template] [--max_seq_len MAX_SEQ_LEN] [--add_special_tokens ADD_SPECIAL_TOKENS] [--max_samples MAX_SAMPLES] [--batch_size BATCH_SIZE] @@ -862,6 +866,10 @@

Named Arguments#<
data_config#
-

- Data config for quantization. Default value is None.

+

Data config for quantization. If not provided, wikitext train data will be used for HfModels. Required for PyTorch models.

type: olive.data.config.DataConfig | Dict

default_value: None

search_defaults: None

@@ -5329,7 +5328,7 @@

Pytorch#<
data_config#
-

Data config for quantization. Default value is None.

+

Data config for quantization. If not provided, pile validation data will be used.

type: olive.data.config.DataConfig | Dict

default_value: None

search_defaults: None

diff --git a/searchindex.js b/searchindex.js index 02b83670c..b3f0a776a 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"alltitles": {" Install with pip": [[5, "install-with-pip"]], "1. Define a new class": [[3, "define-a-new-class"]], "2. Define configuration": [[3, "define-configuration"]], "3. Implement the run function": [[3, "implement-the-run-function"]], " Define the workflow in a YAML file": [[9, "define-the-workflow-in-a-yaml-file"]], " Auto-Optimize the model and adapters": [[7, "auto-optimize-the-model-and-adapters"]], " Automatic model optimization with Olive": [[5, "automatic-model-optimization-with-olive"]], " Run the workflow": [[9, "run-the-workflow"]], " Quickstart": [[6, "quickstart"], [7, "quickstart"], [8, "quickstart"], [9, "quickstart"]], "Accelerator Configuration": [[35, "accelerator-configuration"]], "Accuracy Metric": [[19, "accuracy-metric"]], "Add AML extension to cluster": [[12, "add-aml-extension-to-cluster"]], "Add local Kubernetes cluster to Azure Arc": [[12, "add-local-kubernetes-cluster-to-azure-arc"]], "Alternative Configuration": [[16, "alternative-configuration"]], "Append Pre/Post Processing Ops": [[22, "append-pre-post-processing-ops"]], "AppendPrePostProcessingOps": [[43, "appendprepostprocessingops"]], "Approach": [[28, "approach"]], "Auto Optimization": [[6, null]], "Auto Optimizer Configuration": [[10, "auto-optimizer-configuration"]], "Auto-Optimization": [[39, "auto-optimization"]], "AutoAWQ": [[34, "autoawq"]], "AutoAWQQuantizer": [[43, "autoawqquantizer"]], "AutoGPTQ": [[34, "autogptq"]], "Azure AI": [[14, null]], "Azure AI Integration": [[11, null]], "Azure ML Client": [[42, "azure-ml-client"]], "Azure ML model": [[18, "azure-ml-model"]], "Azure ML scripts": [[13, null]], "Azure Machine Learning client": [[11, "azure-machine-learning-client"]], "AzureML": [[39, "azureml"]], "AzureML Datastore": [[20, "azureml-datastore"]], "AzureML Job Output": [[20, "azureml-job-output"]], "AzureML 
Model": [[20, "azureml-model"]], "AzureML Readymade Systems": [[35, "azureml-readymade-systems"]], "AzureML Registry Model": [[20, "azureml-registry-model"]], "AzureML System": [[35, "azureml-system"]], "AzureML system": [[18, "azureml-system"]], "AzureMLData": [[27, "azuremldata"]], "AzureMLDeployment": [[27, "azuremldeployment"]], "AzureMLModels": [[27, "azuremlmodels"]], "Benefits of using Olive": [[45, "benefits-of-using-olive"]], "CLI": [[28, "cli"]], "CandidateModels": [[27, "candidatemodels"]], "Capture Onnx Graph": [[39, "capture-onnx-graph"]], "CaptureSplitInfo": [[43, "capturesplitinfo"]], "Command Line Tools": [[39, null]], "Composite Model Handler": [[41, "composite-model-handler"]], "Conclusion": [[28, "conclusion"]], "Configs with built-in component": [[17, "configs-with-built-in-component"]], "Configs with customized component": [[17, "configs-with-customized-component"]], "Configurations": [[29, "configurations"], [29, "id3"], [29, "id12"], [29, "id17"], [29, "id20"], [29, "id23"], [29, "id26"]], "Configure Azure Blob Storage": [[16, "configure-azure-blob-storage"]], "Configure Olive QNN": [[25, "configure-olive-qnn"]], "Configure Olive SNPE": [[26, "configure-olive-snpe"]], "Configure Workflows (Advanced)": [[36, "configure-workflows-advanced"]], "Configure an AzureML system": [[15, "configure-an-azureml-system"]], "Configure multiple metrics": [[19, "configure-multiple-metrics"]], "Configure the Shared Cache": [[16, "configure-the-shared-cache"]], "Context Binary Generation": [[25, "context-binary-generation"]], "Convert Adapters": [[39, "convert-adapters"]], "Convert dynamic shape to fixed shape": [[30, "convert-dynamic-shape-to-fixed-shape"]], "Create Kubernetes cluster": [[12, "create-kubernetes-cluster"]], "Custom Metric": [[19, "custom-metric"]], "Custom Scripts": [[1, null]], "Datatype Mapping": [[30, "datatype-mapping"]], "Description": [[29, "description"], [29, "id2"], [29, "id5"], [29, "id7"], [29, "id9"], [29, "id11"], [29, "id14"], 
[29, "id16"], [29, "id19"], [29, "id22"], [29, "id25"]], "Design": [[2, null]], "Distributed Hf Model Handler": [[41, "distributed-hf-model-handler"]], "Distributed Onnx Model Handler": [[41, "distributed-onnx-model-handler"]], "Docker System": [[35, "docker-system"]], "Dockerfile": [[27, "dockerfile"]], "Download and unzip QNN SDK": [[25, "download-and-unzip-qnn-sdk"]], "Download and unzip SNPE SDK": [[26, "download-and-unzip-snpe-sdk"]], "DynamicToFixedShape": [[43, "dynamictofixedshape"]], "Editable install": [[37, "editable-install"]], "Engine": [[2, "engine"]], "Engine Information": [[42, "engine-information"]], "Evaluator": [[2, "evaluator"]], "Evaluators Information": [[42, "evaluators-information"]], "Example": [[3, "example"], [29, "example"], [29, "id1"], [29, "id4"], [29, "id6"], [29, "id8"], [29, "id10"], [29, "id13"], [29, "id15"], [29, "id18"], [29, "id21"], [29, "id24"], [29, "id27"], [42, "example"], [42, "id1"], [42, "id2"], [42, "id3"], [42, "id4"], [42, "id5"], [42, "id6"]], "Example Configuration": [[22, "example-configuration"], [22, "id1"], [22, "id2"], [22, "id3"], [22, "id4"], [22, "id5"], [23, "example-configuration"], [23, "id1"], [24, "example-configuration"], [24, "id1"], [24, "id2"], [24, "id3"], [24, "id4"], [24, "id5"], [25, "example-configuration"], [25, "id1"], [25, "id2"], [26, "example-configuration"], [26, "id1"], [30, "example-configuration"], [30, "id1"], [30, "id2"], [30, "id3"], [30, "id4"], [30, "id5"], [31, "example-configuration"], [33, "example-configuration"], [33, "id1"], [33, "id2"], [33, "id3"], [34, "example-configuration"], [34, "id1"], [34, "id2"], [34, "id3"]], "Examples": [[0, null], [1, "examples"], [1, "id1"]], "Execution Order": [[2, "execution-order"]], "ExhaustiveSearchAlgorithm": [[44, "exhaustivesearchalgorithm"]], "ExposeOutputs": [[29, "exposeoutputs"]], "ExposeQuantizedOutput": [[29, "exposequantizedoutput"]], "Extending Olive": [[4, null]], "Extract Adapters": [[22, "extract-adapters"]], 
"ExtractAdapters": [[43, "extractadapters"]], "Finetune": [[7, null], [39, "finetune"]], "Float16 Conversion": [[30, "float16-conversion"]], "Generate Adapters": [[39, "generate-adapters"]], "Generate Cost Model for Model Splitting": [[39, "generate-cost-model-for-model-splitting"]], "Generic Data Config": [[17, "generic-data-config"]], "Getting started": [[5, null]], "GptqQuantizer": [[43, "gptqquantizer"]], "GraphSurgeries": [[43, "graphsurgeries"]], "Hf Model Handler": [[41, "hf-model-handler"]], "How To Configure Data": [[17, null]], "How To Configure Metrics": [[19, null]], "How To Configure Systems": [[35, null]], "How To Set Model Path": [[20, null]], "How to add new optimization Pass": [[3, null]], "How to configure a Workflow Pass": [[32, null]], "How to package Olive artifacts": [[27, "how-to-package-olive-artifacts"]], "How to use Automatic Optimizer": [[10, null]], "How-to": [[36, null]], "Huggingface Hub model": [[18, "huggingface-hub-model"]], "Huggingface Integration": [[18, null]], "Huggingface datasets": [[18, "huggingface-datasets"]], "Huggingface login": [[18, "huggingface-login"]], "Huggingface metrics": [[18, "huggingface-metrics"]], "Important Note": [[29, "important-note"]], "Important Notes": [[16, "important-notes"]], "IncDynamicQuantization": [[43, "incdynamicquantization"]], "IncQuantization": [[43, "incquantization"]], "IncStaticQuantization": [[43, "incstaticquantization"]], "InferShapes": [[29, "infershapes"]], "Inference config file": [[27, "inference-config-file"]], "Inference model using ONNX Runtime": [[5, "inference-model-using-onnx-runtime"], [6, "inference-model-using-onnx-runtime"], [7, "inference-model-using-onnx-runtime"]], "Input Model": [[18, "input-model"]], "Input Model Information": [[42, "input-model-information"]], "Inputs/Outputs DataType Conversion": [[30, "inputs-outputs-datatype-conversion"]], "Insert Beam Search Op": [[22, "insert-beam-search-op"]], "InsertBeamSearch": [[43, "insertbeamsearch"]], "Install 
Dependencies": [[16, "install-dependencies"]], "Install Extra Dependencies": [[15, "install-extra-dependencies"]], "Install from source": [[37, "install-from-source"]], "Install with pip": [[37, "install-with-pip"]], "Installation": [[37, null]], "Integrations": [[36, "integrations"]], "Introduction": [[18, "introduction"]], "Isolated ORT System": [[35, "isolated-ort-system"]], "Latency Metric": [[19, "latency-metric"]], "Link Azure Arc Kubernetes cluster to Azure Machine Learning": [[12, "link-azure-arc-kubernetes-cluster-to-azure-machine-learning"]], "LoRA": [[24, "lora"], [43, "lora"]], "LoRA options": [[39, "lora-options"]], "LoRA/QLoRA/LoftQ HFTrainingArguments": [[43, "lora-qlora-loftq-hftrainingarguments"]], "Local File": [[20, "local-file"]], "Local Folder": [[20, "local-folder"]], "Local Model Path": [[20, "local-model-path"]], "Local System": [[35, "local-system"]], "Local model": [[18, "local-model"]], "Local system, docker system and Python environment system": [[18, "local-system-docker-system-and-python-environment-system"]], "LoftQ": [[24, "loftq"], [43, "loftq"]], "Log-in to Hugging Face": [[5, "log-in-to-hugging-face"]], "Managed AzureML System": [[35, "managed-azureml-system"]], "Managed Docker System": [[35, "managed-docker-system"]], "Managed Python Environment System": [[35, "managed-python-environment-system"]], "MatMulNBitsToQDQ": [[43, "matmulnbitstoqdq"]], "MergeAdapterWeights": [[24, "mergeadapterweights"], [43, "mergeadapterweights"]], "Metric Types": [[19, "metric-types"]], "Metrics file": [[27, "metrics-file"]], "Mixed Precision Conversion": [[30, "mixed-precision-conversion"]], "MixedPrecisionOverrides": [[43, "mixedprecisionoverrides"]], "Model Builder options": [[39, "model-builder-options"]], "Model Configuration": [[21, null], [41, "model-configuration"]], "Model Conversion": [[23, "model-conversion"], [26, "model-conversion"], [30, "model-conversion"]], "Model Conversion/Quantization": [[25, "model-conversion-quantization"]], 
"Model Library Generation": [[25, "model-library-generation"]], "Model Optimization": [[21, "model-optimization"]], "Model Script File Information": [[39, "model-script-file-information"]], "Model Splitting": [[28, null]], "Model config loading": [[18, "model-config-loading"]], "Model configuration file": [[27, "model-configuration-file"]], "ModelBuilder": [[43, "modelbuilder"]], "Models rank JSON file": [[27, "models-rank-json-file"]], "More details on arguments": [[5, "more-details-on-arguments"], [6, "more-details-on-arguments"]], "NVIDIA TensorRT Model Optimizer-Windows": [[33, "nvidia-tensorrt-model-optimizer-windows"]], "Named Arguments": [[39, "named-arguments"], [39, "named-arguments"], [39, "named-arguments"], [39, "named-arguments"], [39, "named-arguments"], [39, "named-arguments"], [39, "named-arguments"], [39, "named-arguments"], [39, "named-arguments"], [39, "named-arguments"], [39, "named-arguments"], [39, "named-arguments"]], "Native AzureML System": [[35, "native-azureml-system"]], "Native Docker System": [[35, "native-docker-system"]], "Native Python Environment System": [[35, "native-python-environment-system"]], "Notebook available!": [[5, "notebook-available"]], "ONNX": [[22, null], [30, null], [43, "onnx"]], "ONNX Model Handler": [[41, "onnx-model-handler"]], "ONNX Quantization": [[33, null]], "ONNX Surgeon Classes Documentation": [[29, null]], "ORT Performance Tuning": [[22, "ort-performance-tuning"]], "ORT Transformers Optimization": [[22, "ort-transformers-optimization"]], "Olive Options": [[42, null]], "Olive: The AI Model Optimization Toolkit for the ONNX Runtime": [[38, null]], "OliveModels": [[41, null]], "OnnxConversion": [[43, "onnxconversion"]], "OnnxDynamicQuantization": [[43, "onnxdynamicquantization"]], "OnnxFloatToFloat16": [[43, "onnxfloattofloat16"]], "OnnxIODataTypeConverter": [[43, "onnxiodatatypeconverter"]], "OnnxMatMul4Quantizer": [[43, "onnxmatmul4quantizer"]], "OnnxOpVersionConversion": [[43, "onnxopversionconversion"]], 
"OnnxPeepholeOptimizer": [[43, "onnxpeepholeoptimizer"]], "OnnxQuantization": [[43, "onnxquantization"]], "OnnxStaticQuantization": [[43, "onnxstaticquantization"]], "OpenVINO": [[23, null], [43, "openvino"]], "OpenVINO Model Handler": [[41, "openvino-model-handler"]], "OpenVINOConversion": [[43, "openvinoconversion"]], "OpenVINOQuantization": [[43, "openvinoquantization"]], "OptimumConversion": [[43, "optimumconversion"]], "OptimumMerging": [[43, "optimummerging"]], "Option 1: install Olive with OpenVINO extras": [[23, "option-1-install-olive-with-openvino-extras"]], "Option 2: Install OpenVINO Runtime and OpenVINO Development Tools from Pypi": [[23, "option-2-install-openvino-runtime-and-openvino-development-tools-from-pypi"]], "Optional Dependencies": [[37, "optional-dependencies"]], "OrtMixedPrecision": [[43, "ortmixedprecision"]], "OrtSessionParamsTuning": [[43, "ortsessionparamstuning"]], "OrtTransformersOptimization": [[43, "orttransformersoptimization"]], "Overview": [[16, "overview"], [45, null]], "Packaged files": [[27, "packaged-files"]], "Packaging Olive artifacts": [[27, null]], "Pass": [[2, "pass"]], "Pass Flows Information": [[42, "pass-flows-information"]], "Passes": [[43, null]], "Passes Information": [[42, "passes-information"]], "Peeophole Optimizer": [[22, "peeophole-optimizer"]], "Post Training Quantization (PTQ)": [[23, "post-training-quantization-ptq"], [26, "post-training-quantization-ptq"]], "Pre-processing for Finetuning": [[8, "pre-processing-for-finetuning"]], "Prerequisites": [[23, "prerequisites"], [25, "prerequisites"], [26, "prerequisites"], [35, "prerequisites"], [35, "id1"]], "Providing Input Models": [[39, "providing-input-models"]], "PyTorch": [[24, null], [31, null]], "PyTorch Exporter options": [[39, "pytorch-exporter-options"]], "PyTorch Model Handler": [[41, "pytorch-model-handler"]], "PyTorch Quantization": [[34, null]], "Python Environment System": [[35, "python-environment-system"]], "Pytorch": [[43, "pytorch"]], "QLoRA": 
[[24, "qlora"], [43, "qlora"]], "QNN": [[25, null], [43, "qnn"]], "QNNContextBinaryGenerator": [[43, "qnncontextbinarygenerator"]], "QNNConversion": [[43, "qnnconversion"]], "QNNModelLibGenerator": [[43, "qnnmodellibgenerator"]], "QNNPreprocess": [[43, "qnnpreprocess"]], "QuaRot": [[34, "quarot"], [43, "quarot"]], "Qualcomm SDK": [[39, "qualcomm-sdk"]], "Quantization": [[39, "quantization"]], "Quantization Aware Training": [[24, "quantization-aware-training"]], "Quantization with ONNX Optimizations": [[8, "quantization-with-onnx-optimizations"]], "QuantizationAwareTraining": [[43, "quantizationawaretraining"]], "Quantize": [[8, null]], "Quantize with AMD Vitis AI Quantizer": [[33, "quantize-with-amd-vitis-ai-quantizer"]], "Quantize with Intel\u00ae Neural Compressor": [[33, "quantize-with-intel-neural-compressor"]], "Quantize with onnxruntime": [[33, "quantize-with-onnxruntime"]], "RandomSearchAlgorithm": [[44, "randomsearchalgorithm"]], "Reference": [[40, null]], "Remote Model Path": [[20, "remote-model-path"]], "Remote Workflow": [[15, null]], "RemoveInitializerFromInputs": [[29, "removeinitializerfrominputs"]], "RemoveInputs": [[29, "removeinputs"]], "RemoveShapes": [[29, "removeshapes"]], "RenameInputs": [[29, "renameinputs"]], "RenameOutputs": [[29, "renameoutputs"]], "ReorderInputs": [[29, "reorderinputs"]], "ReplaceErfWithTanh": [[29, "replaceerfwithtanh"]], "Run": [[39, "run"]], "Run Olive Workflow": [[15, "run-olive-workflow"]], "Run Olive workflows": [[9, null]], "Running Olive Workflow Remotely on Azure Machine Learning workspace compute": [[15, "running-olive-workflow-remotely-on-azure-machine-learning-workspace-compute"]], "SNPE": [[26, null], [43, "snpe"]], "SNPE Model Handler": [[41, "snpe-model-handler"]], "SNPEConversion": [[43, "snpeconversion"]], "SNPEQuantization": [[43, "snpequantization"]], "SNPEtoONNXConversion": [[43, "snpetoonnxconversion"]], "Scripts list": [[13, "scripts-list"]], "Search Algorithm": [[2, "search-algorithm"]], "Search 
Strategy": [[2, "search-strategy"]], "SearchAlgorithms": [[44, null]], "Self-hosted Kubernetes cluster": [[12, null]], "Set-up": [[36, "set-up"]], "Setup and Usage": [[16, "setup-and-usage"]], "Shared Cache": [[16, null], [39, "shared-cache"]], "SliceGPT": [[24, "slicegpt"], [43, "slicegpt"]], "SparseGPT": [[24, "sparsegpt"], [43, "sparsegpt"]], "SpinQuant": [[34, "spinquant"], [43, "spinquant"]], "SplitModel": [[43, "splitmodel"]], "String Name": [[20, "string-name"]], "Supported Data Config Template": [[17, "supported-data-config-template"]], "Supported quantization techniques": [[8, "supported-quantization-techniques"]], "Surgeries": [[29, "surgeries"]], "System": [[2, "system"]], "Systems Information": [[42, "systems-information"]], "TPESearchAlgorithm": [[44, "tpesearchalgorithm"]], "Throughput Metric": [[19, "throughput-metric"]], "TorchTRTConversion": [[31, "torchtrtconversion"], [43, "torchtrtconversion"]], "Try Olive": [[45, "try-olive"]], "Tune OnnxRuntime Session Params": [[39, "tune-onnxruntime-session-params"]], "Usage": [[13, "usage"]], "Using Azure ML compute as host or target": [[11, "using-azure-ml-compute-as-host-or-target"]], "Using AzureML curated model": [[11, "using-azureml-curated-model"]], "Using AzureML registered model": [[11, "using-azureml-registered-model"]], "Using a model from an AzureML job output": [[11, "using-a-model-from-an-azureml-job-output"]], "Using data stored in AzureML datastore": [[11, "using-data-stored-in-azureml-datastore"]], "Using model stored in AzureML datastore": [[11, "using-model-stored-in-azureml-datastore"]], "VitisAIQuantization": [[43, "vitisaiquantization"]], "What is Olive Packaging": [[27, "what-is-olive-packaging"]], "What is Olive?": [[45, "what-is-olive"]], "Workflow Host": [[42, "workflow-host"]], "Workflow ID": [[42, "workflow-id"]], "Workflow outputs": [[15, "workflow-outputs"]], "Working with the CLI": [[36, "working-with-the-cli"]], "ZeroOutInput": [[29, "zerooutinput"]], "Zipfile": [[27, 
"zipfile"]], "aml_config.json:": [[42, "aml-config-json"]], "auto-opt": [[28, "auto-opt"]], "azureml_client with aml_config_path:": [[42, "azureml-client-with-aml-config-path"]], "azureml_client with azureml config fields:": [[42, "azureml-client-with-azureml-config-fields"]], "azureml_client:": [[42, "azureml-client"]], "generate-cost-model": [[28, "generate-cost-model"]], "manage_compute_instance": [[13, "manage-compute-instance"]], "script_dir": [[1, "script-dir"]], "user_script": [[1, "user-script"]]}, "docnames": ["examples", "extending/custom-scripts", "extending/design", "extending/how-to-add-optimization-pass", "extending/index", "getting-started/getting-started", "how-to/cli/cli-auto-opt", "how-to/cli/cli-finetune", "how-to/cli/cli-quantize", "how-to/cli/cli-run", "how-to/configure-workflows/auto-opt", "how-to/configure-workflows/azure-ai/azure-ai", "how-to/configure-workflows/azure-ai/azure-arc", "how-to/configure-workflows/azure-ai/azure-script", "how-to/configure-workflows/azure-ai/index", "how-to/configure-workflows/azure-ai/remote-workflow", "how-to/configure-workflows/azure-ai/shared-model-cache", "how-to/configure-workflows/how-to-configure-data", "how-to/configure-workflows/huggingface-integration", "how-to/configure-workflows/metrics-configuration", "how-to/configure-workflows/model-opt-and-transform/configure-model-path", "how-to/configure-workflows/model-opt-and-transform/index", "how-to/configure-workflows/model-opt-and-transform/onnx", "how-to/configure-workflows/model-opt-and-transform/openvino", "how-to/configure-workflows/model-opt-and-transform/pytorch", "how-to/configure-workflows/model-opt-and-transform/qnn", "how-to/configure-workflows/model-opt-and-transform/snpe", "how-to/configure-workflows/model-packaging", "how-to/configure-workflows/model-splitting", "how-to/configure-workflows/onnx-graph-surgeon", "how-to/configure-workflows/pass/convert-onnx", "how-to/configure-workflows/pass/convert-pytorch", 
"how-to/configure-workflows/pass/pass-configuration", "how-to/configure-workflows/pass/quantization-onnx", "how-to/configure-workflows/pass/quantization-pytorch", "how-to/configure-workflows/systems", "how-to/index", "how-to/installation", "index", "reference/cli", "reference/index", "reference/model", "reference/options", "reference/pass", "reference/search-algorithm", "why-olive"], "envversion": {"sphinx": 62, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1}, "filenames": ["examples.md", "extending/custom-scripts.md", "extending/design.md", "extending/how-to-add-optimization-pass.md", "extending/index.rst", "getting-started/getting-started.md", "how-to/cli/cli-auto-opt.md", "how-to/cli/cli-finetune.md", "how-to/cli/cli-quantize.md", "how-to/cli/cli-run.md", "how-to/configure-workflows/auto-opt.md", "how-to/configure-workflows/azure-ai/azure-ai.md", "how-to/configure-workflows/azure-ai/azure-arc.md", "how-to/configure-workflows/azure-ai/azure-script.md", "how-to/configure-workflows/azure-ai/index.rst", "how-to/configure-workflows/azure-ai/remote-workflow.md", "how-to/configure-workflows/azure-ai/shared-model-cache.md", "how-to/configure-workflows/how-to-configure-data.md", "how-to/configure-workflows/huggingface-integration.md", "how-to/configure-workflows/metrics-configuration.md", "how-to/configure-workflows/model-opt-and-transform/configure-model-path.md", "how-to/configure-workflows/model-opt-and-transform/index.rst", "how-to/configure-workflows/model-opt-and-transform/onnx.md", "how-to/configure-workflows/model-opt-and-transform/openvino.md", "how-to/configure-workflows/model-opt-and-transform/pytorch.md", 
"how-to/configure-workflows/model-opt-and-transform/qnn.md", "how-to/configure-workflows/model-opt-and-transform/snpe.md", "how-to/configure-workflows/model-packaging.md", "how-to/configure-workflows/model-splitting.md", "how-to/configure-workflows/onnx-graph-surgeon.md", "how-to/configure-workflows/pass/convert-onnx.md", "how-to/configure-workflows/pass/convert-pytorch.md", "how-to/configure-workflows/pass/pass-configuration.md", "how-to/configure-workflows/pass/quantization-onnx.md", "how-to/configure-workflows/pass/quantization-pytorch.md", "how-to/configure-workflows/systems.md", "how-to/index.rst", "how-to/installation.md", "index.md", "reference/cli.rst", "reference/index.rst", "reference/model.rst", "reference/options.md", "reference/pass.rst", "reference/search-algorithm.rst", "why-olive.md"], "indexentries": {"a_bits": [[43, "cmdoption-arg-a_bits", false]], "a_per_token": [[43, "cmdoption-arg-a_per_token", false]], "a_symmetric": [[43, "cmdoption-arg-a_symmetric", false]], "accuracy_level": [[43, "cmdoption-arg-accuracy_level", false]], "activation_type": [[43, "cmdoption-arg-180", false], [43, "cmdoption-arg-85", false], [43, "cmdoption-arg-activation_type", false]], "activationsymmetric": [[43, "cmdoption-arg-181", false], [43, "cmdoption-arg-62", false], [43, "cmdoption-arg-89", false], [43, "cmdoption-arg-ActivationSymmetric", false]], "add_zero_point": [[43, "cmdoption-arg-add_zero_point", false]], "addqdqpairtoweight": [[43, "cmdoption-arg-AddQDQPairToWeight", false]], "algorithm": [[43, "cmdoption-arg-algorithm", false]], "all_tensors_to_one_file": [[43, "cmdoption-arg-102", false], [43, "cmdoption-arg-108", false], [43, "cmdoption-arg-113", false], [43, "cmdoption-arg-118", false], [43, "cmdoption-arg-12", false], [43, "cmdoption-arg-125", false], [43, "cmdoption-arg-145", false], [43, "cmdoption-arg-166", false], [43, "cmdoption-arg-185", false], [43, "cmdoption-arg-191", false], [43, "cmdoption-arg-197", false], [43, "cmdoption-arg-2", false], 
[43, "cmdoption-arg-202", false], [43, "cmdoption-arg-207", false], [43, "cmdoption-arg-21", false], [43, "cmdoption-arg-217", false], [43, "cmdoption-arg-26", false], [43, "cmdoption-arg-280", false], [43, "cmdoption-arg-32", false], [43, "cmdoption-arg-37", false], [43, "cmdoption-arg-42", false], [43, "cmdoption-arg-47", false], [43, "cmdoption-arg-69", false], [43, "cmdoption-arg-7", false], [43, "cmdoption-arg-96", false], [43, "cmdoption-arg-all_tensors_to_one_file", false]], "allow_tf32": [[43, "cmdoption-arg-230", false], [43, "cmdoption-arg-241", false], [43, "cmdoption-arg-allow_tf32", false]], "append_first_op_types_to_quantize_list": [[43, "cmdoption-arg-54", false], [43, "cmdoption-arg-76", false], [43, "cmdoption-arg-append_first_op_types_to_quantize_list", false]], "approach": [[43, "cmdoption-arg-129", false], [43, "cmdoption-arg-149", false], [43, "cmdoption-arg-approach", false]], "atol": [[43, "cmdoption-arg-atol", false]], "backend": [[43, "cmdoption-arg-131", false], [43, "cmdoption-arg-151", false], [43, "cmdoption-arg-285", false], [43, "cmdoption-arg-backend", false]], "binary_file": [[43, "cmdoption-arg-binary_file", false]], "bits": [[43, "cmdoption-arg-bits", false]], "block_size": [[43, "cmdoption-arg-block_size", false]], "block_to_split": [[43, "cmdoption-arg-block_to_split", false]], "blocksize": [[43, "cmdoption-arg-blocksize", false]], "calibrate_method": [[43, "cmdoption-arg-178", false], [43, "cmdoption-arg-83", false], [43, "cmdoption-arg-calibrate_method", false]], "calibration_batch_size": [[43, "cmdoption-arg-calibration_batch_size", false]], "calibration_data_config": [[43, "cmdoption-arg-calibration_data_config", false]], "calibration_nsamples": [[43, "cmdoption-arg-calibration_nsamples", false]], "calibration_sampling_size": [[43, "cmdoption-arg-164", false], [43, "cmdoption-arg-calibration_sampling_size", false]], "checkpoint_path": [[43, "cmdoption-arg-checkpoint_path", false]], "command line option": [[43, 
"cmdoption-arg-0", false], [43, "cmdoption-arg-1", false], [43, "cmdoption-arg-10", false], [43, "cmdoption-arg-100", false], [43, "cmdoption-arg-101", false], [43, "cmdoption-arg-102", false], [43, "cmdoption-arg-103", false], [43, "cmdoption-arg-104", false], [43, "cmdoption-arg-105", false], [43, "cmdoption-arg-106", false], [43, "cmdoption-arg-107", false], [43, "cmdoption-arg-108", false], [43, "cmdoption-arg-109", false], [43, "cmdoption-arg-11", false], [43, "cmdoption-arg-110", false], [43, "cmdoption-arg-111", false], [43, "cmdoption-arg-112", false], [43, "cmdoption-arg-113", false], [43, "cmdoption-arg-114", false], [43, "cmdoption-arg-115", false], [43, "cmdoption-arg-116", false], [43, "cmdoption-arg-117", false], [43, "cmdoption-arg-118", false], [43, "cmdoption-arg-119", false], [43, "cmdoption-arg-12", false], [43, "cmdoption-arg-120", false], [43, "cmdoption-arg-121", false], [43, "cmdoption-arg-122", false], [43, "cmdoption-arg-123", false], [43, "cmdoption-arg-124", false], [43, "cmdoption-arg-125", false], [43, "cmdoption-arg-126", false], [43, "cmdoption-arg-127", false], [43, "cmdoption-arg-128", false], [43, "cmdoption-arg-129", false], [43, "cmdoption-arg-13", false], [43, "cmdoption-arg-130", false], [43, "cmdoption-arg-131", false], [43, "cmdoption-arg-132", false], [43, "cmdoption-arg-133", false], [43, "cmdoption-arg-134", false], [43, "cmdoption-arg-135", false], [43, "cmdoption-arg-136", false], [43, "cmdoption-arg-137", false], [43, "cmdoption-arg-138", false], [43, "cmdoption-arg-139", false], [43, "cmdoption-arg-14", false], [43, "cmdoption-arg-140", false], [43, "cmdoption-arg-141", false], [43, "cmdoption-arg-142", false], [43, "cmdoption-arg-143", false], [43, "cmdoption-arg-144", false], [43, "cmdoption-arg-145", false], [43, "cmdoption-arg-146", false], [43, "cmdoption-arg-147", false], [43, "cmdoption-arg-148", false], [43, "cmdoption-arg-149", false], [43, "cmdoption-arg-15", false], [43, "cmdoption-arg-150", false], [43, 
"cmdoption-arg-151", false], [43, "cmdoption-arg-152", false], [43, "cmdoption-arg-153", false], [43, "cmdoption-arg-154", false], [43, "cmdoption-arg-155", false], [43, "cmdoption-arg-156", false], [43, "cmdoption-arg-157", false], [43, "cmdoption-arg-158", false], [43, "cmdoption-arg-159", false], [43, "cmdoption-arg-16", false], [43, "cmdoption-arg-160", false], [43, "cmdoption-arg-161", false], [43, "cmdoption-arg-162", false], [43, "cmdoption-arg-163", false], [43, "cmdoption-arg-164", false], [43, "cmdoption-arg-165", false], [43, "cmdoption-arg-166", false], [43, "cmdoption-arg-167", false], [43, "cmdoption-arg-168", false], [43, "cmdoption-arg-169", false], [43, "cmdoption-arg-17", false], [43, "cmdoption-arg-170", false], [43, "cmdoption-arg-171", false], [43, "cmdoption-arg-172", false], [43, "cmdoption-arg-173", false], [43, "cmdoption-arg-174", false], [43, "cmdoption-arg-175", false], [43, "cmdoption-arg-176", false], [43, "cmdoption-arg-177", false], [43, "cmdoption-arg-178", false], [43, "cmdoption-arg-179", false], [43, "cmdoption-arg-18", false], [43, "cmdoption-arg-180", false], [43, "cmdoption-arg-181", false], [43, "cmdoption-arg-182", false], [43, "cmdoption-arg-183", false], [43, "cmdoption-arg-184", false], [43, "cmdoption-arg-185", false], [43, "cmdoption-arg-186", false], [43, "cmdoption-arg-187", false], [43, "cmdoption-arg-188", false], [43, "cmdoption-arg-189", false], [43, "cmdoption-arg-19", false], [43, "cmdoption-arg-190", false], [43, "cmdoption-arg-191", false], [43, "cmdoption-arg-192", false], [43, "cmdoption-arg-193", false], [43, "cmdoption-arg-194", false], [43, "cmdoption-arg-195", false], [43, "cmdoption-arg-196", false], [43, "cmdoption-arg-197", false], [43, "cmdoption-arg-198", false], [43, "cmdoption-arg-199", false], [43, "cmdoption-arg-2", false], [43, "cmdoption-arg-20", false], [43, "cmdoption-arg-200", false], [43, "cmdoption-arg-201", false], [43, "cmdoption-arg-202", false], [43, "cmdoption-arg-203", false], [43, 
"cmdoption-arg-204", false], [43, "cmdoption-arg-205", false], [43, "cmdoption-arg-206", false], [43, "cmdoption-arg-207", false], [43, "cmdoption-arg-208", false], [43, "cmdoption-arg-209", false], [43, "cmdoption-arg-21", false], [43, "cmdoption-arg-210", false], [43, "cmdoption-arg-211", false], [43, "cmdoption-arg-212", false], [43, "cmdoption-arg-213", false], [43, "cmdoption-arg-214", false], [43, "cmdoption-arg-215", false], [43, "cmdoption-arg-216", false], [43, "cmdoption-arg-217", false], [43, "cmdoption-arg-218", false], [43, "cmdoption-arg-219", false], [43, "cmdoption-arg-22", false], [43, "cmdoption-arg-220", false], [43, "cmdoption-arg-221", false], [43, "cmdoption-arg-222", false], [43, "cmdoption-arg-223", false], [43, "cmdoption-arg-224", false], [43, "cmdoption-arg-225", false], [43, "cmdoption-arg-226", false], [43, "cmdoption-arg-227", false], [43, "cmdoption-arg-228", false], [43, "cmdoption-arg-229", false], [43, "cmdoption-arg-23", false], [43, "cmdoption-arg-230", false], [43, "cmdoption-arg-231", false], [43, "cmdoption-arg-232", false], [43, "cmdoption-arg-233", false], [43, "cmdoption-arg-234", false], [43, "cmdoption-arg-235", false], [43, "cmdoption-arg-236", false], [43, "cmdoption-arg-237", false], [43, "cmdoption-arg-238", false], [43, "cmdoption-arg-239", false], [43, "cmdoption-arg-24", false], [43, "cmdoption-arg-240", false], [43, "cmdoption-arg-241", false], [43, "cmdoption-arg-242", false], [43, "cmdoption-arg-243", false], [43, "cmdoption-arg-244", false], [43, "cmdoption-arg-245", false], [43, "cmdoption-arg-246", false], [43, "cmdoption-arg-247", false], [43, "cmdoption-arg-248", false], [43, "cmdoption-arg-249", false], [43, "cmdoption-arg-25", false], [43, "cmdoption-arg-250", false], [43, "cmdoption-arg-251", false], [43, "cmdoption-arg-252", false], [43, "cmdoption-arg-253", false], [43, "cmdoption-arg-254", false], [43, "cmdoption-arg-255", false], [43, "cmdoption-arg-256", false], [43, "cmdoption-arg-257", false], 
[43, "cmdoption-arg-258", false], [43, "cmdoption-arg-259", false], [43, "cmdoption-arg-26", false], [43, "cmdoption-arg-260", false], [43, "cmdoption-arg-261", false], [43, "cmdoption-arg-262", false], [43, "cmdoption-arg-263", false], [43, "cmdoption-arg-264", false], [43, "cmdoption-arg-265", false], [43, "cmdoption-arg-266", false], [43, "cmdoption-arg-267", false], [43, "cmdoption-arg-268", false], [43, "cmdoption-arg-269", false], [43, "cmdoption-arg-27", false], [43, "cmdoption-arg-270", false], [43, "cmdoption-arg-271", false], [43, "cmdoption-arg-272", false], [43, "cmdoption-arg-273", false], [43, "cmdoption-arg-274", false], [43, "cmdoption-arg-275", false], [43, "cmdoption-arg-276", false], [43, "cmdoption-arg-277", false], [43, "cmdoption-arg-278", false], [43, "cmdoption-arg-279", false], [43, "cmdoption-arg-28", false], [43, "cmdoption-arg-280", false], [43, "cmdoption-arg-281", false], [43, "cmdoption-arg-282", false], [43, "cmdoption-arg-283", false], [43, "cmdoption-arg-284", false], [43, "cmdoption-arg-285", false], [43, "cmdoption-arg-286", false], [43, "cmdoption-arg-29", false], [43, "cmdoption-arg-3", false], [43, "cmdoption-arg-30", false], [43, "cmdoption-arg-31", false], [43, "cmdoption-arg-32", false], [43, "cmdoption-arg-33", false], [43, "cmdoption-arg-34", false], [43, "cmdoption-arg-35", false], [43, "cmdoption-arg-36", false], [43, "cmdoption-arg-37", false], [43, "cmdoption-arg-38", false], [43, "cmdoption-arg-39", false], [43, "cmdoption-arg-4", false], [43, "cmdoption-arg-40", false], [43, "cmdoption-arg-41", false], [43, "cmdoption-arg-42", false], [43, "cmdoption-arg-43", false], [43, "cmdoption-arg-44", false], [43, "cmdoption-arg-45", false], [43, "cmdoption-arg-46", false], [43, "cmdoption-arg-47", false], [43, "cmdoption-arg-48", false], [43, "cmdoption-arg-49", false], [43, "cmdoption-arg-5", false], [43, "cmdoption-arg-50", false], [43, "cmdoption-arg-51", false], [43, "cmdoption-arg-52", false], [43, "cmdoption-arg-53", 
false], [43, "cmdoption-arg-54", false], [43, "cmdoption-arg-55", false], [43, "cmdoption-arg-56", false], [43, "cmdoption-arg-57", false], [43, "cmdoption-arg-58", false], [43, "cmdoption-arg-59", false], [43, "cmdoption-arg-6", false], [43, "cmdoption-arg-60", false], [43, "cmdoption-arg-61", false], [43, "cmdoption-arg-62", false], [43, "cmdoption-arg-63", false], [43, "cmdoption-arg-64", false], [43, "cmdoption-arg-65", false], [43, "cmdoption-arg-66", false], [43, "cmdoption-arg-67", false], [43, "cmdoption-arg-68", false], [43, "cmdoption-arg-69", false], [43, "cmdoption-arg-7", false], [43, "cmdoption-arg-70", false], [43, "cmdoption-arg-71", false], [43, "cmdoption-arg-72", false], [43, "cmdoption-arg-73", false], [43, "cmdoption-arg-74", false], [43, "cmdoption-arg-75", false], [43, "cmdoption-arg-76", false], [43, "cmdoption-arg-77", false], [43, "cmdoption-arg-78", false], [43, "cmdoption-arg-79", false], [43, "cmdoption-arg-8", false], [43, "cmdoption-arg-80", false], [43, "cmdoption-arg-81", false], [43, "cmdoption-arg-82", false], [43, "cmdoption-arg-83", false], [43, "cmdoption-arg-84", false], [43, "cmdoption-arg-85", false], [43, "cmdoption-arg-86", false], [43, "cmdoption-arg-87", false], [43, "cmdoption-arg-88", false], [43, "cmdoption-arg-89", false], [43, "cmdoption-arg-9", false], [43, "cmdoption-arg-90", false], [43, "cmdoption-arg-91", false], [43, "cmdoption-arg-92", false], [43, "cmdoption-arg-93", false], [43, "cmdoption-arg-94", false], [43, "cmdoption-arg-95", false], [43, "cmdoption-arg-96", false], [43, "cmdoption-arg-97", false], [43, "cmdoption-arg-98", false], [43, "cmdoption-arg-99", false], [43, "cmdoption-arg-ActivationSymmetric", false], [43, "cmdoption-arg-AddQDQPairToWeight", false], [43, "cmdoption-arg-EnableSubgraph", false], [43, "cmdoption-arg-ForceQuantizeNoInputCheck", false], [43, "cmdoption-arg-MatMulConstBOnly", false], [43, "cmdoption-arg-WeightSymmetric", false], [43, "cmdoption-arg-a_bits", false], [43, 
"cmdoption-arg-a_per_token", false], [43, "cmdoption-arg-a_symmetric", false], [43, "cmdoption-arg-accuracy_level", false], [43, "cmdoption-arg-activation_type", false], [43, "cmdoption-arg-add_zero_point", false], [43, "cmdoption-arg-algorithm", false], [43, "cmdoption-arg-all_tensors_to_one_file", false], [43, "cmdoption-arg-allow_tf32", false], [43, "cmdoption-arg-append_first_op_types_to_quantize_list", false], [43, "cmdoption-arg-approach", false], [43, "cmdoption-arg-atol", false], [43, "cmdoption-arg-backend", false], [43, "cmdoption-arg-binary_file", false], [43, "cmdoption-arg-bits", false], [43, "cmdoption-arg-block_size", false], [43, "cmdoption-arg-block_to_split", false], [43, "cmdoption-arg-blocksize", false], [43, "cmdoption-arg-calibrate_method", false], [43, "cmdoption-arg-calibration_batch_size", false], [43, "cmdoption-arg-calibration_data_config", false], [43, "cmdoption-arg-calibration_nsamples", false], [43, "cmdoption-arg-calibration_sampling_size", false], [43, "cmdoption-arg-checkpoint_path", false], [43, "cmdoption-arg-components", false], [43, "cmdoption-arg-compress_to_fp16", false], [43, "cmdoption-arg-compute_dtype", false], [43, "cmdoption-arg-convert_attribute", false], [43, "cmdoption-arg-cost_model", false], [43, "cmdoption-arg-cpu_cores", false], [43, "cmdoption-arg-damp_percent", false], [43, "cmdoption-arg-data_config", false], [43, "cmdoption-arg-desc_act", false], [43, "cmdoption-arg-device", false], [43, "cmdoption-arg-dim_param", false], [43, "cmdoption-arg-dim_value", false], [43, "cmdoption-arg-do_validate", false], [43, "cmdoption-arg-domain", false], [43, "cmdoption-arg-double_quant", false], [43, "cmdoption-arg-duo_scaling", false], [43, "cmdoption-arg-dynamic", false], [43, "cmdoption-arg-dynamic_lora_r", false], [43, "cmdoption-arg-element_wise_binary_ops", false], [43, "cmdoption-arg-enable_cuda_graph", false], [43, "cmdoption-arg-enable_dpu", false], [43, "cmdoption-arg-enable_htp", false], [43, 
"cmdoption-arg-enable_profiling", false], [43, "cmdoption-arg-eval_data_config", false], [43, "cmdoption-arg-example_input_func", false], [43, "cmdoption-arg-exclude_embeds", false], [43, "cmdoption-arg-exclude_lm_head", false], [43, "cmdoption-arg-excluded_precisions", false], [43, "cmdoption-arg-execution_mode_list", false], [43, "cmdoption-arg-export_compatible", false], [43, "cmdoption-arg-external_data_name", false], [43, "cmdoption-arg-extra.Sigmoid.nnapi", false], [43, "cmdoption-arg-extra_args", false], [43, "cmdoption-arg-extra_configs", false], [43, "cmdoption-arg-extra_options", false], [43, "cmdoption-arg-extra_session_config", false], [43, "cmdoption-arg-final_orientation", false], [43, "cmdoption-arg-float16", false], [43, "cmdoption-arg-force_evaluate_other_eps", false], [43, "cmdoption-arg-force_fp16_inputs", false], [43, "cmdoption-arg-force_fp32_nodes", false], [43, "cmdoption-arg-force_fp32_ops", false], [43, "cmdoption-arg-fp16", false], [43, "cmdoption-arg-fuse_layernorm", false], [43, "cmdoption-arg-gpus", false], [43, "cmdoption-arg-group_size", false], [43, "cmdoption-arg-hidden_size", false], [43, "cmdoption-arg-htp_socs", false], [43, "cmdoption-arg-ignored_scope", false], [43, "cmdoption-arg-ignored_scope_type", false], [43, "cmdoption-arg-input", false], [43, "cmdoption-arg-input_dim", false], [43, "cmdoption-arg-input_int32", false], [43, "cmdoption-arg-input_layouts", false], [43, "cmdoption-arg-input_model_dtype", false], [43, "cmdoption-arg-input_name", false], [43, "cmdoption-arg-input_names", false], [43, "cmdoption-arg-input_nodes", false], [43, "cmdoption-arg-input_shape", false], [43, "cmdoption-arg-input_shapes", false], [43, "cmdoption-arg-input_types", false], [43, "cmdoption-arg-inputs_to_make_channel_last", false], [43, "cmdoption-arg-inside_layer_modules", false], [43, "cmdoption-arg-int4_accuracy_level", false], [43, "cmdoption-arg-int4_block_size", false], [43, "cmdoption-arg-inter_thread_num_list", false], [43, 
"cmdoption-arg-intra_thread_num_list", false], [43, "cmdoption-arg-io_bind", false], [43, "cmdoption-arg-is_symmetric", false], [43, "cmdoption-arg-keep_io_types", false], [43, "cmdoption-arg-layer_name_filter", false], [43, "cmdoption-arg-layers_block_name", false], [43, "cmdoption-arg-lib_name", false], [43, "cmdoption-arg-lib_targets", false], [43, "cmdoption-arg-loftq_iter", false], [43, "cmdoption-arg-logger", false], [43, "cmdoption-arg-lora_alpha", false], [43, "cmdoption-arg-lora_dropout", false], [43, "cmdoption-arg-lora_r", false], [43, "cmdoption-arg-make_inputs", false], [43, "cmdoption-arg-max_finite_val", false], [43, "cmdoption-arg-max_layer", false], [43, "cmdoption-arg-merge_adapter_weights", false], [43, "cmdoption-arg-metadata_only", false], [43, "cmdoption-arg-metric", false], [43, "cmdoption-arg-min_layer", false], [43, "cmdoption-arg-min_positive_val", false], [43, "cmdoption-arg-model_type", false], [43, "cmdoption-arg-modules_to_fuse", false], [43, "cmdoption-arg-modules_to_not_convert", false], [43, "cmdoption-arg-modules_to_save", false], [43, "cmdoption-arg-name_pattern", false], [43, "cmdoption-arg-need_layer_fusing", false], [43, "cmdoption-arg-no_repeat_ngram_size", false], [43, "cmdoption-arg-node_block_list", false], [43, "cmdoption-arg-nodes_to_exclude", false], [43, "cmdoption-arg-nodes_to_quantize", false], [43, "cmdoption-arg-num_epochs", false], [43, "cmdoption-arg-num_heads", false], [43, "cmdoption-arg-num_key_value_heads", false], [43, "cmdoption-arg-num_splits", false], [43, "cmdoption-arg-num_steps", false], [43, "cmdoption-arg-only_onnxruntime", false], [43, "cmdoption-arg-op_block_list", false], [43, "cmdoption-arg-op_type_dict", false], [43, "cmdoption-arg-op_types_to_quantize", false], [43, "cmdoption-arg-opt_level", false], [43, "cmdoption-arg-opt_level_list", false], [43, "cmdoption-arg-optimization_options", false], [43, "cmdoption-arg-optimize_model", false], [43, "cmdoption-arg-optional_inputs", false], [43, 
"cmdoption-arg-out_node", false], [43, "cmdoption-arg-output_model", false], [43, "cmdoption-arg-output_names", false], [43, "cmdoption-arg-output_nodes", false], [43, "cmdoption-arg-outputs_to_make_channel_last", false], [43, "cmdoption-arg-outside_layer_modules", false], [43, "cmdoption-arg-overrides_config", false], [43, "cmdoption-arg-parallel_jobs", false], [43, "cmdoption-arg-past_key_value_name", false], [43, "cmdoption-arg-per_channel", false], [43, "cmdoption-arg-percdamp", false], [43, "cmdoption-arg-post", false], [43, "cmdoption-arg-pre", false], [43, "cmdoption-arg-precision", false], [43, "cmdoption-arg-prepare_qnn_config", false], [43, "cmdoption-arg-preset", false], [43, "cmdoption-arg-provider_options_list", false], [43, "cmdoption-arg-providers_list", false], [43, "cmdoption-arg-ptl_data_module", false], [43, "cmdoption-arg-ptl_module", false], [43, "cmdoption-arg-q_group_size", false], [43, "cmdoption-arg-qconfig_func", false], [43, "cmdoption-arg-qnn_extra_options", false], [43, "cmdoption-arg-quant_format", false], [43, "cmdoption-arg-quant_level", false], [43, "cmdoption-arg-quant_mode", false], [43, "cmdoption-arg-quant_preprocess", false], [43, "cmdoption-arg-quant_type", false], [43, "cmdoption-arg-recipes", false], [43, "cmdoption-arg-reduce_range", false], [43, "cmdoption-arg-rotate_mode", false], [43, "cmdoption-arg-round_interval", false], [43, "cmdoption-arg-save_as_external_data", false], [43, "cmdoption-arg-save_format", false], [43, "cmdoption-arg-save_metadata_for_token_generation", false], [43, "cmdoption-arg-save_quant_config", false], [43, "cmdoption-arg-script_dir", false], [43, "cmdoption-arg-search", false], [43, "cmdoption-arg-seed", false], [43, "cmdoption-arg-size_threshold", false], [43, "cmdoption-arg-source_dtype", false], [43, "cmdoption-arg-sparsity", false], [43, "cmdoption-arg-static_groups", false], [43, "cmdoption-arg-strict", false], [43, "cmdoption-arg-surgeries", false], [43, "cmdoption-arg-sym", false], [43, 
"cmdoption-arg-target_device", false], [43, "cmdoption-arg-target_dtype", false], [43, "cmdoption-arg-target_modules", false], [43, "cmdoption-arg-target_opset", false], [43, "cmdoption-arg-tool_command", false], [43, "cmdoption-arg-tool_command_args", false], [43, "cmdoption-arg-torch_dtype", false], [43, "cmdoption-arg-train_data_config", false], [43, "cmdoption-arg-training_args", false], [43, "cmdoption-arg-training_loop_func", false], [43, "cmdoption-arg-trt_fp16_enable", false], [43, "cmdoption-arg-true_sequential", false], [43, "cmdoption-arg-tuning_criterion", false], [43, "cmdoption-arg-use_dynamo_exporter", false], [43, "cmdoption-arg-use_enhanced_quantizer", false], [43, "cmdoption-arg-use_external_data_format", false], [43, "cmdoption-arg-use_forced_decoder_ids", false], [43, "cmdoption-arg-use_gpu", false], [43, "cmdoption-arg-use_gqa", false], [43, "cmdoption-arg-use_int4", false], [43, "cmdoption-arg-use_logits_processor", false], [43, "cmdoption-arg-use_prefix_vocab_mask", false], [43, "cmdoption-arg-use_symbolic_shape_infer", false], [43, "cmdoption-arg-use_temperature", false], [43, "cmdoption-arg-use_transpose_op", false], [43, "cmdoption-arg-use_vocab_mask", false], [43, "cmdoption-arg-user_script", false], [43, "cmdoption-arg-val_data_config", false], [43, "cmdoption-arg-version", false], [43, "cmdoption-arg-w_bit", false], [43, "cmdoption-arg-weight_only_config", false], [43, "cmdoption-arg-weight_only_quant_configs", false], [43, "cmdoption-arg-weight_type", false], [43, "cmdoption-arg-workspace", false], [43, "cmdoption-arg-zero_point", false], [44, "cmdoption-arg-0", false], [44, "cmdoption-arg-1", false], [44, "cmdoption-arg-group", false], [44, "cmdoption-arg-multivariate", false], [44, "cmdoption-arg-num_samples", false], [44, "cmdoption-arg-seed", false], [44, "cmdoption-arg-with_replacement", false]], "components": [[43, "cmdoption-arg-components", false]], "compositemodelhandler (class in olive.model)": [[41, 
"olive.model.CompositeModelHandler", false]], "compress_to_fp16": [[43, "cmdoption-arg-compress_to_fp16", false]], "compute_dtype": [[43, "cmdoption-arg-234", false], [43, "cmdoption-arg-compute_dtype", false]], "convert_attribute": [[43, "cmdoption-arg-10", false], [43, "cmdoption-arg-105", false], [43, "cmdoption-arg-111", false], [43, "cmdoption-arg-116", false], [43, "cmdoption-arg-121", false], [43, "cmdoption-arg-128", false], [43, "cmdoption-arg-148", false], [43, "cmdoption-arg-15", false], [43, "cmdoption-arg-169", false], [43, "cmdoption-arg-188", false], [43, "cmdoption-arg-194", false], [43, "cmdoption-arg-200", false], [43, "cmdoption-arg-205", false], [43, "cmdoption-arg-210", false], [43, "cmdoption-arg-220", false], [43, "cmdoption-arg-24", false], [43, "cmdoption-arg-283", false], [43, "cmdoption-arg-29", false], [43, "cmdoption-arg-35", false], [43, "cmdoption-arg-40", false], [43, "cmdoption-arg-45", false], [43, "cmdoption-arg-5", false], [43, "cmdoption-arg-50", false], [43, "cmdoption-arg-72", false], [43, "cmdoption-arg-99", false], [43, "cmdoption-arg-convert_attribute", false]], "cost_model": [[43, "cmdoption-arg-cost_model", false]], "cpu_cores": [[43, "cmdoption-arg-cpu_cores", false]], "damp_percent": [[43, "cmdoption-arg-damp_percent", false]], "data_config": [[43, "cmdoption-arg-106", false], [43, "cmdoption-arg-142", false], [43, "cmdoption-arg-162", false], [43, "cmdoption-arg-171", false], [43, "cmdoption-arg-249", false], [43, "cmdoption-arg-258", false], [43, "cmdoption-arg-261", false], [43, "cmdoption-arg-266", false], [43, "cmdoption-arg-271", false], [43, "cmdoption-arg-275", false], [43, "cmdoption-arg-60", false], [43, "cmdoption-arg-82", false], [43, "cmdoption-arg-data_config", false]], "desc_act": [[43, "cmdoption-arg-desc_act", false]], "device": [[43, "cmdoption-arg-122", false], [43, "cmdoption-arg-130", false], [43, "cmdoption-arg-150", false], [43, "cmdoption-arg-18", false], [43, "cmdoption-arg-215", false], [43, 
"cmdoption-arg-248", false], [43, "cmdoption-arg-device", false]], "dim_param": [[43, "cmdoption-arg-dim_param", false]], "dim_value": [[43, "cmdoption-arg-dim_value", false]], "distributedhfmodelhandler (class in olive.model)": [[41, "olive.model.DistributedHfModelHandler", false]], "distributedonnxmodelhandler (class in olive.model)": [[41, "olive.model.DistributedOnnxModelHandler", false]], "do_validate": [[43, "cmdoption-arg-do_validate", false]], "domain": [[43, "cmdoption-arg-132", false], [43, "cmdoption-arg-152", false], [43, "cmdoption-arg-domain", false]], "double_quant": [[43, "cmdoption-arg-double_quant", false]], "duo_scaling": [[43, "cmdoption-arg-duo_scaling", false]], "dynamic": [[43, "cmdoption-arg-dynamic", false]], "dynamic_lora_r": [[43, "cmdoption-arg-dynamic_lora_r", false]], "element_wise_binary_ops": [[43, "cmdoption-arg-element_wise_binary_ops", false]], "enable_cuda_graph": [[43, "cmdoption-arg-221", false], [43, "cmdoption-arg-enable_cuda_graph", false]], "enable_dpu": [[43, "cmdoption-arg-enable_dpu", false]], "enable_htp": [[43, "cmdoption-arg-enable_htp", false]], "enable_profiling": [[43, "cmdoption-arg-enable_profiling", false]], "enablesubgraph": [[43, "cmdoption-arg-64", false], [43, "cmdoption-arg-91", false], [43, "cmdoption-arg-EnableSubgraph", false]], "eval_data_config": [[43, "cmdoption-arg-232", false], [43, "cmdoption-arg-243", false], [43, "cmdoption-arg-eval_data_config", false]], "evaluation_strategy (olive.passes.pytorch.lora.hftrainingarguments attribute)": [[43, "olive.passes.pytorch.lora.HFTrainingArguments.evaluation_strategy", false]], "example_input_func": [[43, "cmdoption-arg-example_input_func", false]], "exclude_embeds": [[43, "cmdoption-arg-222", false], [43, "cmdoption-arg-exclude_embeds", false]], "exclude_lm_head": [[43, "cmdoption-arg-223", false], [43, "cmdoption-arg-exclude_lm_head", false]], "excluded_precisions": [[43, "cmdoption-arg-137", false], [43, "cmdoption-arg-157", false], [43, 
"cmdoption-arg-excluded_precisions", false]], "execution_mode_list": [[43, "cmdoption-arg-execution_mode_list", false]], "export_compatible": [[43, "cmdoption-arg-export_compatible", false]], "external_data_name": [[43, "cmdoption-arg-103", false], [43, "cmdoption-arg-109", false], [43, "cmdoption-arg-114", false], [43, "cmdoption-arg-119", false], [43, "cmdoption-arg-126", false], [43, "cmdoption-arg-13", false], [43, "cmdoption-arg-146", false], [43, "cmdoption-arg-167", false], [43, "cmdoption-arg-186", false], [43, "cmdoption-arg-192", false], [43, "cmdoption-arg-198", false], [43, "cmdoption-arg-203", false], [43, "cmdoption-arg-208", false], [43, "cmdoption-arg-218", false], [43, "cmdoption-arg-22", false], [43, "cmdoption-arg-27", false], [43, "cmdoption-arg-281", false], [43, "cmdoption-arg-3", false], [43, "cmdoption-arg-33", false], [43, "cmdoption-arg-38", false], [43, "cmdoption-arg-43", false], [43, "cmdoption-arg-48", false], [43, "cmdoption-arg-70", false], [43, "cmdoption-arg-8", false], [43, "cmdoption-arg-97", false], [43, "cmdoption-arg-external_data_name", false]], "extra.sigmoid.nnapi": [[43, "cmdoption-arg-61", false], [43, "cmdoption-arg-88", false], [43, "cmdoption-arg-extra.Sigmoid.nnapi", false]], "extra_args": [[43, "cmdoption-arg-274", false], [43, "cmdoption-arg-276", false], [43, "cmdoption-arg-284", false], [43, "cmdoption-arg-286", false], [43, "cmdoption-arg-extra_args", false]], "extra_configs": [[43, "cmdoption-arg-273", false], [43, "cmdoption-arg-extra_configs", false]], "extra_options": [[43, "cmdoption-arg-183", false], [43, "cmdoption-arg-67", false], [43, "cmdoption-arg-94", false], [43, "cmdoption-arg-extra_options", false]], "extra_session_config": [[43, "cmdoption-arg-extra_session_config", false]], "final_orientation": [[43, "cmdoption-arg-final_orientation", false]], "float16": [[43, "cmdoption-arg-265", false], [43, "cmdoption-arg-float16", false]], "force_evaluate_other_eps": [[43, 
"cmdoption-arg-force_evaluate_other_eps", false]], "force_fp16_inputs": [[43, "cmdoption-arg-force_fp16_inputs", false]], "force_fp32_nodes": [[43, "cmdoption-arg-force_fp32_nodes", false]], "force_fp32_ops": [[43, "cmdoption-arg-force_fp32_ops", false]], "forcequantizenoinputcheck": [[43, "cmdoption-arg-65", false], [43, "cmdoption-arg-92", false], [43, "cmdoption-arg-ForceQuantizeNoInputCheck", false]], "fp16": [[43, "cmdoption-arg-214", false], [43, "cmdoption-arg-fp16", false]], "fuse_layernorm": [[43, "cmdoption-arg-fuse_layernorm", false]], "gpus": [[43, "cmdoption-arg-gpus", false]], "group": [[44, "cmdoption-arg-group", false]], "group_size": [[43, "cmdoption-arg-group_size", false]], "hfmodelhandler (class in olive.model)": [[41, "olive.model.HfModelHandler", false]], "hidden_size": [[43, "cmdoption-arg-hidden_size", false]], "htp_socs": [[43, "cmdoption-arg-htp_socs", false]], "ignored_scope": [[43, "cmdoption-arg-ignored_scope", false]], "ignored_scope_type": [[43, "cmdoption-arg-ignored_scope_type", false]], "input": [[43, "cmdoption-arg-input", false]], "input_dim": [[43, "cmdoption-arg-input_dim", false]], "input_int32": [[43, "cmdoption-arg-input_int32", false]], "input_layouts": [[43, "cmdoption-arg-input_layouts", false]], "input_model_dtype": [[43, "cmdoption-arg-input_model_dtype", false]], "input_name": [[43, "cmdoption-arg-input_name", false]], "input_names": [[43, "cmdoption-arg-input_names", false]], "input_nodes": [[43, "cmdoption-arg-input_nodes", false]], "input_shape": [[43, "cmdoption-arg-input_shape", false]], "input_shapes": [[43, "cmdoption-arg-input_shapes", false]], "input_types": [[43, "cmdoption-arg-input_types", false]], "inputs_to_make_channel_last": [[43, "cmdoption-arg-inputs_to_make_channel_last", false]], "inside_layer_modules": [[43, "cmdoption-arg-inside_layer_modules", false]], "int4_accuracy_level": [[43, "cmdoption-arg-int4_accuracy_level", false]], "int4_block_size": [[43, "cmdoption-arg-int4_block_size", false]], 
"inter_thread_num_list": [[43, "cmdoption-arg-inter_thread_num_list", false]], "intra_thread_num_list": [[43, "cmdoption-arg-intra_thread_num_list", false]], "io_bind": [[43, "cmdoption-arg-io_bind", false]], "is_symmetric": [[43, "cmdoption-arg-is_symmetric", false]], "keep_io_types": [[43, "cmdoption-arg-19", false], [43, "cmdoption-arg-keep_io_types", false]], "layer_name_filter": [[43, "cmdoption-arg-264", false], [43, "cmdoption-arg-layer_name_filter", false]], "layers_block_name": [[43, "cmdoption-arg-layers_block_name", false]], "learning_rate (olive.passes.pytorch.lora.hftrainingarguments attribute)": [[43, "olive.passes.pytorch.lora.HFTrainingArguments.learning_rate", false]], "lib_name": [[43, "cmdoption-arg-lib_name", false]], "lib_targets": [[43, "cmdoption-arg-lib_targets", false]], "loftq_iter": [[43, "cmdoption-arg-loftq_iter", false]], "logger": [[43, "cmdoption-arg-logger", false]], "lora_alpha": [[43, "cmdoption-arg-226", false], [43, "cmdoption-arg-237", false], [43, "cmdoption-arg-lora_alpha", false]], "lora_dropout": [[43, "cmdoption-arg-227", false], [43, "cmdoption-arg-238", false], [43, "cmdoption-arg-lora_dropout", false]], "lora_r": [[43, "cmdoption-arg-225", false], [43, "cmdoption-arg-236", false], [43, "cmdoption-arg-lora_r", false]], "lr_scheduler_type (olive.passes.pytorch.lora.hftrainingarguments attribute)": [[43, "olive.passes.pytorch.lora.HFTrainingArguments.lr_scheduler_type", false]], "make_inputs": [[43, "cmdoption-arg-make_inputs", false]], "matmulconstbonly": [[43, "cmdoption-arg-66", false], [43, "cmdoption-arg-93", false], [43, "cmdoption-arg-MatMulConstBOnly", false]], "max_finite_val": [[43, "cmdoption-arg-max_finite_val", false]], "max_layer": [[43, "cmdoption-arg-263", false], [43, "cmdoption-arg-max_layer", false]], "merge_adapter_weights": [[43, "cmdoption-arg-merge_adapter_weights", false]], "metadata_only": [[43, "cmdoption-arg-metadata_only", false]], "metric": [[43, "cmdoption-arg-139", false], [43, 
"cmdoption-arg-159", false], [43, "cmdoption-arg-metric", false]], "min_layer": [[43, "cmdoption-arg-262", false], [43, "cmdoption-arg-min_layer", false]], "min_positive_val": [[43, "cmdoption-arg-min_positive_val", false]], "model_type": [[43, "cmdoption-arg-272", false], [43, "cmdoption-arg-model_type", false]], "modelconfig (class in olive.model)": [[41, "olive.model.ModelConfig", false]], "modules_to_fuse": [[43, "cmdoption-arg-modules_to_fuse", false]], "modules_to_not_convert": [[43, "cmdoption-arg-modules_to_not_convert", false]], "modules_to_save": [[43, "cmdoption-arg-228", false], [43, "cmdoption-arg-239", false], [43, "cmdoption-arg-modules_to_save", false]], "multivariate": [[44, "cmdoption-arg-multivariate", false]], "name_pattern": [[43, "cmdoption-arg-name_pattern", false]], "need_layer_fusing": [[43, "cmdoption-arg-need_layer_fusing", false]], "no_repeat_ngram_size": [[43, "cmdoption-arg-no_repeat_ngram_size", false]], "node_block_list": [[43, "cmdoption-arg-node_block_list", false]], "nodes_to_exclude": [[43, "cmdoption-arg-100", false], [43, "cmdoption-arg-175", false], [43, "cmdoption-arg-56", false], [43, "cmdoption-arg-78", false], [43, "cmdoption-arg-nodes_to_exclude", false]], "nodes_to_quantize": [[43, "cmdoption-arg-174", false], [43, "cmdoption-arg-55", false], [43, "cmdoption-arg-77", false], [43, "cmdoption-arg-nodes_to_quantize", false]], "num_epochs": [[43, "cmdoption-arg-num_epochs", false]], "num_heads": [[43, "cmdoption-arg-num_heads", false]], "num_key_value_heads": [[43, "cmdoption-arg-num_key_value_heads", false]], "num_samples": [[44, "cmdoption-arg-0", false], [44, "cmdoption-arg-num_samples", false]], "num_splits": [[43, "cmdoption-arg-num_splits", false]], "num_steps": [[43, "cmdoption-arg-num_steps", false]], "only_onnxruntime": [[43, "cmdoption-arg-only_onnxruntime", false]], "onnxmodelhandler (class in olive.model)": [[41, "olive.model.ONNXModelHandler", false]], "op_block_list": [[43, "cmdoption-arg-30", false], [43, 
"cmdoption-arg-op_block_list", false]], "op_type_dict": [[43, "cmdoption-arg-141", false], [43, "cmdoption-arg-161", false], [43, "cmdoption-arg-op_type_dict", false]], "op_types_to_quantize": [[43, "cmdoption-arg-173", false], [43, "cmdoption-arg-53", false], [43, "cmdoption-arg-75", false], [43, "cmdoption-arg-op_types_to_quantize", false]], "openvinomodelhandler (class in olive.model)": [[41, "olive.model.OpenVINOModelHandler", false]], "opt_level": [[43, "cmdoption-arg-opt_level", false]], "opt_level_list": [[43, "cmdoption-arg-opt_level_list", false]], "optim (olive.passes.pytorch.lora.hftrainingarguments attribute)": [[43, "olive.passes.pytorch.lora.HFTrainingArguments.optim", false]], "optimization_options": [[43, "cmdoption-arg-optimization_options", false]], "optimize_model": [[43, "cmdoption-arg-optimize_model", false]], "optional_inputs": [[43, "cmdoption-arg-optional_inputs", false]], "out_node": [[43, "cmdoption-arg-out_node", false]], "output_model": [[43, "cmdoption-arg-output_model", false]], "output_names": [[43, "cmdoption-arg-output_names", false]], "output_nodes": [[43, "cmdoption-arg-output_nodes", false]], "outputs_to_make_channel_last": [[43, "cmdoption-arg-outputs_to_make_channel_last", false]], "outside_layer_modules": [[43, "cmdoption-arg-outside_layer_modules", false]], "overrides_config": [[43, "cmdoption-arg-overrides_config", false]], "overwrite_output_dir (olive.passes.pytorch.lora.hftrainingarguments attribute)": [[43, "olive.passes.pytorch.lora.HFTrainingArguments.overwrite_output_dir", false]], "parallel_jobs": [[43, "cmdoption-arg-parallel_jobs", false]], "past_key_value_name": [[43, "cmdoption-arg-past_key_value_name", false]], "per_channel": [[43, "cmdoption-arg-176", false], [43, "cmdoption-arg-57", false], [43, "cmdoption-arg-79", false], [43, "cmdoption-arg-per_channel", false]], "percdamp": [[43, "cmdoption-arg-percdamp", false]], "post": [[43, "cmdoption-arg-post", false]], "pre": [[43, "cmdoption-arg-pre", false]], 
"precision": [[43, "cmdoption-arg-precision", false]], "prepare_qnn_config": [[43, "cmdoption-arg-86", false], [43, "cmdoption-arg-prepare_qnn_config", false]], "preset": [[43, "cmdoption-arg-preset", false]], "provider_options_list": [[43, "cmdoption-arg-provider_options_list", false]], "providers_list": [[43, "cmdoption-arg-providers_list", false]], "ptl_data_module": [[43, "cmdoption-arg-ptl_data_module", false]], "ptl_module": [[43, "cmdoption-arg-ptl_module", false]], "pytorchmodelhandler (class in olive.model)": [[41, "olive.model.PyTorchModelHandler", false]], "q_group_size": [[43, "cmdoption-arg-q_group_size", false]], "qconfig_func": [[43, "cmdoption-arg-qconfig_func", false]], "qnn_extra_options": [[43, "cmdoption-arg-87", false], [43, "cmdoption-arg-qnn_extra_options", false]], "quant_format": [[43, "cmdoption-arg-143", false], [43, "cmdoption-arg-163", false], [43, "cmdoption-arg-179", false], [43, "cmdoption-arg-84", false], [43, "cmdoption-arg-quant_format", false]], "quant_level": [[43, "cmdoption-arg-136", false], [43, "cmdoption-arg-156", false], [43, "cmdoption-arg-quant_level", false]], "quant_mode": [[43, "cmdoption-arg-170", false], [43, "cmdoption-arg-51", false], [43, "cmdoption-arg-73", false], [43, "cmdoption-arg-quant_mode", false]], "quant_preprocess": [[43, "cmdoption-arg-177", false], [43, "cmdoption-arg-59", false], [43, "cmdoption-arg-81", false], [43, "cmdoption-arg-quant_preprocess", false]], "quant_type": [[43, "cmdoption-arg-quant_type", false]], "recipes": [[43, "cmdoption-arg-134", false], [43, "cmdoption-arg-154", false], [43, "cmdoption-arg-recipes", false]], "reduce_range": [[43, "cmdoption-arg-123", false], [43, "cmdoption-arg-135", false], [43, "cmdoption-arg-155", false], [43, "cmdoption-arg-58", false], [43, "cmdoption-arg-80", false], [43, "cmdoption-arg-reduce_range", false]], "resume_from_checkpoint (olive.passes.pytorch.lora.hftrainingarguments attribute)": [[43, 
"olive.passes.pytorch.lora.HFTrainingArguments.resume_from_checkpoint", false]], "rotate_mode": [[43, "cmdoption-arg-254", false], [43, "cmdoption-arg-rotate_mode", false]], "round_interval": [[43, "cmdoption-arg-round_interval", false]], "save_as_external_data": [[43, "cmdoption-arg-1", false], [43, "cmdoption-arg-101", false], [43, "cmdoption-arg-107", false], [43, "cmdoption-arg-11", false], [43, "cmdoption-arg-112", false], [43, "cmdoption-arg-117", false], [43, "cmdoption-arg-124", false], [43, "cmdoption-arg-144", false], [43, "cmdoption-arg-165", false], [43, "cmdoption-arg-184", false], [43, "cmdoption-arg-190", false], [43, "cmdoption-arg-196", false], [43, "cmdoption-arg-20", false], [43, "cmdoption-arg-201", false], [43, "cmdoption-arg-206", false], [43, "cmdoption-arg-216", false], [43, "cmdoption-arg-25", false], [43, "cmdoption-arg-279", false], [43, "cmdoption-arg-31", false], [43, "cmdoption-arg-36", false], [43, "cmdoption-arg-41", false], [43, "cmdoption-arg-46", false], [43, "cmdoption-arg-6", false], [43, "cmdoption-arg-68", false], [43, "cmdoption-arg-95", false], [43, "cmdoption-arg-save_as_external_data", false]], "save_format": [[43, "cmdoption-arg-save_format", false]], "save_metadata_for_token_generation": [[43, "cmdoption-arg-save_metadata_for_token_generation", false]], "save_quant_config": [[43, "cmdoption-arg-235", false], [43, "cmdoption-arg-save_quant_config", false]], "script_dir": [[43, "cmdoption-arg-17", false], [43, "cmdoption-arg-212", false], [43, "cmdoption-arg-246", false], [43, "cmdoption-arg-257", false], [43, "cmdoption-arg-260", false], [43, "cmdoption-arg-268", false], [43, "cmdoption-arg-270", false], [43, "cmdoption-arg-script_dir", false]], "search": [[43, "cmdoption-arg-search", false]], "seed": [[43, "cmdoption-arg-250", false], [43, "cmdoption-arg-252", false], [43, "cmdoption-arg-253", false], [43, "cmdoption-arg-seed", false], [44, "cmdoption-arg-1", false], [44, "cmdoption-arg-seed", false]], "size_threshold": 
[[43, "cmdoption-arg-104", false], [43, "cmdoption-arg-110", false], [43, "cmdoption-arg-115", false], [43, "cmdoption-arg-120", false], [43, "cmdoption-arg-127", false], [43, "cmdoption-arg-14", false], [43, "cmdoption-arg-147", false], [43, "cmdoption-arg-168", false], [43, "cmdoption-arg-187", false], [43, "cmdoption-arg-193", false], [43, "cmdoption-arg-199", false], [43, "cmdoption-arg-204", false], [43, "cmdoption-arg-209", false], [43, "cmdoption-arg-219", false], [43, "cmdoption-arg-23", false], [43, "cmdoption-arg-28", false], [43, "cmdoption-arg-282", false], [43, "cmdoption-arg-34", false], [43, "cmdoption-arg-39", false], [43, "cmdoption-arg-4", false], [43, "cmdoption-arg-44", false], [43, "cmdoption-arg-49", false], [43, "cmdoption-arg-71", false], [43, "cmdoption-arg-9", false], [43, "cmdoption-arg-98", false], [43, "cmdoption-arg-size_threshold", false]], "snpemodelhandler (class in olive.model)": [[41, "olive.model.SNPEModelHandler", false]], "source_dtype": [[43, "cmdoption-arg-source_dtype", false]], "sparsity": [[43, "cmdoption-arg-251", false], [43, "cmdoption-arg-sparsity", false]], "static_groups": [[43, "cmdoption-arg-static_groups", false]], "strict": [[43, "cmdoption-arg-strict", false]], "surgeries": [[43, "cmdoption-arg-surgeries", false]], "sym": [[43, "cmdoption-arg-sym", false]], "target_device": [[43, "cmdoption-arg-277", false], [43, "cmdoption-arg-target_device", false]], "target_dtype": [[43, "cmdoption-arg-target_dtype", false]], "target_modules": [[43, "cmdoption-arg-target_modules", false]], "target_opset": [[43, "cmdoption-arg-0", false], [43, "cmdoption-arg-189", false], [43, "cmdoption-arg-213", false], [43, "cmdoption-arg-278", false], [43, "cmdoption-arg-target_opset", false]], "tool_command": [[43, "cmdoption-arg-tool_command", false]], "tool_command_args": [[43, "cmdoption-arg-tool_command_args", false]], "torch_dtype": [[43, "cmdoption-arg-224", false], [43, "cmdoption-arg-229", false], [43, "cmdoption-arg-240", false], 
[43, "cmdoption-arg-torch_dtype", false]], "train_data_config": [[43, "cmdoption-arg-231", false], [43, "cmdoption-arg-242", false], [43, "cmdoption-arg-247", false], [43, "cmdoption-arg-train_data_config", false]], "training_args": [[43, "cmdoption-arg-233", false], [43, "cmdoption-arg-244", false], [43, "cmdoption-arg-255", false], [43, "cmdoption-arg-training_args", false]], "training_loop_func": [[43, "cmdoption-arg-training_loop_func", false]], "trt_fp16_enable": [[43, "cmdoption-arg-trt_fp16_enable", false]], "true_sequential": [[43, "cmdoption-arg-true_sequential", false]], "tuning_criterion": [[43, "cmdoption-arg-138", false], [43, "cmdoption-arg-158", false], [43, "cmdoption-arg-tuning_criterion", false]], "use_dynamo_exporter": [[43, "cmdoption-arg-use_dynamo_exporter", false]], "use_enhanced_quantizer": [[43, "cmdoption-arg-use_enhanced_quantizer", false]], "use_external_data_format": [[43, "cmdoption-arg-use_external_data_format", false]], "use_forced_decoder_ids": [[43, "cmdoption-arg-use_forced_decoder_ids", false]], "use_gpu": [[43, "cmdoption-arg-195", false], [43, "cmdoption-arg-use_gpu", false]], "use_gqa": [[43, "cmdoption-arg-use_gqa", false]], "use_int4": [[43, "cmdoption-arg-use_int4", false]], "use_logits_processor": [[43, "cmdoption-arg-use_logits_processor", false]], "use_prefix_vocab_mask": [[43, "cmdoption-arg-use_prefix_vocab_mask", false]], "use_symbolic_shape_infer": [[43, "cmdoption-arg-use_symbolic_shape_infer", false]], "use_temperature": [[43, "cmdoption-arg-use_temperature", false]], "use_transpose_op": [[43, "cmdoption-arg-use_transpose_op", false]], "use_vocab_mask": [[43, "cmdoption-arg-use_vocab_mask", false]], "user_script": [[43, "cmdoption-arg-16", false], [43, "cmdoption-arg-211", false], [43, "cmdoption-arg-245", false], [43, "cmdoption-arg-256", false], [43, "cmdoption-arg-259", false], [43, "cmdoption-arg-267", false], [43, "cmdoption-arg-269", false], [43, "cmdoption-arg-user_script", false]], "val_data_config": [[43, 
"cmdoption-arg-val_data_config", false]], "version": [[43, "cmdoption-arg-version", false]], "w_bit": [[43, "cmdoption-arg-w_bit", false]], "warmup_ratio (olive.passes.pytorch.lora.hftrainingarguments attribute)": [[43, "olive.passes.pytorch.lora.HFTrainingArguments.warmup_ratio", false]], "weight_only_config": [[43, "cmdoption-arg-140", false], [43, "cmdoption-arg-160", false], [43, "cmdoption-arg-weight_only_config", false]], "weight_only_quant_configs": [[43, "cmdoption-arg-weight_only_quant_configs", false]], "weight_type": [[43, "cmdoption-arg-172", false], [43, "cmdoption-arg-52", false], [43, "cmdoption-arg-74", false], [43, "cmdoption-arg-weight_type", false]], "weightsymmetric": [[43, "cmdoption-arg-182", false], [43, "cmdoption-arg-63", false], [43, "cmdoption-arg-90", false], [43, "cmdoption-arg-WeightSymmetric", false]], "with_replacement": [[44, "cmdoption-arg-with_replacement", false]], "workspace": [[43, "cmdoption-arg-133", false], [43, "cmdoption-arg-153", false], [43, "cmdoption-arg-workspace", false]], "zero_point": [[43, "cmdoption-arg-zero_point", false]]}, "objects": {"": [[43, 3, 1, "cmdoption-arg-ActivationSymmetric", "ActivationSymmetric"], [43, 3, 1, "cmdoption-arg-AddQDQPairToWeight", "AddQDQPairToWeight"], [43, 3, 1, "cmdoption-arg-EnableSubgraph", "EnableSubgraph"], [43, 3, 1, "cmdoption-arg-ForceQuantizeNoInputCheck", "ForceQuantizeNoInputCheck"], [43, 3, 1, "cmdoption-arg-MatMulConstBOnly", "MatMulConstBOnly"], [43, 3, 1, "cmdoption-arg-WeightSymmetric", "WeightSymmetric"], [43, 3, 1, "cmdoption-arg-a_bits", "a_bits"], [43, 3, 1, "cmdoption-arg-a_per_token", "a_per_token"], [43, 3, 1, "cmdoption-arg-a_symmetric", "a_symmetric"], [43, 3, 1, "cmdoption-arg-accuracy_level", "accuracy_level"], [43, 3, 1, "cmdoption-arg-activation_type", "activation_type"], [43, 3, 1, "cmdoption-arg-add_zero_point", "add_zero_point"], [43, 3, 1, "cmdoption-arg-algorithm", "algorithm"], [43, 3, 1, "cmdoption-arg-all_tensors_to_one_file", 
"all_tensors_to_one_file"], [43, 3, 1, "cmdoption-arg-allow_tf32", "allow_tf32"], [43, 3, 1, "cmdoption-arg-append_first_op_types_to_quantize_list", "append_first_op_types_to_quantize_list"], [43, 3, 1, "cmdoption-arg-approach", "approach"], [43, 3, 1, "cmdoption-arg-atol", "atol"], [43, 3, 1, "cmdoption-arg-backend", "backend"], [43, 3, 1, "cmdoption-arg-binary_file", "binary_file"], [43, 3, 1, "cmdoption-arg-bits", "bits"], [43, 3, 1, "cmdoption-arg-block_size", "block_size"], [43, 3, 1, "cmdoption-arg-block_to_split", "block_to_split"], [43, 3, 1, "cmdoption-arg-blocksize", "blocksize"], [43, 3, 1, "cmdoption-arg-calibrate_method", "calibrate_method"], [43, 3, 1, "cmdoption-arg-calibration_batch_size", "calibration_batch_size"], [43, 3, 1, "cmdoption-arg-calibration_data_config", "calibration_data_config"], [43, 3, 1, "cmdoption-arg-calibration_nsamples", "calibration_nsamples"], [43, 3, 1, "cmdoption-arg-calibration_sampling_size", "calibration_sampling_size"], [43, 3, 1, "cmdoption-arg-checkpoint_path", "checkpoint_path"], [43, 3, 1, "cmdoption-arg-components", "components"], [43, 3, 1, "cmdoption-arg-compress_to_fp16", "compress_to_fp16"], [43, 3, 1, "cmdoption-arg-compute_dtype", "compute_dtype"], [43, 3, 1, "cmdoption-arg-convert_attribute", "convert_attribute"], [43, 3, 1, "cmdoption-arg-cost_model", "cost_model"], [43, 3, 1, "cmdoption-arg-cpu_cores", "cpu_cores"], [43, 3, 1, "cmdoption-arg-damp_percent", "damp_percent"], [43, 3, 1, "cmdoption-arg-data_config", "data_config"], [43, 3, 1, "cmdoption-arg-desc_act", "desc_act"], [43, 3, 1, "cmdoption-arg-device", "device"], [43, 3, 1, "cmdoption-arg-dim_param", "dim_param"], [43, 3, 1, "cmdoption-arg-dim_value", "dim_value"], [43, 3, 1, "cmdoption-arg-do_validate", "do_validate"], [43, 3, 1, "cmdoption-arg-domain", "domain"], [43, 3, 1, "cmdoption-arg-double_quant", "double_quant"], [43, 3, 1, "cmdoption-arg-duo_scaling", "duo_scaling"], [43, 3, 1, "cmdoption-arg-dynamic", "dynamic"], [43, 3, 1, 
"cmdoption-arg-dynamic_lora_r", "dynamic_lora_r"], [43, 3, 1, "cmdoption-arg-element_wise_binary_ops", "element_wise_binary_ops"], [43, 3, 1, "cmdoption-arg-enable_cuda_graph", "enable_cuda_graph"], [43, 3, 1, "cmdoption-arg-enable_dpu", "enable_dpu"], [43, 3, 1, "cmdoption-arg-enable_htp", "enable_htp"], [43, 3, 1, "cmdoption-arg-enable_profiling", "enable_profiling"], [43, 3, 1, "cmdoption-arg-eval_data_config", "eval_data_config"], [43, 3, 1, "cmdoption-arg-example_input_func", "example_input_func"], [43, 3, 1, "cmdoption-arg-exclude_embeds", "exclude_embeds"], [43, 3, 1, "cmdoption-arg-exclude_lm_head", "exclude_lm_head"], [43, 3, 1, "cmdoption-arg-excluded_precisions", "excluded_precisions"], [43, 3, 1, "cmdoption-arg-execution_mode_list", "execution_mode_list"], [43, 3, 1, "cmdoption-arg-export_compatible", "export_compatible"], [43, 3, 1, "cmdoption-arg-external_data_name", "external_data_name"], [43, 3, 1, "cmdoption-arg-extra_args", "extra_args"], [43, 3, 1, "cmdoption-arg-extra_configs", "extra_configs"], [43, 3, 1, "cmdoption-arg-extra_options", "extra_options"], [43, 3, 1, "cmdoption-arg-extra_session_config", "extra_session_config"], [43, 3, 1, "cmdoption-arg-final_orientation", "final_orientation"], [43, 3, 1, "cmdoption-arg-float16", "float16"], [43, 3, 1, "cmdoption-arg-force_evaluate_other_eps", "force_evaluate_other_eps"], [43, 3, 1, "cmdoption-arg-force_fp16_inputs", "force_fp16_inputs"], [43, 3, 1, "cmdoption-arg-force_fp32_nodes", "force_fp32_nodes"], [43, 3, 1, "cmdoption-arg-force_fp32_ops", "force_fp32_ops"], [43, 3, 1, "cmdoption-arg-fp16", "fp16"], [43, 3, 1, "cmdoption-arg-fuse_layernorm", "fuse_layernorm"], [43, 3, 1, "cmdoption-arg-gpus", "gpus"], [44, 3, 1, "cmdoption-arg-group", "group"], [43, 3, 1, "cmdoption-arg-group_size", "group_size"], [43, 3, 1, "cmdoption-arg-hidden_size", "hidden_size"], [43, 3, 1, "cmdoption-arg-htp_socs", "htp_socs"], [43, 3, 1, "cmdoption-arg-ignored_scope", "ignored_scope"], [43, 3, 1, 
"cmdoption-arg-ignored_scope_type", "ignored_scope_type"], [43, 3, 1, "cmdoption-arg-input", "input"], [43, 3, 1, "cmdoption-arg-input_dim", "input_dim"], [43, 3, 1, "cmdoption-arg-input_int32", "input_int32"], [43, 3, 1, "cmdoption-arg-input_layouts", "input_layouts"], [43, 3, 1, "cmdoption-arg-input_model_dtype", "input_model_dtype"], [43, 3, 1, "cmdoption-arg-input_name", "input_name"], [43, 3, 1, "cmdoption-arg-input_names", "input_names"], [43, 3, 1, "cmdoption-arg-input_nodes", "input_nodes"], [43, 3, 1, "cmdoption-arg-input_shape", "input_shape"], [43, 3, 1, "cmdoption-arg-input_shapes", "input_shapes"], [43, 3, 1, "cmdoption-arg-input_types", "input_types"], [43, 3, 1, "cmdoption-arg-inputs_to_make_channel_last", "inputs_to_make_channel_last"], [43, 3, 1, "cmdoption-arg-inside_layer_modules", "inside_layer_modules"], [43, 3, 1, "cmdoption-arg-int4_accuracy_level", "int4_accuracy_level"], [43, 3, 1, "cmdoption-arg-int4_block_size", "int4_block_size"], [43, 3, 1, "cmdoption-arg-inter_thread_num_list", "inter_thread_num_list"], [43, 3, 1, "cmdoption-arg-intra_thread_num_list", "intra_thread_num_list"], [43, 3, 1, "cmdoption-arg-io_bind", "io_bind"], [43, 3, 1, "cmdoption-arg-is_symmetric", "is_symmetric"], [43, 3, 1, "cmdoption-arg-keep_io_types", "keep_io_types"], [43, 3, 1, "cmdoption-arg-layer_name_filter", "layer_name_filter"], [43, 3, 1, "cmdoption-arg-layers_block_name", "layers_block_name"], [43, 3, 1, "cmdoption-arg-lib_name", "lib_name"], [43, 3, 1, "cmdoption-arg-lib_targets", "lib_targets"], [43, 3, 1, "cmdoption-arg-loftq_iter", "loftq_iter"], [43, 3, 1, "cmdoption-arg-logger", "logger"], [43, 3, 1, "cmdoption-arg-lora_alpha", "lora_alpha"], [43, 3, 1, "cmdoption-arg-lora_dropout", "lora_dropout"], [43, 3, 1, "cmdoption-arg-lora_r", "lora_r"], [43, 3, 1, "cmdoption-arg-make_inputs", "make_inputs"], [43, 3, 1, "cmdoption-arg-max_finite_val", "max_finite_val"], [43, 3, 1, "cmdoption-arg-max_layer", "max_layer"], [43, 3, 1, 
"cmdoption-arg-merge_adapter_weights", "merge_adapter_weights"], [43, 3, 1, "cmdoption-arg-metadata_only", "metadata_only"], [43, 3, 1, "cmdoption-arg-metric", "metric"], [43, 3, 1, "cmdoption-arg-min_layer", "min_layer"], [43, 3, 1, "cmdoption-arg-min_positive_val", "min_positive_val"], [43, 3, 1, "cmdoption-arg-model_type", "model_type"], [43, 3, 1, "cmdoption-arg-modules_to_fuse", "modules_to_fuse"], [43, 3, 1, "cmdoption-arg-modules_to_not_convert", "modules_to_not_convert"], [43, 3, 1, "cmdoption-arg-modules_to_save", "modules_to_save"], [44, 3, 1, "cmdoption-arg-multivariate", "multivariate"], [43, 3, 1, "cmdoption-arg-name_pattern", "name_pattern"], [43, 3, 1, "cmdoption-arg-need_layer_fusing", "need_layer_fusing"], [43, 3, 1, "cmdoption-arg-no_repeat_ngram_size", "no_repeat_ngram_size"], [43, 3, 1, "cmdoption-arg-node_block_list", "node_block_list"], [43, 3, 1, "cmdoption-arg-nodes_to_exclude", "nodes_to_exclude"], [43, 3, 1, "cmdoption-arg-nodes_to_quantize", "nodes_to_quantize"], [43, 3, 1, "cmdoption-arg-num_epochs", "num_epochs"], [43, 3, 1, "cmdoption-arg-num_heads", "num_heads"], [43, 3, 1, "cmdoption-arg-num_key_value_heads", "num_key_value_heads"], [44, 3, 1, "cmdoption-arg-num_samples", "num_samples"], [43, 3, 1, "cmdoption-arg-num_splits", "num_splits"], [43, 3, 1, "cmdoption-arg-num_steps", "num_steps"], [43, 3, 1, "cmdoption-arg-only_onnxruntime", "only_onnxruntime"], [43, 3, 1, "cmdoption-arg-op_block_list", "op_block_list"], [43, 3, 1, "cmdoption-arg-op_type_dict", "op_type_dict"], [43, 3, 1, "cmdoption-arg-op_types_to_quantize", "op_types_to_quantize"], [43, 3, 1, "cmdoption-arg-opt_level", "opt_level"], [43, 3, 1, "cmdoption-arg-opt_level_list", "opt_level_list"], [43, 3, 1, "cmdoption-arg-optimization_options", "optimization_options"], [43, 3, 1, "cmdoption-arg-optimize_model", "optimize_model"], [43, 3, 1, "cmdoption-arg-optional_inputs", "optional_inputs"], [43, 3, 1, "cmdoption-arg-out_node", "out_node"], [43, 3, 1, 
"cmdoption-arg-output_model", "output_model"], [43, 3, 1, "cmdoption-arg-output_names", "output_names"], [43, 3, 1, "cmdoption-arg-output_nodes", "output_nodes"], [43, 3, 1, "cmdoption-arg-outputs_to_make_channel_last", "outputs_to_make_channel_last"], [43, 3, 1, "cmdoption-arg-outside_layer_modules", "outside_layer_modules"], [43, 3, 1, "cmdoption-arg-overrides_config", "overrides_config"], [43, 3, 1, "cmdoption-arg-parallel_jobs", "parallel_jobs"], [43, 3, 1, "cmdoption-arg-past_key_value_name", "past_key_value_name"], [43, 3, 1, "cmdoption-arg-per_channel", "per_channel"], [43, 3, 1, "cmdoption-arg-percdamp", "percdamp"], [43, 3, 1, "cmdoption-arg-post", "post"], [43, 3, 1, "cmdoption-arg-pre", "pre"], [43, 3, 1, "cmdoption-arg-precision", "precision"], [43, 3, 1, "cmdoption-arg-prepare_qnn_config", "prepare_qnn_config"], [43, 3, 1, "cmdoption-arg-preset", "preset"], [43, 3, 1, "cmdoption-arg-provider_options_list", "provider_options_list"], [43, 3, 1, "cmdoption-arg-providers_list", "providers_list"], [43, 3, 1, "cmdoption-arg-ptl_data_module", "ptl_data_module"], [43, 3, 1, "cmdoption-arg-ptl_module", "ptl_module"], [43, 3, 1, "cmdoption-arg-q_group_size", "q_group_size"], [43, 3, 1, "cmdoption-arg-qconfig_func", "qconfig_func"], [43, 3, 1, "cmdoption-arg-qnn_extra_options", "qnn_extra_options"], [43, 3, 1, "cmdoption-arg-quant_format", "quant_format"], [43, 3, 1, "cmdoption-arg-quant_level", "quant_level"], [43, 3, 1, "cmdoption-arg-quant_mode", "quant_mode"], [43, 3, 1, "cmdoption-arg-quant_preprocess", "quant_preprocess"], [43, 3, 1, "cmdoption-arg-quant_type", "quant_type"], [43, 3, 1, "cmdoption-arg-recipes", "recipes"], [43, 3, 1, "cmdoption-arg-reduce_range", "reduce_range"], [43, 3, 1, "cmdoption-arg-rotate_mode", "rotate_mode"], [43, 3, 1, "cmdoption-arg-round_interval", "round_interval"], [43, 3, 1, "cmdoption-arg-save_as_external_data", "save_as_external_data"], [43, 3, 1, "cmdoption-arg-save_format", "save_format"], [43, 3, 1, 
"cmdoption-arg-save_metadata_for_token_generation", "save_metadata_for_token_generation"], [43, 3, 1, "cmdoption-arg-save_quant_config", "save_quant_config"], [43, 3, 1, "cmdoption-arg-script_dir", "script_dir"], [43, 3, 1, "cmdoption-arg-search", "search"], [43, 3, 1, "cmdoption-arg-seed", "seed"], [43, 3, 1, "cmdoption-arg-size_threshold", "size_threshold"], [43, 3, 1, "cmdoption-arg-source_dtype", "source_dtype"], [43, 3, 1, "cmdoption-arg-sparsity", "sparsity"], [43, 3, 1, "cmdoption-arg-static_groups", "static_groups"], [43, 3, 1, "cmdoption-arg-strict", "strict"], [43, 3, 1, "cmdoption-arg-surgeries", "surgeries"], [43, 3, 1, "cmdoption-arg-sym", "sym"], [43, 3, 1, "cmdoption-arg-target_device", "target_device"], [43, 3, 1, "cmdoption-arg-target_dtype", "target_dtype"], [43, 3, 1, "cmdoption-arg-target_modules", "target_modules"], [43, 3, 1, "cmdoption-arg-target_opset", "target_opset"], [43, 3, 1, "cmdoption-arg-tool_command", "tool_command"], [43, 3, 1, "cmdoption-arg-tool_command_args", "tool_command_args"], [43, 3, 1, "cmdoption-arg-torch_dtype", "torch_dtype"], [43, 3, 1, "cmdoption-arg-train_data_config", "train_data_config"], [43, 3, 1, "cmdoption-arg-training_args", "training_args"], [43, 3, 1, "cmdoption-arg-training_loop_func", "training_loop_func"], [43, 3, 1, "cmdoption-arg-trt_fp16_enable", "trt_fp16_enable"], [43, 3, 1, "cmdoption-arg-true_sequential", "true_sequential"], [43, 3, 1, "cmdoption-arg-tuning_criterion", "tuning_criterion"], [43, 3, 1, "cmdoption-arg-use_dynamo_exporter", "use_dynamo_exporter"], [43, 3, 1, "cmdoption-arg-use_enhanced_quantizer", "use_enhanced_quantizer"], [43, 3, 1, "cmdoption-arg-use_external_data_format", "use_external_data_format"], [43, 3, 1, "cmdoption-arg-use_forced_decoder_ids", "use_forced_decoder_ids"], [43, 3, 1, "cmdoption-arg-use_gpu", "use_gpu"], [43, 3, 1, "cmdoption-arg-use_gqa", "use_gqa"], [43, 3, 1, "cmdoption-arg-use_int4", "use_int4"], [43, 3, 1, "cmdoption-arg-use_logits_processor", 
"use_logits_processor"], [43, 3, 1, "cmdoption-arg-use_prefix_vocab_mask", "use_prefix_vocab_mask"], [43, 3, 1, "cmdoption-arg-use_symbolic_shape_infer", "use_symbolic_shape_infer"], [43, 3, 1, "cmdoption-arg-use_temperature", "use_temperature"], [43, 3, 1, "cmdoption-arg-use_transpose_op", "use_transpose_op"], [43, 3, 1, "cmdoption-arg-use_vocab_mask", "use_vocab_mask"], [43, 3, 1, "cmdoption-arg-user_script", "user_script"], [43, 3, 1, "cmdoption-arg-val_data_config", "val_data_config"], [43, 3, 1, "cmdoption-arg-version", "version"], [43, 3, 1, "cmdoption-arg-w_bit", "w_bit"], [43, 3, 1, "cmdoption-arg-weight_only_config", "weight_only_config"], [43, 3, 1, "cmdoption-arg-weight_only_quant_configs", "weight_only_quant_configs"], [43, 3, 1, "cmdoption-arg-weight_type", "weight_type"], [44, 3, 1, "cmdoption-arg-with_replacement", "with_replacement"], [43, 3, 1, "cmdoption-arg-workspace", "workspace"], [43, 3, 1, "cmdoption-arg-zero_point", "zero_point"]], "extra.Sigmoid": [[43, 3, 1, "cmdoption-arg-extra.Sigmoid.nnapi", "nnapi"]], "olive.model": [[41, 0, 1, "", "CompositeModelHandler"], [41, 0, 1, "", "DistributedHfModelHandler"], [41, 0, 1, "", "DistributedOnnxModelHandler"], [41, 0, 1, "", "HfModelHandler"], [41, 0, 1, "", "ModelConfig"], [41, 0, 1, "", "ONNXModelHandler"], [41, 0, 1, "", "OpenVINOModelHandler"], [41, 0, 1, "", "PyTorchModelHandler"], [41, 0, 1, "", "SNPEModelHandler"]], "olive.passes.pytorch.lora": [[43, 1, 1, "", "HFTrainingArguments"]], "olive.passes.pytorch.lora.HFTrainingArguments": [[43, 2, 1, "", "evaluation_strategy"], [43, 2, 1, "", "learning_rate"], [43, 2, 1, "", "lr_scheduler_type"], [43, 2, 1, "", "optim"], [43, 2, 1, "", "overwrite_output_dir"], [43, 2, 1, "", "resume_from_checkpoint"], [43, 2, 1, "", "warmup_ratio"]]}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "pydantic_settings", "Python settings"], "2": ["py", "pydantic_field", "Python field"], "3": ["std", "cmdoption", "program option"]}, "objtypes": {"0": 
"py:class", "1": "py:pydantic_settings", "2": "py:pydantic_field", "3": "std:cmdoption"}, "terms": {"": [1, 8, 17, 18, 28, 31, 42, 43, 45], "0": [1, 10, 11, 13, 17, 18, 19, 22, 23, 24, 26, 27, 29, 35, 39, 42, 43], "000": 29, "0002": [24, 43], "00456": 43, "00774": 43, "01": [1, 10, 19, 23, 42, 43], "03": 43, "04": [11, 35, 42], "05": 43, "06": 43, "07": 43, "0_onnxconvers": 27, "1": [1, 10, 11, 17, 18, 19, 20, 22, 26, 27, 29, 30, 35, 39, 42, 43, 44], "10": [27, 30, 42, 43], "100": [7, 8, 10, 43], "10000": 43, "1001": 26, "1024": [22, 39, 43], "12": [5, 35, 42, 43], "120": [13, 39], "128": [17, 39, 43], "13": [18, 30, 42], "14": [27, 30, 43], "1431c563dcfda9c9c3bf26c5d61ef58": 27, "15": [26, 39], "15024": 43, "15531": 43, "16": [19, 22, 24, 28, 39, 43], "16406": 43, "17": [39, 42, 43], "175b": 24, "176b": 24, "18": [26, 43], "1_orttransformersoptim": 27, "1b": [5, 6, 7, 8, 9], "1e": 43, "2": [10, 13, 18, 19, 22, 24, 27, 28, 31, 33, 34, 35, 39, 42, 43], "20": [10, 19, 42], "200": [5, 6, 29], "2023": [23, 43], "203": 29, "2048": 18, "2147483648": 22, "22": 43, "224": [17, 30, 43], "2301": 43, "2309": 43, "2313": 27, "24": 35, "2401": 43, "240101": 26, "2404": 43, "2405": 43, "256": [39, 43], "299": 26, "2_onnxquant": 27, "2gb": [28, 43], "3": [11, 17, 23, 24, 25, 26, 27, 28, 29, 30, 33, 34, 39, 42, 43], "3000": [11, 42], "3072": 18, "32": [23, 28, 33, 39, 43], "32bit": 26, "36": 27, "360m": [5, 6], "3_ortsessionparamstun": 27, "3x": [8, 34], "4": [0, 1, 3, 5, 11, 22, 24, 27, 31, 33, 34, 35, 39, 42, 43], "40": [9, 35, 36, 45], "4000": [11, 42], "42": 43, "44": 5, "4b": 43, "4k": [33, 39], "5": [3, 11, 24, 27, 28, 42, 43], "50": 43, "512": 18, "53fc6781998a4624b61959bb064622c": 27, "6": [3, 26, 35, 39], "60": 24, "60sec": [5, 6], "63442": 43, "64": [27, 39, 43], "7": 43, "768": 42, "7a320d6d630bced3548f242238392730": 27, "7b": [18, 42], "8": [17, 18, 23, 24, 25, 26, 33, 34, 35, 39, 43], "80": 43, "8602941176470589": 27, "9": 42, "96": 35, "99": [27, 43], "A": [2, 8, 13, 
25, 27, 29, 35, 39, 43], "As": [8, 22, 33, 35], "At": 10, "But": 17, "By": [42, 43], "For": [3, 5, 6, 9, 10, 11, 17, 18, 22, 27, 28, 30, 32, 35, 36, 39, 42, 43, 45], "If": [3, 5, 7, 8, 10, 11, 13, 15, 16, 17, 18, 19, 20, 22, 25, 26, 27, 30, 33, 35, 37, 39, 42, 43, 44], "In": [2, 5, 6, 9, 10, 11, 12, 17, 19, 22, 28, 29, 30, 33, 35, 42, 43], "It": [2, 3, 5, 6, 12, 15, 18, 22, 23, 24, 27, 32, 34, 35, 39, 42, 43], "Its": 42, "NOT": [10, 43], "No": [22, 27], "On": 42, "One": 43, "Such": 28, "That": 43, "The": [2, 3, 5, 6, 7, 8, 9, 10, 11, 13, 15, 16, 17, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 33, 34, 35, 36, 39, 41, 42, 43, 44, 45], "Then": [11, 12, 15, 17, 18], "There": [17, 27, 33, 35, 39, 42], "These": [22, 28, 33, 35, 39], "To": [2, 3, 8, 15, 16, 22, 32, 38, 42, 43], "Will": 39, "With": [5, 6, 10, 16, 18, 28, 34, 37], "_": [17, 27], "__getitem__": 1, "__init__": 1, "__len__": 1, "__model_input__": 22, "__model_output__": 22, "_default_config": 3, "_dummy_input": 39, "_io_config": 39, "_model_file_format": 39, "_model_load": 39, "_run_for_config": 3, "a843d77ae4964c04e145b83567fb5b05": 27, "a_bit": [34, 43], "a_per_token": 43, "a_symmetr": 43, "aarch64": 43, "ab": 43, "abil": [1, 33], "abl": [12, 42], "abort": [5, 6], "about": [3, 4, 9, 16, 18, 22, 23, 24, 25, 26, 30, 33, 34, 38, 40, 42, 43, 44], "abov": [10, 17, 18, 24, 30, 35, 42], "absolut": [23, 43], "acceler": [3, 5, 9, 10, 22, 24, 25, 26, 27, 30, 31, 33, 39, 42, 43, 45], "accelerator_spec": [3, 27], "accelerator_typ": 3, "acceleratorspec": 3, "accept": [3, 39, 43], "access": [5, 6, 12, 18, 42], "accompani": 45, "accord": 29, "account": [11, 16, 18, 39, 42], "account_nam": [16, 39], "accur": 8, "accuraci": [1, 2, 10, 18, 27, 30, 33, 34, 36, 39, 42, 43, 45], "accuracy_custom": 19, "accuracy_data_config": [1, 19, 42], "accuracy_level": 43, "accuracy_scor": [1, 19, 42], "accuracylevel": 43, "achiev": [2, 33, 45], "across": [5, 22, 27, 33], "activ": [2, 8, 10, 23, 24, 33, 34, 37, 39, 43], "activation_typ": [33, 
43], "activationsymmetr": 43, "actord": 43, "actual": 23, "ad": [1, 4, 18, 22, 42, 43], "adamw": 43, "adapt": [8, 24, 36, 42, 43], "adapter_format": 39, "adapter_path": [7, 8, 39, 41], "adapter_weight": 7, "adaptor": 43, "add": [4, 15, 16, 17, 18, 20, 22, 27, 29, 30, 39, 42, 43], "add_external_initi": [22, 43], "add_initi": 22, "add_output": 29, "add_qdq_pair_to_weight": 43, "add_qtype_convert": 43, "add_special_token": 39, "add_zero_output_0": 29, "add_zero_point": 43, "addit": [17, 20, 23, 33, 37, 43], "addnod": 29, "addqdqpairtoweight": 43, "adjust": 43, "advanc": [33, 39, 45], "advantag": [31, 43], "advent": 28, "affect": [16, 43], "after": [5, 6, 7, 11, 22, 24, 28, 29, 39, 42, 43], "again": 43, "against": [35, 43], "ai": [0, 5, 6, 7, 15, 16, 22, 23, 25, 26, 30, 35, 36, 37, 42, 43, 45], "algorithm": [8, 24, 28, 33, 34, 39, 40, 42, 43, 44, 45], "alia": [35, 42], "all": [0, 2, 5, 7, 8, 10, 11, 17, 27, 29, 30, 32, 39, 41, 42, 43], "all_tensors_to_one_fil": [9, 33, 43], "allow": [8, 9, 15, 16, 17, 18, 22, 23, 30, 43], "allow_tf32": 43, "alon": 30, "along": [2, 3, 17, 24, 39], "alpha": [39, 43], "alreadi": [27, 28, 43], "also": [2, 3, 16, 17, 18, 20, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 37, 39, 41, 42, 43], "alter": 42, "altern": [9, 19, 27, 30], "alwai": 43, "amd": [0, 5, 6, 42, 45], "aml": [11, 13, 18, 27, 39, 42], "aml_comput": [7, 11, 35, 39, 42], "aml_config": 13, "aml_config_path": [11, 13, 39], "aml_docker_config": [11, 35, 42], "aml_environment_config": 35, "aml_system": [11, 15, 18, 42], "among": [28, 42], "amper": 43, "ampl": 2, "an": [0, 2, 3, 5, 6, 7, 8, 13, 16, 17, 18, 19, 20, 22, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 39, 41, 42, 43, 45], "analysi": 43, "analyz": 22, "android": 43, "ani": [2, 3, 9, 11, 16, 17, 30, 32, 33, 36, 41, 42, 43], "anoth": [3, 10, 30, 42, 43], "answer": 39, "anywher": 23, "ao": [5, 6, 43], "apart": 43, "api": [5, 6, 34, 39, 43], "app": [5, 6, 7], "appear": 43, "append": [1, 43], 
"append_first_op_types_to_quantize_list": 43, "append_pre_post_processing_op": 43, "appendprepostprocessingop": [22, 42], "appli": [18, 22, 24, 27, 29, 33, 42, 43], "applic": [5, 6, 31, 43], "approach": [23, 24, 33, 43], "appropri": 43, "approxim": 29, "ar": [2, 3, 5, 6, 8, 11, 13, 16, 18, 22, 23, 24, 27, 28, 30, 33, 34, 35, 39, 41, 42, 43, 44, 45], "arc": [11, 14], "architectur": [2, 4, 25, 28, 30], "arena_extend_strategi": 22, "argmax": 22, "argument": [13, 17, 42, 43], "aris": 43, "arithmet": [23, 28], "around": [5, 6], "art": 8, "articl": 7, "articul": 8, "artifact": [7, 15, 35, 36, 42], "arxiv": 43, "ask": [5, 6], "asset": [11, 13, 14, 15, 27, 42], "assign": [28, 32, 42, 43], "assist": [5, 6, 7, 8, 42], "associ": [39, 43], "assum": [18, 42, 43], "asym": 43, "asymmetr": 39, "atol": 43, "attach": 12, "attent": [28, 42, 43], "attention_mask": [17, 18], "attribut": [1, 16, 18, 27, 35, 43], "audio": 0, "auroc": [1, 19, 42], "auth": [11, 42], "authent": 42, "auto": [2, 3, 5, 8, 36, 37, 43, 45], "auto_optimizer_config": 10, "autoawq": 43, "autoawqquant": [34, 42], "autogptq": 43, "autom": 28, "automat": [1, 6, 8, 11, 15, 18, 20, 22, 28, 36, 39, 42, 43, 45], "avail": [3, 6, 9, 10, 17, 18, 25, 26, 28, 29, 30, 32, 35, 37, 39, 41, 42, 43, 44], "averag": 43, "avg": [10, 19, 27, 42], "avoid": [17, 43], "awai": 15, "awar": [8, 10, 34, 42, 43], "awq": [8, 33, 34, 36, 39, 42, 43], "awq_lit": 33, "ax": [22, 42, 43], "axi": [42, 43], "az": [7, 16], "azur": [5, 6, 7, 20, 27, 35, 36, 38, 39, 45], "azureml": [0, 2, 7, 12, 13, 27], "azureml_cli": [11, 18, 20, 35], "azureml_client_config": 35, "azureml_datastor": [11, 20, 42], "azureml_job_output": [11, 20], "azureml_model": [11, 20], "azureml_registry_model": [11, 18, 20], "azureml_system": 35, "azuremlbatch": 27, "azuremlonlin": 27, "azuremlsystem": 2, "azurend12ssystem": 35, "azurend24rssystem": 35, "azurend24ssystem": 35, "azurend6ssystem": 35, "azurend96a100system": 35, "azurend96asystem": 35, "azurendv2system": [35, 42], "b": 
[5, 6, 22, 24, 30, 33, 43], "backend": [10, 18, 25, 33, 42, 43], "backpropag": 24, "bandwidth": [5, 6, 33], "base": [3, 5, 6, 7, 8, 10, 17, 22, 24, 27, 34, 35, 42, 43], "base_environment_id": 27, "base_imag": [11, 35, 42], "basic": [22, 30, 39, 43], "basic_quantization_flow": 43, "batch": [17, 19, 27, 30, 39, 42, 43], "batch_siz": [1, 10, 17, 18, 19, 22, 30, 39, 42, 43], "batchdeploy": 27, "batchnorm": 22, "beam": [42, 43], "beamsearch": [42, 43], "becaus": [8, 43], "becom": 28, "been": [2, 16, 22], "befor": [12, 13, 16, 17, 33, 39, 42, 43], "begin_of_text": [5, 6], "behavior": [29, 43], "being": [35, 39, 43], "belong": 3, "below": [5, 6, 7, 35, 43], "benefit": 38, "bert": [0, 10, 17, 22, 42, 43], "bert_gpu": 10, "bert_kera": 43, "bert_tf": 43, "besid": [17, 30, 41], "best": [2, 10, 19, 22, 27, 30, 32, 36, 42, 43, 45], "bestcandidatemodel": 27, "bestcandidatemodel_1": 27, "bestcandidatemodel_2": 27, "bestcandidatemodel_k": 27, "better": [10, 22, 42, 43], "between": [2, 10, 11, 18, 22, 28, 30, 34, 39, 42, 43], "bf16": 43, "bfloat16": [24, 39, 43], "bgr": 22, "bia": [22, 43], "bias": [22, 26], "bias_bitwidth": 43, "big": [34, 43], "bin": [35, 43], "binari": [42, 43], "binary_fil": 43, "bind": 43, "bit": [0, 8, 23, 24, 25, 26, 33, 34, 42, 43], "bitsandbyt": 43, "bitwidth": 8, "blob": 17, "block": [2, 39, 43], "block_siz": [39, 43], "block_to_split": 43, "blocksiz": 43, "blockwis": 39, "blog": [24, 28, 43], "bloom": [24, 43], "bn": 22, "bnb": 37, "bnb4": [8, 39], "bool": [10, 16, 27, 41, 42, 43, 44], "boolean": [35, 42], "both": [10, 19, 20, 30, 33, 35, 43], "broad": [5, 33], "broadcast": 22, "build": [2, 27, 35, 37, 43], "build_context_path": 35, "builder": 43, "built": [10, 22, 42, 45], "bundl": 43, "byte": [39, 43], "c": [5, 6, 13, 24, 25, 33, 39, 42, 43], "c499e39e42693aaab050820afd31e0c3": 27, "cach": [2, 9, 10, 14, 15, 25, 27, 42, 43, 45], "cache_config": 16, "cache_dir": [9, 10, 16, 27, 42], "calcul": [8, 19, 33, 43], "calib_data_config": [22, 23, 32, 33], 
"calib_data_coonfig": 42, "calibr": [8, 25, 33, 34, 43], "calibrate_method": [33, 43], "calibration_batch_s": 43, "calibration_data_config": [24, 43], "calibration_data_read": 43, "calibration_nsampl": 43, "calibration_sampling_s": [33, 43], "calibrationmethod": 43, "call": [5, 6, 7, 9, 10, 17, 19, 22, 24, 33, 34, 45], "callabl": [3, 16, 41, 43], "can": [1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 42, 43, 45], "cancel": 43, "candid": [2, 27], "cannot": [5, 6, 22, 25, 26, 30, 35, 42, 43], "capabl": [5, 22, 36, 38], "captur": [36, 37, 42, 43, 45], "capturesplitinfo": [39, 42], "cascad": 28, "case": [10, 17, 25, 30, 32, 35, 39, 42, 43], "cast": [22, 39, 43], "catalog": [5, 6, 18, 36], "catch": 10, "categor": [3, 35, 43], "categori": 3, "caus": [11, 42, 43], "causal": 42, "cb1_uint8": 22, "cd": 37, "centercrop": 22, "certain": [2, 18, 30, 32, 43], "chain": [8, 22], "chang": [5, 6, 22, 43], "channel": 43, "charact": 43, "chat": [5, 6], "chat_templ": [5, 6], "check": [15, 16, 22, 24, 26, 32, 33, 34, 43], "checkpoint": 43, "checkpoint_path": 43, "child": 41, "children": [39, 43], "chipset": 25, "choic": [39, 43], "choos": [6, 22, 28, 43], "chose": [2, 32], "cifar": 42, "cl": 3, "clang": [25, 43], "class": [1, 17, 41, 42, 43], "classif": [10, 17], "classmethod": 3, "clean": 42, "clean_cach": 42, "clean_evaluation_cach": 42, "cli": [5, 6, 7, 8, 18, 39, 40, 45], "client": 35, "clip": 43, "clone": 37, "close": 15, "cloud": 39, "cluster": [7, 11, 15, 35, 42], "code": [5, 6, 7, 17, 24, 27, 32, 37, 39, 42, 43], "code_fold": 27, "collect": [1, 41, 42, 43], "column": [24, 39, 43], "com": [11, 17, 22, 35, 37, 42, 43], "combin": [2, 10, 29, 32, 43], "come": 34, "comma": [39, 43], "command": [5, 6, 7, 8, 9, 12, 13, 15, 18, 22, 28, 36, 37, 43], "common": [22, 43, 45], "common_evalu": [10, 17, 27, 42], "common_pass": 17, "compact": 8, "compar": [8, 33, 34], "comparison": 10, "compat": [22, 33, 43], 
"compil": [25, 31, 42, 43], "complet": [7, 8, 16, 17, 18, 24, 30, 42], "complex": [33, 36], "compliant": 33, "compon": [2, 22, 28, 36, 41, 43, 45], "compos": [2, 17, 42], "composit": [42, 43], "compositemodel": 41, "compositemodelhandl": [41, 42, 43], "compress": [8, 28, 33, 43, 45], "compress_to_fp16": 43, "compressor": [0, 8, 9, 42, 43], "compris": 8, "comput": [0, 2, 5, 7, 8, 12, 13, 18, 22, 23, 26, 27, 29, 33, 35, 36, 38, 39, 42, 43], "compute_dtyp": [24, 43], "compute_logit": [5, 6, 7], "compute_nam": [7, 13, 39], "compute_param": 42, "concat": 22, "concatfromsequ": 22, "concept": [2, 35], "cond": 22, "conda": [5, 11, 35, 37, 42], "conda_file_path": [11, 35, 42], "condit": [3, 22, 30, 43], "conditionaldefault": [3, 43], "conduc": 34, "config": [2, 3, 9, 10, 11, 12, 13, 15, 20, 22, 23, 24, 25, 26, 30, 32, 33, 34, 35, 37, 39, 41, 43], "config_fil": [10, 15], "configu": 15, "configur": [1, 2, 5, 6, 9, 11, 18, 20, 39, 40, 42, 43, 44], "confirm": 39, "conflict": 17, "connect": [14, 28], "consecut": [22, 29], "conserv": 43, "consid": [28, 43], "consist": [2, 43], "consol": [5, 6], "consolid": [33, 34], "const": [22, 43], "constant": [22, 29, 43], "constant_inputs_file_nam": 41, "constraint": 43, "construct": [2, 45], "consum": [5, 6, 39, 43], "consumpt": 33, "contain": [2, 9, 16, 17, 23, 26, 27, 28, 32, 35, 39, 41, 42, 43], "container_nam": [16, 39], "content": [25, 43], "context": [5, 6, 24, 35, 42, 43], "continu": [15, 16, 43], "contrib": [39, 42, 43], "contribut": 37, "control": [5, 6, 30, 43], "conv": [22, 43], "conveni": 10, "convers": [0, 2, 9, 10, 36, 38, 43], "conversion_devic": 39, "convert": [5, 6, 7, 8, 9, 17, 22, 23, 25, 26, 28, 31, 33, 34, 42, 43], "convert_attribut": 43, "convert_float_to_float16": 43, "convertbgrtoimag": 22, "convertimagetobgr": 22, "copi": [5, 6, 7, 43], "core": [2, 16, 25, 26, 39, 43], "coreml": 30, "correct": 43, "correctli": [39, 43], "correspond": [22, 26, 30, 39, 43], "cosin": 43, "cost": [33, 43], "cost_model": [39, 43], 
"costsplit": 28, "could": [3, 35, 42, 43], "count": [22, 28, 35], "coupl": 13, "cover": [17, 22, 45], "cpu": [0, 3, 5, 6, 7, 8, 9, 11, 24, 27, 31, 34, 35, 37, 39, 42, 43, 45], "cpu_1": 27, "cpu_cor": [39, 43], "cpu_spr": 43, "cpuexecutionprovid": [3, 5, 6, 7, 8, 9, 22, 27, 35, 39, 43], "cr1_uint8": 22, "creat": [1, 2, 3, 5, 6, 7, 8, 13, 17, 18, 22, 27, 28, 34, 35, 36, 37, 39, 41, 42, 43], "create_dataload": 42, "create_stream": [5, 6, 7], "creation": [11, 27, 42], "credenti": [11, 42], "cricket": 7, "criteria": 39, "critic": 39, "cross": [5, 6, 22, 23, 30, 33], "csv": [28, 39, 43], "cuda": [0, 3, 5, 6, 10, 39, 43], "cuda11": 35, "cuda_perf_tun": 10, "cuda_transformers_optim": 10, "cudaexecutionprovid": [3, 5, 6, 10, 22, 28, 35, 39], "cudnn8": 35, "cudnn_conv_algo_search": 22, "curat": 20, "current": [2, 8, 10, 16, 17, 20, 28, 35, 39, 42, 43], "custom": [0, 2, 4, 16, 22, 24, 36, 39, 42, 43], "custom_io": 43, "customized_dataload": 17, "customized_huggingface_dataset": 17, "customized_huggingface_pre_process": 17, "customized_post_process": 17, "cut": [8, 36, 45], "cv": 43, "d": [13, 33, 39], "d1": 43, "d2": 43, "d_": 43, "damp": 43, "damp_perc": 43, "dampen": 43, "data": [1, 3, 15, 18, 19, 27, 30, 33, 36, 39, 42, 43], "data2": 43, "data_config": [1, 9, 10, 17, 18, 19, 22, 23, 26, 32, 33, 34, 42, 43], "data_dir": [11, 17, 19, 42], "data_fil": [17, 39], "data_nam": [7, 8, 10, 17, 18, 39], "data_typ": [22, 29, 43], "dataclass": 3, "dataconfig": 43, "datacontain": 17, "dataload": [1, 17, 43], "dataloader_config": [1, 10, 17, 22, 24, 26, 33, 42], "datamodul": 43, "dataset": [1, 4, 8, 17, 24, 25, 33, 34, 37, 39, 43], "dataset_1": 17, "dataset_2": 17, "datastor": [15, 35, 42], "datastore_nam": [11, 20, 42], "date": 15, "datetim": 43, "dd": 15, "dead": 22, "deadend": 22, "deberta": 0, "debug": [39, 43], "decid": [32, 42], "decis": 28, "decod": [5, 6, 7, 22, 41, 43], "decoder_input_id": 43, "decoder_model": 43, "decoder_with_past": 43, "decoder_with_past_model": 43, 
"decompos": 44, "decomposit": 24, "decreas": 30, "dedic": 43, "dedicated_qdq_pair": 43, "deep": [17, 23, 24, 26, 31, 33, 34], "def": [1, 3, 17, 19, 39], "default": [0, 2, 3, 5, 6, 10, 11, 12, 13, 15, 16, 17, 18, 24, 27, 30, 32, 33, 35, 37, 39, 42, 43], "default_auth_param": [11, 42], "default_dataload": 17, "default_valu": [3, 32, 33, 43, 44], "defaultazurecredenti": [11, 42], "defaultlook": 43, "defin": [1, 2, 11, 17, 19, 27, 28, 32, 33, 36, 39, 43, 45], "definit": [30, 39, 42], "degrad": [1, 10, 19, 42], "delet": [13, 39], "deliv": 33, "depend": [3, 5, 6, 32, 35, 39, 42, 43], "deploi": [12, 23, 27, 28], "deploy": [2, 27, 36, 45], "deployment_config": 27, "deployment_nam": 27, "deploymentconfig": 27, "deprec": 43, "dequant": [42, 43], "dequantizelinear": [29, 43], "desc_act": 43, "describ": [19, 20, 22, 24, 32], "descript": [3, 8, 22, 27, 30, 42, 43, 44], "design": [4, 38], "desir": [2, 25, 33, 37, 39, 43], "detail": [7, 9, 11, 12, 13, 15, 18, 19, 22, 23, 24, 25, 26, 29, 30, 31, 33, 34, 35, 36, 38, 39, 42, 43, 44], "detect": 43, "determin": [10, 19, 28, 43], "develop": [10, 25, 26, 33, 37, 43], "devic": [2, 5, 6, 7, 8, 9, 10, 19, 25, 28, 35, 39, 42, 43, 45], "device_id": 22, "device_typ": 39, "diag": 43, "diagon": 43, "diagram": 2, "dict": [1, 3, 11, 27, 41, 42, 43], "dictionari": [2, 17, 32, 35, 39, 42, 43], "differ": [1, 2, 5, 6, 8, 10, 19, 22, 24, 25, 26, 27, 28, 35, 36, 39, 42, 43, 45], "difficult": 43, "diffus": [0, 43], "dim": [29, 39], "dim_index": 22, "dim_param": 43, "dim_valu": [30, 43], "dimens": [22, 30, 42, 43], "direct": 25, "directli": [1, 16, 17, 20, 39, 42, 43], "directml": [0, 3, 5, 6, 33, 35, 37, 43], "directori": [1, 2, 16, 17, 18, 19, 20, 26, 39, 42, 43], "directx": 5, "disabl": [10, 16, 43], "disable_al": 43, "disable_auto_optim": 10, "disable_force_evaluate_other_ep": 39, "disable_search": 32, "discov": [11, 18], "discuss": 2, "disk": [5, 6], "distil": [8, 33], "distilbert": 43, "distribut": [23, 42, 43], "distributedhfmodel": 43, 
"distributedhfmodelhandl": [41, 42, 43], "distributedonnxmodelhandl": [41, 42, 43], "div": 43, "dive": 17, "divid": [28, 43], "dlc": [26, 42, 43], "dml": [3, 33], "dmlexecutionprovid": [5, 6, 35, 39], "dn": 43, "do": [3, 18, 43], "do_copy_in_default_stream": 22, "do_valid": 43, "doc": [10, 43], "docker": [2, 12, 27, 42], "dockerfil": 35, "dockersystem": 2, "document": [5, 6, 11, 12, 18, 19, 20, 22, 26, 32, 42, 43], "doe": [10, 16, 17, 22, 35, 42, 43, 44], "doesn": [20, 27, 35], "domain": 43, "don": [3, 11, 22, 30, 42, 43], "done": [7, 22], "doubl": [11, 42], "double_qu": 43, "down": [13, 15, 43], "downcast": 43, "download": [5, 6, 7, 11, 12, 16, 18, 24, 35, 42], "dpu": [0, 43], "dq": 43, "driven": 33, "driver": 5, "drop": 34, "drop_typ": 23, "dropout": [22, 43], "dsp": 26, "dtype": [39, 42, 43], "due": [8, 28], "dummi": [17, 18, 39, 41], "dummy_data_config_templ": 17, "dummy_input": 39, "dummy_inputs_func": [16, 41], "dummydatacontain": 17, "duo_scal": 43, "duplic": 22, "dure": [8, 22, 23, 24, 37, 39, 42, 43], "dynam": [0, 8, 9, 33, 39, 42, 43], "dynamic_ax": [18, 42], "dynamic_lora_r": 43, "dynamic_shap": 18, "dynamic_to_fixed_shape_dim_param": 39, "dynamic_to_fixed_shape_dim_valu": 39, "dynamictofixedshap": [30, 42], "dynamo_export": [39, 43], "e": [5, 8, 10, 22, 27, 33, 37, 39, 43], "each": [2, 3, 11, 17, 22, 24, 27, 28, 30, 35, 39, 42, 43, 44, 45], "easi": [10, 28, 33, 34, 45], "easili": [8, 16, 17, 36, 42, 45], "edg": [8, 28, 36, 45], "effect": 43, "effici": [6, 22, 23, 24, 30, 33, 43], "egg": 37, "either": [1, 5, 6, 7, 33, 35, 39, 42, 43], "element": 43, "element_wise_binary_op": 43, "elimin": 22, "els": [42, 43], "embed": [28, 39, 43], "embedlayernorm": 30, "emploi": [2, 45], "empti": [5, 6, 42, 43], "en": [43, 44], "enabl": [5, 6, 8, 10, 16, 18, 30, 36, 37, 39, 42, 43, 45], "enable_al": 43, "enable_bas": 43, "enable_cpu_fallback": 43, "enable_cuda_graph": [10, 39, 43], "enable_dpu": 43, "enable_extend": 43, "enable_htp": [26, 43], "enable_profil": [22, 39, 
42, 43], "enable_search": 39, "enable_trt_fp16": 10, "enablesubgraph": 43, "encapsul": [8, 43], "encod": [5, 6, 7, 22, 39, 41, 43], "end": [5, 6, 7, 22, 42, 43], "end_header_id": [5, 6, 7, 8], "endpoint": 27, "endpoint_nam": 27, "engin": [3, 10, 11, 12, 25, 26, 27, 31, 33, 35], "enhanc": 43, "enough": 15, "ensur": [33, 43], "entir": [5, 6, 30, 42, 43], "entiremodel": 39, "entrei": 39, "entri": [39, 43], "entropi": 43, "env": 27, "environ": [2, 5, 12, 15, 22, 25, 26, 27, 30, 33, 37, 39, 42], "environment_vari": [27, 35], "eot_id": [5, 6, 7, 8], "ep": [0, 2, 10, 33, 35, 39, 43], "epoch": 43, "equal": [28, 42], "equival": [29, 42], "erf": 29, "erf_output": 29, "error": [5, 6, 35, 39, 42, 43, 45], "especi": 43, "estim": [2, 42, 44], "etc": [3, 10, 22, 33, 35, 42, 43], "eval_accuraci": 19, "eval_data_config": 43, "eval_dataset": 43, "eval_split": 39, "eval_subset": 39, "evalu": [1, 10, 11, 16, 17, 18, 19, 27, 35, 39, 41, 43, 45], "evaluate_func": [1, 19, 42], "evaluate_func_kwarg": [19, 42], "evaluate_input_model": 42, "evaluation_strategi": 43, "even": [8, 28, 34, 39, 43], "everi": 43, "everywher": 33, "ex": 43, "exactli": 22, "exampl": [5, 6, 7, 10, 11, 16, 17, 18, 19, 28, 32, 35, 38, 39, 41, 43, 45], "example_input_func": 43, "except": [5, 6], "exclud": [11, 22, 42, 43], "exclude_emb": [39, 43], "exclude_lm_head": [39, 43], "exclude_managed_identity_credenti": 42, "excluded_precis": 43, "exclus": [13, 30, 39], "execut": [3, 5, 6, 7, 8, 9, 10, 16, 22, 25, 26, 27, 33, 35, 36, 39, 41, 42, 43, 45], "execution_mod": 27, "execution_mode_list": [39, 43], "execution_ord": [2, 10, 27, 42], "execution_provid": [3, 9, 10, 19, 27, 35, 42, 43], "exhast": 39, "exhaust": [2, 22, 39, 42, 44], "exist": [13, 16, 18, 22, 27, 28, 30, 35, 39, 43], "exit": [5, 6], "exllama": 43, "expand": 22, "expect": [10, 24, 43], "experi": [8, 33, 36], "experiment": 45, "expert": 10, "explan": 29, "explicitli": [20, 43], "explor": 40, "export": [15, 27, 28, 35, 43], "export_compat": 43, 
"export_in_mlflow_format": 27, "expos": [29, 30], "express": 22, "extend": [38, 43], "extens": [22, 31, 42, 43], "extern": [22, 43], "external_data_nam": 43, "external_initializers_file_nam": 41, "extra": [27, 35, 37, 39, 43], "extra_arg": [25, 42, 43], "extra_config": [27, 43], "extra_opt": 43, "extra_session_config": [27, 39, 43], "extract": [42, 43], "extractadapt": [22, 42], "extractedadapt": 39, "f": [5, 6, 7], "f1": [10, 18, 42], "f1_score": [1, 19, 42], "face": [6, 7, 8, 9, 17, 24, 31, 36, 42, 43, 45], "factor": [10, 16, 43], "fail": [11, 42], "failur": 43, "fake": 24, "falcon": 0, "fals": [5, 6, 7, 10, 16, 18, 22, 27, 32, 35, 39, 41, 42, 43, 44], "famili": 28, "fast": [5, 6, 43], "fast_bias_correct": 43, "faster": [16, 25, 33, 34], "fastgelu": 30, "fatal": 42, "favorit": [8, 34], "featur": [16, 33, 37, 38, 40, 43], "fetch": 18, "few": 28, "field": [3, 10, 16, 17, 19, 39, 43], "file": [1, 3, 5, 6, 7, 10, 11, 12, 13, 15, 16, 18, 22, 24, 25, 26, 33, 34, 35, 42, 43], "fill": [12, 43], "filter": 35, "final": [2, 3, 16, 28, 35, 42, 43], "final_orient": 43, "find": [2, 11, 15, 18, 22, 24, 30, 36, 38, 42], "fine": [0, 7, 8, 22, 24, 30, 39, 42, 43], "finetun": [5, 24, 36, 37, 45], "first": [2, 9, 17, 18, 27, 28, 42, 43], "first_conv_or_matmul_quant": 43, "firstli": [27, 43], "five": [2, 35], "fix": [2, 25, 26, 32, 33, 39, 42, 43], "flag": [12, 13, 35, 43], "flatten": 22, "flexibl": [10, 15, 22, 30, 33], "float": [8, 23, 29, 33, 39, 42, 43], "float16": [10, 39, 42, 43], "float32": [30, 39, 42, 43], "floattoimagebyt": 22, "flop": [28, 39, 43], "flow": 10, "flush": [5, 6, 7], "focus": [23, 33], "fold": 22, "folder": [3, 11, 27, 39, 42, 43], "follow": [1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 15, 17, 18, 19, 22, 26, 27, 28, 34, 35, 37, 41, 42, 43, 44], "footprint": [7, 23, 24, 33], "forc": [39, 43], "force_evaluate_other_ep": 43, "force_fp16_input": 43, "force_fp32_nod": 43, "force_fp32_op": 43, "forcequantizenoinputcheck": 43, "form": [22, 25, 42], "format": [5, 6, 7, 8, 9, 
22, 25, 27, 30, 33, 39, 42, 43], "found": [13, 16, 18, 22, 24, 25, 26, 30, 42, 43], "four": [17, 28], "fp16": [0, 5, 6, 8, 10, 28, 31, 34, 39, 43], "fp32": [0, 5, 6, 10, 39, 43], "fp4": [39, 43], "fp8": 39, "fraction": 43, "framework": [23, 24, 25, 26, 33, 42, 43], "free": 12, "freez": 24, "friendli": [33, 34], "from": [1, 2, 3, 5, 6, 7, 8, 9, 13, 16, 17, 18, 20, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 39, 41, 42, 43, 44], "from_pretrain": 42, "frontier": [2, 42], "frozen": 24, "frustrat": 45, "ft": [7, 8], "full": [8, 22, 32], "fulli": [32, 36], "function": [1, 17, 19, 22, 29, 39, 41, 42, 43], "further": 43, "fuse": [22, 33, 42, 43], "fuse_layernorm": 43, "fusion": [30, 43], "fusion_opt": 43, "fusionopt": 43, "futur": 18, "g": [5, 8, 10, 22, 27, 33, 39, 43], "gate": [5, 6, 18, 43], "gather": 22, "gemm": [22, 43], "gemm_to_matmul": 43, "gemv": 43, "genai": 43, "gener": [5, 6, 7, 8, 18, 20, 22, 26, 27, 30, 33, 35, 41, 42, 43, 44], "generate_next_token": [5, 6, 7], "generation_config": [24, 43], "generatorparam": [5, 6, 7], "get": [6, 18, 28, 35, 38, 39, 41, 43, 45], "get_model_compon": 41, "get_next_token": [5, 6, 7], "get_qnn_qdq_config": 43, "gigabyt": 28, "git": 37, "github": [17, 22, 37, 39, 42], "give": [30, 42], "given": [10, 17, 39, 42, 43], "glue": [10, 17], "go": [5, 6], "goal": [1, 2, 10, 19, 33, 42], "gpt": 24, "gpt2": 43, "gpt_neox": 43, "gptj": 0, "gptq": [8, 33, 34, 39, 42, 43], "gptqquantiz": [34, 42], "gpu": [0, 3, 5, 6, 7, 8, 10, 24, 30, 31, 33, 34, 35, 37, 39, 43, 45], "gpu_mem_limit": 22, "gradient": 24, "grain": 30, "grant": 18, "graph": [5, 6, 22, 25, 28, 29, 30, 33, 36, 37, 41, 42, 43, 45], "graph_optimization_level": [27, 43], "graphsurgeri": [29, 42], "greatli": 7, "grid": 44, "group": [7, 11, 13, 35, 39, 42, 43, 44], "group_siz": 43, "groupqueryattent": 43, "guanaco": 18, "guid": [5, 18, 28], "h": [39, 43], "ha": [2, 3, 5, 7, 8, 9, 10, 16, 20, 22, 24, 28, 30, 37, 39, 41, 42, 43, 45], "hadamard": [34, 43], "half": 43, "handl": [5, 
6, 43], "handler": 43, "hang": [11, 42], "happen": 23, "hardwar": [0, 2, 5, 6, 8, 22, 23, 30, 33, 34, 35, 42, 43, 45], "hash": 39, "have": [1, 2, 5, 7, 8, 11, 18, 19, 22, 24, 27, 28, 30, 35, 41, 42, 43], "head": [28, 39, 42, 43], "header": [39, 43], "height": 22, "help": [5, 6, 10, 13, 23], "helper": 30, "here": [1, 10, 12, 16, 17, 18, 22, 30, 32, 35, 42, 43], "hessian": 43, "hexagon": 26, "hf": [18, 42, 43], "hf_model_nam": 39, "hf_token": [18, 42], "hfloadkwarg": 41, "hfmodel": [9, 10, 18, 24, 31, 39, 42], "hfmodelhandl": [41, 42, 43], "hftrainingargu": 24, "hh": 15, "hidden": [42, 43], "hidden_s": [42, 43], "high": [8, 22], "higher": [28, 33, 39, 42], "higher_is_bett": [19, 27, 42], "histori": [27, 43], "hold": 3, "home": [12, 35], "host": [2, 3, 9, 10, 18, 27, 35, 36], "how": [4, 7, 11, 12, 16, 18, 22, 28, 38, 39, 42, 43], "howev": 28, "hqq": [39, 43], "html": [22, 39, 43, 44], "htp": 43, "htp_soc": 43, "http": [11, 16, 17, 22, 37, 39, 42, 43, 44], "hub": [20, 28, 42], "hug": [6, 7, 8, 9, 17, 24, 31, 36, 42, 43, 45], "huggingfac": [5, 6, 8, 17, 24, 28, 34, 39, 42, 43], "huggingface_data_config": 17, "huggingface_dataset": 17, "huggingface_metr": [10, 18, 42], "huggingface_pre_process": 17, "huggingfacecontain": [10, 17, 18], "huggingfacetb": [5, 6], "hw": 22, "hyperparamet": [33, 34], "i": [1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 20, 22, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 34, 35, 37, 39, 41, 42, 43, 44], "id": [5, 6, 8, 11, 13, 20, 27, 35, 39, 43], "ideal": [2, 33], "ident": [11, 18, 22, 35, 42], "identifi": 22, "idl": [13, 39], "idle_time_before_scale_down": [13, 39], "ignor": [3, 13, 17, 27, 39, 43], "ignored_scop": 43, "ignored_scope_typ": 43, "ignorescopetypeenum": 43, "illustr": 2, "imag": [22, 27, 35, 43], "image_format": 22, "image_nam": 35, "imagebytestofloat": 22, "immedi": 16, "impact": 8, "implement": [2, 22, 24, 26, 28, 33, 34, 39, 42], "import": [1, 3, 5, 6, 7, 17, 18, 43], "improv": [10, 19, 22, 23, 24, 28, 30, 33, 34, 42, 43, 
45], "inc": [8, 25, 43], "inc_dynam": 39, "inc_quant": 33, "incdynamicquant": [9, 33, 42], "incept": 0, "inceptionv3": 26, "includ": [3, 5, 8, 11, 15, 16, 17, 22, 27, 28, 30, 33, 35, 39, 43, 45], "include_runtime_packag": 27, "incompat": 22, "incquant": [33, 42], "increas": [11, 24, 33, 42], "incstaticquant": [33, 42], "independ": 2, "index": [29, 42], "indic": [13, 17, 29, 35], "individu": [39, 45], "infer": [8, 9, 17, 19, 20, 22, 23, 24, 25, 26, 28, 29, 30, 31, 33, 34, 35, 36, 39, 42, 43], "inferenc": 27, "inference_set": [27, 41, 42], "inferenceserverconfig": 27, "inferencing_serv": 27, "influenc": 16, "info": [27, 29, 39, 42], "inform": [3, 10, 18, 23, 24, 29, 43, 44], "informs": 39, "infrastructur": 12, "infus": [5, 6], "init_overrid": 43, "initi": [2, 11, 22, 24, 29, 42, 43], "inject": 24, "inlin": 22, "input": [2, 3, 5, 6, 8, 9, 10, 11, 16, 17, 22, 23, 24, 26, 27, 29, 33, 35, 41, 43, 45], "input0": 43, "input0_chanfirst": 43, "input1": [29, 43], "input2": [29, 43], "input_col": [10, 17, 39], "input_dim": [30, 43], "input_dir": 17, "input_id": [5, 6, 7, 17, 18, 42], "input_idx": 29, "input_index": 22, "input_int32": 43, "input_layout": 43, "input_list": [25, 42, 43], "input_model": [9, 10, 18, 20, 42], "input_model_dtyp": 43, "input_nam": [17, 18, 26, 30, 41, 42, 43], "input_nod": 43, "input_ord": 17, "input_order_fil": 17, "input_shap": [17, 18, 26, 30, 41, 42, 43], "input_suffix": 17, "input_token": [5, 6], "input_typ": [17, 18, 42, 43], "inputlayout": 43, "inputs_to_make_channel_last": 43, "inputtyp": 43, "insensit": [32, 42], "insert": [23, 33, 42, 43], "insertbeamsearch": [22, 42], "inside_layer_modul": 43, "instal": [2, 3, 12, 18, 27, 35, 36, 38, 43], "instanc": [13, 22, 27, 35, 39, 43], "instance_count": [27, 35], "instance_typ": 27, "instead": [16, 25, 26, 30, 35, 39, 43], "instruct": [5, 6, 7, 8, 9, 12, 15, 18, 26, 28, 33, 36, 39], "int": [3, 10, 11, 27, 41, 42, 43, 44], "int16": 39, "int32": [39, 43], "int4": [0, 5, 6, 10, 30, 39, 43], 
"int4_accuracy_level": [39, 43], "int4_block_s": [39, 43], "int4_quantization_mod": 39, "int64": [17, 18], "int8": [0, 5, 6, 10, 33, 39, 43], "integ": [22, 23, 30, 43], "integr": [0, 7, 14, 22, 30, 33, 34, 39, 45], "intel": [0, 8, 9, 10, 17, 23, 42, 43, 45], "intellisens": 42, "intend": 28, "intens": 28, "inter": [39, 43], "inter_op_num_thread": 27, "inter_thread_num_list": [39, 43], "interf": 27, "interfac": [3, 5, 6, 13, 22, 30, 33], "intermedi": [2, 16, 29, 43], "intern": 43, "internet": 16, "interpret": 1, "interrupt": 15, "interv": [11, 42, 43], "intra": [39, 43], "intra_op_num_thread": 27, "intra_thread_num_list": [39, 43], "introduc": [3, 22, 28], "invalid": [3, 22, 43], "invoc": 27, "invok": [3, 39, 43], "involv": [22, 29], "io": [12, 18, 39, 41, 42, 43, 44], "io_bind": [9, 10, 27, 39, 43], "io_config": [16, 18, 39, 41, 42], "io_map": 22, "iobind": [39, 43], "ioconfig": 41, "iomapentri": 22, "ir": [10, 23, 42], "is_don": [5, 6, 7], "is_generative_model": 39, "is_symmetr": 43, "isolatedort": 35, "isolatedortsystem": 2, "issu": [11, 42], "item": [10, 17, 42], "iter": [2, 41, 42, 43], "its": [2, 18, 19, 22, 23, 24, 25, 26, 29, 30, 33, 34, 35, 42, 43, 44], "itself": 22, "job": [7, 12, 18, 36, 42, 43], "job_id": [7, 11, 20], "joint": [2, 10, 27, 42], "jsexecutionprovid": 39, "json": [1, 9, 10, 13, 15, 17, 35, 39, 43, 45], "json_config": 17, "jupyt": 5, "just": [17, 43], "k": 27, "k8": 12, "keep": [5, 6, 10, 43], "keep_io_typ": [30, 43], "kei": [1, 17, 18, 22, 39, 42, 43], "keras2onnx": 43, "kernel": 43, "key1": 39, "key2": 39, "keyboardinterrupt": [5, 6], "keyvault": [18, 39, 42], "keyvault_nam": [18, 39, 42], "keyword": [17, 42], "kind": [12, 22], "knextpoweroftwo": 22, "knob": 22, "know": 28, "knowledg": [10, 22], "known": [26, 27], "kube": 12, "kubeconfig": 12, "kv_cach": [17, 42], "kwarg": [17, 42], "l35": 17, "label": [10, 17, 35], "label_col": [10, 17], "languag": [5, 6, 8, 28, 34, 39, 42, 43], "larg": [11, 24, 28, 42, 43], "last": [12, 43], 
"last_conv_or_matmul_quant": 43, "latenc": [2, 10, 23, 27, 36, 41, 42, 43, 45], "latency_data_config": 19, "latent": 43, "later": 25, "latest": [8, 22, 37, 43], "layer": [22, 24, 28, 31, 39, 42, 43], "layer_name_filt": 43, "layernorm": [30, 43], "layers_block_nam": 43, "layout": [22, 43], "lead": 8, "learn": [4, 5, 13, 16, 18, 20, 22, 23, 24, 26, 27, 30, 31, 33, 34, 35, 36, 38, 40, 43], "learning_r": [24, 43], "least": [17, 24], "leav": [30, 43], "left": 43, "length": [5, 6, 30, 39, 42, 43], "less": [28, 33], "let": [1, 17, 28, 45], "letter": 17, "level": [15, 20, 28, 30, 39, 42, 43], "leverag": [17, 22], "lib": [42, 43], "lib_nam": 43, "lib_target": [25, 43], "librari": [5, 8, 22, 24, 30, 31, 33, 34, 42, 43], "light": [2, 5, 6], "lightn": [0, 24, 43], "lightning_modul": 43, "lightningdatamodul": 43, "lightningmodul": 43, "like": [4, 7, 11, 13, 16, 17, 22, 24, 27, 28, 31, 33, 39, 42, 43], "limit": [28, 39], "line": [8, 13, 18, 22, 28, 42, 43], "linear": [31, 42, 43], "link": [0, 24, 43], "linux": [5, 25, 43], "list": [2, 17, 18, 22, 27, 29, 32, 35, 37, 39, 41, 42, 43, 44], "live": 45, "ll": [5, 6, 7, 8, 9], "llama": [0, 5, 6, 7, 8, 9, 18, 42, 43], "llama2": 0, "llava": 43, "llm": [28, 34], "load": [1, 4, 5, 6, 7, 17, 19, 22, 24, 25, 26, 28, 39, 41, 42, 43], "load_dataset": 17, "load_dataset_config": [1, 10, 17, 18], "load_kwarg": [41, 42], "load_param": 42, "loader": 39, "local": [0, 2, 5, 6, 7, 15, 16, 33, 36, 39, 42], "local_cach": 16, "local_dataset": 17, "local_docker_config": 35, "local_system": [9, 10, 27, 35, 42], "localsystem": [2, 9, 10, 35, 42], "locat": [13, 16, 39, 42], "loftq": 42, "loftq_it": 43, "log": [15, 16, 18, 39, 42], "log_level": [5, 6, 7, 8, 39], "log_severity_level": [9, 42], "log_to_fil": 42, "logger": 43, "logic": [1, 43], "login": [5, 6, 16, 39], "logit": [18, 19, 30, 43], "logits_processor": 43, "logsoftmax": 22, "long": [11, 15, 42, 43], "look": [10, 16, 17], "loop": [0, 24, 43], "lora": [7, 8, 22, 36, 42, 45], "lora_alpha": [24, 39, 
43], "lora_dropout": 43, "lora_r": [39, 43], "loss": [8, 43], "low": [24, 43], "lower": [6, 8, 24, 27], "lr_scheduler_typ": 43, "lunch": 12, "m": [2, 15, 25, 26, 28, 39, 43], "mac": 5, "machin": [2, 5, 7, 8, 13, 14, 18, 20, 22, 27, 30, 33, 35, 36, 43], "made": 28, "mai": [8, 12, 16, 22, 30, 37, 43], "main": [2, 17, 37, 41], "main_export": 43, "maintain": 2, "make": [10, 12, 16, 18, 24, 28, 30, 33, 34, 43], "make_input": [22, 43], "manag": [2, 11, 13, 14, 18, 39, 42], "managedonlinedeploy": 27, "mandatori": 35, "mani": [5, 6, 7, 8, 16, 28, 30, 43], "manipul": [29, 36], "manual": [10, 18, 45], "map": 43, "mask": 43, "master": 24, "match": [16, 43], "matmul": [8, 22, 39, 42, 43], "matmul4": 39, "matmulconstbonli": 43, "matmulnbit": [39, 42, 43], "matmulnbitstoqdq": 42, "matric": [22, 24], "matrix": 43, "max": [1, 5, 6, 10, 19, 28, 42, 43], "max_drop": 23, "max_finite_v": 43, "max_it": 42, "max_lay": 43, "max_length": [5, 6, 7], "max_nod": [13, 39], "max_operation_retri": [11, 42], "max_sampl": [10, 17, 39], "max_seq_len": [18, 39], "max_step": [7, 8], "max_tim": 42, "max_trial": 43, "maximum": [11, 13, 39, 42, 43], "maxpool": 43, "mcr": [11, 35, 42], "mean": [6, 18, 33, 42, 43], "measur": [42, 43], "mechan": 45, "meet": [2, 17, 33], "member": [3, 43], "memori": [8, 23, 24, 28, 33, 34, 39], "merg": [0, 24, 42, 43], "merge_adapter_weight": 43, "mergeadapterweight": 42, "met": 42, "meta": [5, 6, 7, 8, 9, 18, 42], "metadata": 43, "metadata_onli": 43, "method": [3, 6, 7, 8, 23, 24, 33, 38, 39, 42, 43], "metric": [1, 2, 10, 36, 42, 43, 45], "metric_1": 27, "metric_2": 27, "metric_3": 27, "metric_config": 42, "metric_func": [1, 19, 42], "metric_func_kwarg": 42, "microsoft": [11, 12, 17, 28, 33, 35, 37, 39, 42, 43], "might": [2, 28, 33, 39, 43], "migraphxexecutionprovid": [5, 6, 39], "min": [10, 19, 42, 43], "min_lay": 43, "min_nod": [13, 39], "min_positive_v": 43, "mini": [27, 28, 33, 39], "mini_batch_s": 27, "minim": 30, "minimum": [13, 39, 42, 43], "minmax": 43, "minms": 
43, "minut": [12, 38], "miss": 29, "mistral": 0, "mit": 34, "mix": [39, 42, 43], "mixed_precision_overrides_config": 39, "mixed_precision_overrides_util": 43, "mixedprecisionoverrid": [39, 42], "mixin": 41, "mixtral": 43, "ml": [7, 15, 20, 25, 27, 35], "mlclient": 20, "mlflow": 27, "mm": 15, "mnb_to_qdq": 39, "mnist_requir": 35, "mobil": 30, "mobilenet": 0, "mode": [22, 37, 39, 42, 43], "model": [0, 2, 3, 4, 8, 9, 10, 14, 16, 19, 22, 24, 29, 31, 34, 35, 36, 40, 43, 45], "model_attribut": [27, 41], "model_attributes_kei": 27, "model_attributes_valu": 27, "model_build": 43, "model_compon": 41, "model_component_nam": 41, "model_config": 27, "model_dir": [11, 20], "model_file_format": [39, 41], "model_fold": [5, 6], "model_hash": 39, "model_load": [16, 41], "model_nam": [11, 17, 20, 27, 39], "model_name_or_path": [5, 6, 7, 8, 39], "model_name_pattern": 41, "model_output": 19, "model_packag": 27, "model_path": [9, 10, 11, 18, 20, 27, 41, 42], "model_path_nam": 43, "model_rank": 27, "model_script": [39, 41], "model_typ": [18, 22, 28, 42, 43], "model_vers": [11, 27], "modelbuild": [30, 42], "modelconfig": 41, "modeldtyp": 43, "modelfileformat": 41, "modelpackageconfig": 27, "models_rank": 27, "modeltypeenum": 43, "modif": 29, "modifi": [22, 28], "modul": [0, 1, 17, 28, 30, 31, 39, 42, 43], "modular": 2, "modules_to_fus": 43, "modules_to_not_convert": 43, "modules_to_sav": 43, "monoton": 22, "more": [2, 7, 8, 9, 10, 11, 13, 15, 16, 18, 19, 22, 23, 24, 25, 26, 30, 31, 33, 34, 35, 36, 38, 39, 42, 43, 44], "most": [10, 22, 30, 34, 35, 42, 43], "move": 23, "mrpc": [10, 17], "mse": 43, "msi": 18, "much": 43, "mul": [29, 43], "mul_0": 29, "multi": [2, 7, 45], "multiheadattent": 43, "multipl": [0, 16, 22, 27, 28, 29, 30, 36, 39, 41, 42, 43], "multipli": 22, "multivari": 44, "must": [2, 3, 17, 18, 22, 25, 32, 33, 35, 39, 42, 43], "mutual": [13, 39], "mxnet": 33, "my_dataload": 1, "my_datastor": [11, 20], "my_job_id": [11, 20], "my_keyvault_nam": 18, "my_model": [11, 20], 
"my_model_dir": 20, "my_modul": 1, "my_olive_project": 1, "my_output_nam": [11, 20], "my_post_process": 1, "my_resource_group": [11, 20], "my_script": 1, "my_subscription_id": [11, 20], "my_val": 1, "my_workspac": [11, 20], "myaccountnam": 16, "mycontainernam": 16, "mydataload": 1, "myenv": 35, "n": [7, 8, 39, 43], "name": [1, 3, 5, 6, 7, 9, 10, 11, 13, 15, 16, 17, 18, 19, 22, 23, 27, 29, 35, 42, 43, 44], "name_pattern": 43, "nc_workspac": 43, "ndarrai": 43, "necessari": [15, 18, 33, 35, 43], "necessarili": 27, "need": [2, 5, 6, 7, 8, 10, 11, 15, 16, 17, 18, 19, 22, 26, 28, 33, 36, 37, 39, 42, 43], "need_layer_fus": 43, "neglig": 24, "neighbor": 43, "ner_huggingface_preprocess": 17, "ner_post_process": 17, "nest": 43, "net": 16, "network": [5, 6, 11, 23, 24, 26, 33, 42, 43], "neural": [0, 8, 9, 23, 26, 42, 43], "new": [4, 5, 6, 7, 12, 13, 18, 24, 29, 35, 36, 39], "new_nam": [29, 43], "new_token": [5, 6, 7], "newoptimizationtrick": 3, "next": [3, 5, 6, 16, 27], "nf4": [24, 39, 43], "ngram": 43, "nlp": [0, 43], "nn": [31, 42, 43], "nnapi": [30, 43], "nnx": 45, "no_auto_batch_dataload": 17, "no_repeat_ngram_s": [22, 43], "node": [13, 22, 29, 30, 39, 42, 43], "node_block_list": 43, "node_nam": 29, "nodes_to_exclud": 43, "nodes_to_quant": 43, "non": [22, 43], "none": [10, 17, 19, 27, 35, 39, 41, 42, 43], "nonoverflow": [33, 43], "nop": 22, "normal": [3, 15], "note": [3, 11, 12, 18, 20, 23, 24, 25, 26, 27, 28, 30, 33, 35, 37, 42, 43], "now": [10, 17, 23, 28, 42, 43], "np": [5, 6, 7], "npu": [0, 5, 6, 25, 35, 39, 45], "nsplit": 28, "null": [9, 17, 27, 42], "num": [28, 39], "num_attention_head": 42, "num_byt": [22, 39, 43], "num_cpu": 35, "num_epoch": [24, 43], "num_flop": [39, 43], "num_gpu": 35, "num_head": [42, 43], "num_hidden_lay": 42, "num_key_value_head": 43, "num_param": [39, 43], "num_rank": 41, "num_sampl": [10, 27, 42, 44], "num_split": [39, 43], "num_step": 43, "number": [11, 13, 16, 17, 22, 27, 28, 39, 42, 43, 44], "numpi": [5, 6, 7, 39, 43], "nvidia": [5, 6, 
8, 43, 45], "nvmo": [8, 39], "nvmodeloptquant": 33, "o": [2, 5, 6, 28, 35, 39, 45], "oasst1_train": 18, "object": [1, 2, 3, 22, 33, 43], "object_detect": 43, "objectclass": 3, "occur": [22, 43], "odd": 39, "off": [15, 24, 34, 43], "offlin": [22, 34, 43], "often": 30, "og": [5, 6, 7], "old_nam": [29, 43], "oliv": [1, 2, 3, 6, 7, 8, 10, 11, 13, 14, 16, 17, 18, 19, 20, 22, 24, 28, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44], "olive_ignored_param_valu": 43, "olive_invalid_param_valu": 43, "olive_managed_env": 35, "olive_output_model": 27, "olive_output_model_cpu": 27, "olivemodelhandl": [3, 41, 43], "oliveol": 43, "olivepass": 33, "omit": 20, "onc": [5, 7, 8, 16, 23, 33, 43], "one": [0, 2, 3, 5, 6, 8, 13, 19, 22, 24, 26, 27, 28, 35, 39, 43], "ones": 33, "onli": [0, 1, 2, 3, 5, 10, 13, 16, 17, 19, 24, 26, 27, 31, 33, 34, 35, 39, 41, 42, 43], "only_onnxruntim": 43, "onnx": [0, 2, 9, 10, 21, 25, 26, 27, 28, 34, 35, 36, 37, 42, 45], "onnx_adapt": [7, 39, 43], "onnx_convers": 42, "onnx_dynam": 39, "onnx_file_nam": 41, "onnx_model_path": 39, "onnx_quant": [33, 42], "onnxconfig": 18, "onnxconvers": [9, 10, 18, 30, 42], "onnxdynamicquant": [33, 42], "onnxepvalidatemixin": 41, "onnxfloattofloat16": [30, 42], "onnxgraphmixin": 41, "onnxiodatatypeconvert": [30, 42], "onnxmatmul4quant": 42, "onnxmodel": 27, "onnxmodelhandl": [3, 23, 41, 42, 43], "onnxoptim": 22, "onnxopversionconvers": [30, 42], "onnxpeepholeoptim": [22, 42], "onnxquant": [32, 33, 42], "onnxrt_cuda_ep": 43, "onnxrt_trt_ep": 43, "onnxruntim": [3, 22, 27, 30, 35, 37, 42, 43], "onnxruntime_genai": [5, 6, 7], "onnxruntimepackag": 27, "onnxscript": [22, 37], "onnxstaticquant": [33, 42], "onnxtransformersoptim": 42, "op": [30, 42, 43], "op_block_list": [30, 43], "op_typ": 29, "op_type_dict": 43, "op_types_to_quant": 43, "open": [0, 22, 23, 30, 33], "openassist": 18, "openmpi4": [11, 35, 42], "openvino": [0, 5, 6, 10, 21, 35, 42], "openvino_docs_ov_converter_ug_conversion_opt": 43, "openvinoconvers": [23, 42], 
"openvinoexecutionprovid": [5, 6, 35, 39], "openvinomodelhandl": [41, 42, 43], "openvinoquant": [1, 23, 42], "openvinoquantizationwithaccuraci": 23, "oper": [2, 8, 11, 22, 23, 25, 27, 29, 30, 33, 39, 41, 42, 43], "operation_retry_interv": [11, 42], "opportun": 22, "opset": [30, 39, 43], "opt": [5, 6, 7, 8, 24, 36, 37, 39, 43, 45], "opt_level": [10, 43], "opt_level_list": [39, 43], "optim": [0, 2, 4, 9, 17, 18, 23, 24, 28, 30, 31, 34, 35, 36, 40, 42, 43, 45], "optimization_opt": 43, "optimize_model": 43, "optimum": [0, 18, 42, 43], "optimumconvers": 42, "optimummerg": 42, "option": [2, 3, 5, 9, 10, 17, 22, 25, 28, 30, 32, 35, 40, 43, 44], "optional_input": 43, "optuna": 44, "optyp": 43, "optypes_to_exclude_output_qu": 43, "orchestr": 45, "order": [22, 27, 29, 42, 43, 45], "org": 43, "organ": [12, 17, 42], "orient": 43, "origin": [10, 23, 24, 29, 34, 43], "ort": [5, 6, 9, 33, 39, 42, 43], "ort_log_severity_level": 42, "ort_past_key_nam": 42, "ort_past_value_nam": 42, "ort_present_key_nam": 42, "ort_present_value_nam": 42, "ort_py_log_severity_level": 42, "orthogon": 24, "ortmixedprecis": [30, 42], "ortperftun": 10, "ortsessionparamstun": [9, 18, 22, 42], "orttransformersoptim": [10, 22, 42], "other": [2, 3, 5, 12, 16, 17, 18, 22, 30, 32, 33, 43], "otherwis": [5, 6, 35, 39, 42, 43], "our": 42, "out": [12, 22, 24, 26, 32, 33, 34, 35, 43], "out_1": 43, "out_2": 43, "out_nod": 43, "outlin": [11, 18], "output": [1, 2, 3, 5, 6, 7, 8, 9, 10, 16, 17, 22, 24, 27, 29, 39, 42, 43, 45], "output0": 43, "output0_chanfirst": 43, "output1": [29, 43], "output2": [29, 43], "output_dir": [9, 10, 27, 42, 43], "output_format": 22, "output_index": 22, "output_model": [39, 43], "output_model_num": 42, "output_model_path": 3, "output_nam": [11, 18, 20, 26, 29, 41, 42, 43], "output_nod": 43, "output_path": [5, 6, 7, 8, 39], "output_shap": [26, 41], "outputmodel": [9, 27], "outputs_to_make_channel_last": 43, "outside_layer_modul": 43, "ov_model": 43, "over": [2, 42, 43, 44], "overrid": [18, 
27, 39, 43], "overridden": 32, "overrides_config": 43, "overview": [12, 29, 38], "overwrit": 43, "overwrite_cache_record": 43, "overwrite_output_dir": 43, "overwritten": 43, "own": [1, 2, 4, 5, 14, 15, 17, 18, 19, 20, 22, 36, 38, 39, 42], "p": [28, 39], "pack": [22, 43], "pack_input": 22, "packag": [2, 3, 5, 6, 34, 35, 36, 37, 38, 39, 42, 45], "package_config": 39, "packaging_config": [9, 27, 42], "packaging_config_nam": 27, "packagingconfig": [27, 42], "packagingtyp": 27, "pad": 22, "pad_to_max_len": 18, "page": [12, 16, 18], "paged_adamw_32bit": 43, "pair": 43, "pajama": 0, "paper": [24, 34], "parallel": [39, 43], "parallel_job": 43, "param": [1, 3, 5, 6, 7, 17, 22, 42], "param1": 3, "param2": 3, "param3": 3, "param4": 3, "param5": 3, "param6": 3, "paramcategori": 3, "paramet": [2, 3, 8, 11, 17, 22, 23, 24, 25, 26, 28, 30, 32, 33, 34, 39, 40, 42, 43, 44, 45], "parent": [3, 42, 43], "pareto": [2, 42], "pars": 42, "part": 22, "particular": 43, "particularli": 30, "parzen": [2, 42, 44], "pass": [1, 4, 9, 10, 11, 12, 16, 17, 18, 22, 23, 24, 25, 26, 27, 29, 30, 31, 33, 34, 35, 36, 39, 40, 45], "pass_flow": [10, 42], "passconfigparam": 3, "past": [5, 6, 7, 18, 39, 41, 42, 43], "past_key_valu": [39, 42, 43], "past_key_value_nam": [39, 43], "past_kv_dynamic_axi": 42, "past_present_share_buff": [5, 6, 7], "past_seq_len": 17, "past_sequence_length": 42, "path": [1, 3, 11, 12, 13, 15, 17, 18, 21, 25, 26, 27, 35, 39, 41, 42, 43], "pathlib": 43, "pattern": [7, 22, 24, 30, 31, 42, 43], "pc": 33, "pca": 43, "pdf": 43, "peft": [7, 8, 22, 24, 39], "per": [2, 28, 43], "per_channel": [32, 33, 43], "percdamp": 43, "percent": [10, 19, 42, 43], "percentag": [42, 43], "percentil": 43, "perchannel": 43, "perform": [2, 8, 24, 27, 29, 30, 33, 34, 42, 43, 45], "permut": 29, "perplex": [24, 43], "phi": [28, 33, 39], "phi2": 0, "phi3": 33, "phrase": [5, 6, 7, 8], "phrase_classif": [7, 8], "phrase_classifi": 7, "pip": [15, 16, 23, 35, 38], "pipelin": [2, 15, 42], "pixelstoycbcr": 22, "place": 
[1, 39, 43], "placehold": [7, 22], "platform": [2, 5, 6, 8, 22, 23, 30, 33, 35], "platform_sdk": 43, "pleas": [3, 9, 11, 12, 15, 18, 19, 22, 23, 24, 25, 26, 27, 28, 30, 31, 33, 34, 35, 37, 42, 43], "plot": 42, "plot_pareto_fronti": 42, "plu": [5, 6, 27], "png": 22, "point": [2, 3, 8, 22, 23, 25, 26, 33, 39, 42, 43, 44], "pool": 22, "poor": 30, "popul": 29, "popular": [17, 28, 33], "posit": [17, 42], "position_id": 18, "possibl": [2, 22, 28, 30, 32, 34, 35, 39], "post": [0, 1, 8, 17, 24, 28, 36, 42, 43], "post_process": [1, 17, 42], "post_process_data": 17, "post_process_data_config": [1, 17, 42], "postprocess": 43, "potenti": [16, 43], "power": [10, 15, 28, 33], "pre": [0, 24, 28, 32, 33, 35, 36, 42, 43], "pre_post_process_quant": 43, "pre_process": 17, "pre_process_data": 17, "pre_process_data_config": [10, 17, 18], "precis": [5, 6, 8, 10, 23, 28, 31, 33, 36, 39, 42, 43, 45], "pred": [19, 42], "predefin": [42, 43], "predict": [17, 26, 42], "predict_with_kv_cach": 39, "prefer": [30, 43], "prefix": [27, 42], "prefix_vocab_mask": 43, "prepar": [18, 43], "prepare_qnn_config": 43, "prepend": 35, "prepend_to_path": 35, "prepostprocessor": [22, 43], "prepostprocessorinput": [22, 43], "preprocess": [17, 33, 39, 42, 43], "prerequisit": [39, 43], "present": [42, 43], "present_kv_dynamic_axi": 42, "preserv": 43, "preset": 43, "presetenum": 43, "press": [5, 6], "pretrain": 24, "previou": 39, "primari": 33, "primit": 36, "print": [1, 5, 6, 7], "prioriti": [1, 10, 18, 19, 27, 39, 42], "priority_hint": 43, "privat": 39, "probabl": 43, "problem": 43, "proce": 16, "process": [0, 1, 5, 6, 15, 16, 17, 18, 23, 24, 26, 28, 33, 36, 39, 42, 43], "produc": [7, 28, 29, 39, 45], "product": 45, "profil": [39, 43], "program": [5, 6], "prompt": [5, 6, 17, 39], "propag": [22, 35], "proper": [22, 24], "proprietari": 39, "proto": [22, 43], "protobuf": 30, "provid": [1, 2, 3, 5, 6, 7, 8, 10, 11, 13, 15, 16, 17, 18, 19, 22, 24, 25, 26, 27, 28, 29, 32, 33, 35, 36, 41, 42, 43, 45], "provider_opt": 
27, "provider_options_list": 43, "providers_list": [22, 39, 43], "prune": [8, 24, 33, 43], "pt": [11, 20, 39], "ptl_data_modul": [24, 43], "ptl_modul": [24, 43], "ptldatamodul": 24, "ptlmodul": 24, "public": 37, "publicresourc": 43, "pull": [5, 6], "purpos": 18, "py": [1, 5, 6, 7, 17, 19, 22, 24, 26, 27, 33, 39, 42, 43], "py_vers": [25, 26, 39], "pydant": 43, "pypi": 37, "python": [1, 2, 5, 6, 7, 13, 15, 17, 25, 26, 27, 37, 39, 42, 43], "python_environment_path": 35, "python_system": 35, "pythonenviron": [35, 42], "pythonenvironmentsystem": 2, "pytorch": [0, 6, 7, 8, 21, 25, 28, 30, 33, 36, 42, 45], "pytorch_entire_model": 41, "pytorch_lightn": 43, "pytorchmodel": 20, "pytorchmodelhandl": [23, 39, 41, 42, 43], "q": 36, "q_group_siz": 43, "qat": [0, 24, 43], "qconfig": [24, 43], "qconfig_func": 43, "qdq": [0, 33, 39, 42, 43], "qint8": [33, 43], "qkv": 22, "qlora": [0, 22, 39, 42], "qnn": [0, 21, 30, 33, 39, 42], "qnn_backend": 25, "qnn_extra_opt": 43, "qnn_model": 43, "qnn_sdk_root": 25, "qnncontextbinarygener": [25, 42], "qnnconvers": [25, 42], "qnnexecutionprovid": [5, 6, 39], "qnnmodelhandl": 43, "qnnmodellibgener": [25, 42], "qnnpreprocess": [33, 42], "qoper": 43, "qualcomm": [0, 5, 6, 25, 26, 43, 45], "qualifi": 42, "qualiti": [6, 34, 45], "quant": 43, "quant_data_config": 26, "quant_format": [33, 43], "quant_level": 43, "quant_mod": 43, "quant_preprocess": 43, "quant_typ": [24, 43], "quantiz": [0, 2, 5, 6, 9, 28, 29, 36, 38, 42, 43, 45], "quantizationawaretrain": [24, 42], "quantize_int4": 39, "quantized_tensor": 29, "quantizelinear": 43, "quanttyp": 43, "quarot": 42, "queri": 22, "question": [28, 39], "quick": 10, "quickstart": 5, "quint8": [32, 33, 42, 43], "quot": 43, "r": [39, 43], "rais": [28, 35], "random": [2, 39, 42, 43, 44], "randomli": 2, "rang": [5, 22, 43], "rank": [19, 24], "rapid": 33, "rate": 43, "rather": [8, 43], "ratio": 43, "raw": [17, 43], "raw_data": [17, 29], "raw_dataset": 17, "rawdatacontain": 17, "reach": 33, "read": [5, 7, 11, 18, 23, 
35, 42], "read_timeout": [11, 42], "readi": 16, "readthedoc": [43, 44], "real": 43, "rearrang": 43, "recal": 42, "receiv": [3, 42], "recip": 43, "recommend": [5, 26, 36, 37, 43], "recommendation_system": 43, "recov": 8, "recv_nod": 43, "red": 0, "reduc": [7, 8, 22, 23, 24, 33, 34, 43, 45], "reduce_rang": [32, 43], "reducemean": 43, "redund": [10, 22, 32], "ref": 32, "refer": [8, 9, 11, 17, 18, 19, 22, 23, 24, 25, 26, 28, 30, 31, 33, 34, 35, 37, 38, 39, 42, 43, 44], "regardless": 43, "regist": [2, 20, 27, 42], "register_dataload": [1, 17], "register_dataset": 17, "register_post_process": [1, 17], "register_pre_process": 17, "registr": 27, "registri": [0, 1, 5, 6, 7, 17, 39], "registry_nam": [11, 18, 20, 39], "regular": 31, "rel": [11, 20], "relat": [22, 40, 42, 43], "relationship": 2, "relative_path": [11, 20, 42], "reli": 30, "relu": [29, 30, 43], "relu_output": 29, "relunod": 29, "remain": 43, "remot": [7, 14, 17, 35, 36, 38, 39], "remov": [22, 29, 39, 43], "renam": 29, "renamed_input1": [29, 43], "renamed_input2": [29, 43], "renamed_output1": [29, 43], "renamed_output2": [29, 43], "renameinput": 43, "renameoutput": 43, "reorder": 29, "replac": [2, 29, 42, 43, 44], "repo": [5, 6, 8], "repositori": [28, 37], "repres": [3, 22, 29, 30, 33, 35, 41, 43], "represent": [8, 26], "request": [11, 42], "requir": [2, 3, 8, 10, 13, 16, 17, 18, 20, 22, 24, 27, 28, 32, 33, 34, 35, 39, 42, 43], "requirements_fil": [27, 35], "reserv": 18, "reshap": 22, "reshape_1": 26, "resid": [5, 6], "resiz": 22, "resize_to": 22, "resnet": [0, 42], "resolut": 0, "resolv": 43, "resourc": [7, 11, 13, 18, 20, 27, 35, 39, 42], "resource_group": [7, 11, 13, 18, 20, 35, 39, 42], "resource_group_nam": 7, "resourcegroup": 42, "resourcepath": 41, "resourcepathconfig": 41, "respect": [2, 33, 45], "respons": [2, 39, 41], "result": [19, 24, 29, 30, 35, 42, 43], "result_kei": 42, "resume_from_checkpoint": 43, "retain": 30, "retri": [11, 42], "retriev": [18, 42], "return": [1, 2, 3, 19, 39, 42, 43], "reus": 
16, "rewrit": 17, "rich": 28, "rmax": 43, "rmin": 43, "rng": 44, "rocmexecutionprovid": [5, 6, 39], "root": [3, 11, 20, 39], "rotat": [34, 42, 43], "rotate_mod": [34, 43], "rotatebas": 43, "rotatemod": 43, "round": 43, "round_interv": 43, "row": [24, 39, 43], "rtn": [6, 39, 43], "rtx": 33, "rule": 10, "run": [2, 5, 6, 7, 8, 10, 11, 12, 14, 16, 17, 18, 19, 22, 23, 24, 25, 26, 27, 28, 30, 33, 35, 36, 38, 42, 43, 45], "run_config": 39, "runtim": [0, 2, 8, 9, 22, 26, 27, 28, 30, 33, 35, 39, 42, 43, 45], "safe": 15, "safetensor": 39, "sai": [1, 2, 3], "same": [2, 3, 5, 6, 10, 15, 16, 18, 22, 27, 30, 33, 35, 41, 42, 43], "sampl": [2, 39, 42, 43, 44], "samplecod": 27, "sampler": 44, "save": [20, 22, 39, 42, 43], "save_as_external_data": [9, 33, 43], "save_config_fil": 39, "save_format": 43, "save_metadata_for_token_gener": [9, 43], "save_quant_config": 43, "scalabl": 28, "scale": [8, 13, 29, 33, 39, 43], "scale_0": 29, "scaledown": 39, "scenario": [0, 22, 27, 30, 36, 42, 43], "schedul": 43, "schema": [42, 43], "scheme": [24, 43], "scope": 43, "score": 27, "scoring_script": 27, "script": [3, 4, 12, 14, 17, 27, 42, 43], "script_dir": [17, 39, 41, 42, 43], "sdk": [11, 42, 43], "seamless": 33, "search": [3, 10, 32, 39, 40, 42, 43, 44, 45], "search_algorithm": [2, 10, 27, 42], "search_algorithm_config": [10, 27, 42], "search_default": [3, 43], "search_opt": [5, 6], "search_strategi": [10, 27, 42], "searchabl": [2, 3, 32, 33], "searchable_valu": [32, 33], "searcher": 44, "searchparamet": 3, "second": [2, 11, 13, 39, 42], "secret": [18, 42], "section": [2, 11, 18, 20, 28, 42], "see": [12, 22, 30, 39, 42, 43], "seed": [10, 27, 39, 42, 43, 44], "seen": 28, "select": [2, 12, 33, 39, 43], "self": [1, 3], "semi": [24, 43], "sensibl": [5, 6], "sensit": 39, "sentence1": [10, 17], "sentence2": [10, 17], "separ": [1, 11, 22, 37, 39, 42, 43], "seq_len": 17, "seqlen": [39, 43], "sequenc": [9, 22, 36, 39, 42, 43, 45], "sequence_length": [18, 42], "sequence_length_idx": 42, 
"sequenceconstruct": 22, "sequenti": [29, 43], "serial": 43, "serv": [7, 17, 45], "server": 27, "servic": 18, "session": [15, 22, 28, 43], "session_opt": [27, 42], "session_params_tun": [9, 18], "session_params_tuning_data_config": 22, "sessionopt": 43, "set": [2, 3, 5, 6, 7, 9, 10, 11, 12, 16, 18, 19, 22, 25, 26, 27, 30, 32, 33, 39, 42, 43, 45], "set_active_adapt": 7, "set_search_opt": [5, 6, 7], "setup": [33, 39], "sever": [12, 17, 28, 42], "shape": [22, 29, 39, 42, 43], "share": [14, 42], "shared_kv": 42, "ship": 45, "shot": [8, 24], "should": [1, 3, 12, 13, 17, 29, 30, 39, 41, 42, 43], "show": [7, 17, 28], "show_unconsumed_nod": 43, "shut": 15, "sigmoid": 43, "sign": [5, 6, 43], "signatur": 42, "signific": 24, "similar": 30, "simlar": 34, "simpl": [3, 5, 6, 10, 22, 27], "simple_dataset": 17, "simplest": 30, "simpli": [17, 39], "simplif": 22, "simplifi": [18, 22, 35], "simplifiedlayernorm": [30, 43], "simultan": 24, "sinc": [3, 5, 6, 16, 22, 27, 32, 35], "singl": [0, 7, 8, 17, 27, 28, 29, 33, 34, 43], "site": 43, "situat": 43, "size": [13, 16, 19, 23, 24, 27, 28, 30, 39, 42, 43], "size_threshold": 43, "skip": [16, 30, 42], "skip_dataset": 1, "skiplayernorm": 30, "skipsimplifiedlayernorm": [30, 43], "sku": [27, 35], "slice": [22, 24, 43], "slicegpt": [0, 42], "slm": 28, "slower": 16, "small": [11, 22, 28, 42], "smaller": [24, 42, 43], "smollm": [5, 6], "smooth": 43, "smooth_quant": 43, "smooth_quant_arg": 43, "snapdragon": 26, "snippet": 7, "snpe": [0, 10, 21, 30, 39, 42], "snpe_root": 26, "snpeconvers": [26, 42], "snpedevic": 43, "snpemodelhandl": [41, 42, 43], "snpequant": [26, 42], "snpetoonnxconvers": 42, "so": [3, 5, 6, 8, 25, 27, 28, 30, 43], "soc": 43, "softwar": [25, 26], "solut": 33, "some": [2, 3, 8, 17, 22, 30, 32, 35, 42, 43], "someth": [5, 6], "sometim": [11, 17, 42], "soon": 43, "sort": 27, "sourc": [5, 41, 42, 43], "source_dtyp": [30, 43], "space": [2, 3, 32, 42, 43, 44], "spars": [24, 31, 43], "sparsegpt": [0, 42], "sparsif": 24, "sparsiti": [0, 
8, 24, 31, 43], "spec": [2, 28, 35], "special": [39, 43], "specialparamvalu": 43, "specif": [1, 2, 4, 18, 22, 25, 27, 29, 30, 32, 33, 36, 38, 39, 42, 43], "specifi": [1, 3, 10, 11, 12, 13, 18, 19, 20, 24, 27, 28, 29, 30, 33, 35, 39, 42, 43], "spectrogram": 0, "speed": [8, 16, 33, 34, 43], "speedup": 24, "split": [10, 17, 18, 22, 36, 42, 43], "splitmodel": 42, "sport": 7, "squeez": 22, "squeezenet": 0, "ss": 15, "stabl": [0, 37, 43, 44], "stack": 10, "stage": 30, "stand": [23, 30], "standalon": 43, "standard": [33, 42, 43], "standard_nd12": 35, "standard_nd24": 35, "standard_nd24r": 35, "standard_nd40rs_v2": 35, "standard_nd6": 35, "standard_nd96amsr_a100_v4": 35, "standard_nd96asr_v4": 35, "start": [17, 27, 36, 38, 43, 45], "start_header_id": [5, 6, 7, 8], "state": 8, "static": [0, 3, 33, 42, 43], "static_group": 43, "step": [3, 8, 9, 11, 12, 15, 16, 18, 22, 33, 42, 43, 45], "still": [10, 20, 28], "stop": 42, "stop_when_goals_met": 42, "storag": 39, "store": [5, 6, 8, 15, 16, 20, 39, 42, 43], "str": [1, 3, 10, 11, 16, 17, 27, 41, 42, 43], "strategi": [18, 36, 40, 42, 43, 45], "strategy_kwarg": 43, "stream": [5, 6, 11, 42], "strftime": 43, "strict": 43, "strictli": 43, "string": [3, 18, 22, 39, 42, 43], "string_nam": 20, "string_to_int_dim_param": 42, "strive": 33, "structur": [2, 22, 24, 28, 31, 42, 43, 44], "sub": [2, 22, 42, 43], "sub_mul_0": 29, "sub_tanh_0": 29, "sub_typ": [1, 10, 18, 19, 42], "subexpress": 22, "subgraph": [28, 43], "submit": 15, "subscript": [11, 13, 35, 39, 42], "subscription_id": [11, 13, 18, 20, 35, 39, 42], "subset": [10, 17, 39], "subtyp": 42, "successfulli": [7, 8], "suggest": 44, "suit": 30, "suitabl": [30, 43], "super": 0, "superced": 43, "superresolut": 22, "suppli": 39, "support": [1, 2, 3, 5, 7, 10, 11, 16, 18, 20, 22, 23, 24, 30, 31, 32, 33, 34, 35, 36, 39, 41, 42, 43], "suppurt": 42, "sure": [12, 16, 18], "surgeon": [36, 43], "surgeri": [42, 43], "surround": 43, "swift": 33, "sy": 1, "sym": 43, "symbol": [30, 39, 43], "symmetr": 
[39, 43], "system": [3, 5, 6, 9, 10, 11, 33, 36], "t": [3, 11, 20, 22, 27, 28, 30, 35, 39, 42, 43], "t1": 43, "tabl": [17, 42], "tag": 35, "tail": 43, "tailor": 33, "take": [3, 5, 6, 8, 12, 15, 17, 19, 28, 31, 36, 39, 42, 43], "tanh": 29, "target": [0, 2, 6, 9, 10, 18, 19, 25, 27, 28, 30, 33, 35, 36, 39, 42, 43, 45], "target_devic": 43, "target_dtyp": [30, 43], "target_environ": 27, "target_environment_vers": 27, "target_modul": [39, 43], "target_opset": [9, 22, 30, 39, 42, 43], "task": [1, 4, 5, 10, 17, 18, 22, 39, 41, 42, 45], "team": 45, "techniqu": [2, 3, 4, 22, 24, 28, 33, 34, 36, 42, 43, 45], "technologi": 25, "tempdir": 39, "temperatur": 43, "tempfil": 39, "templat": [5, 6, 10, 39, 42], "tensor": [8, 22, 24, 29, 31, 34, 39, 43], "tensor_nam": 43, "tensorfloat": 43, "tensorflow": [25, 26, 33, 39, 42, 43], "tensorflowmodelhandl": [23, 43], "tensorquantoverrid": 43, "tensorrt": [3, 5, 6, 8, 24, 31, 39, 42, 43], "tensorrtexecutionprovid": [5, 6, 10, 35, 39], "term": 28, "termin": [15, 18], "test": [8, 26, 29, 37, 39, 42, 43], "text": [5, 6, 10, 17, 18, 39, 41, 42], "text_classification_post_process": 17, "text_col": 18, "text_field": 39, "text_generation_huggingface_pre_process": 17, "text_generation_post_process": 17, "text_templ": [7, 8, 39], "tf": 43, "tf2onnx": 43, "tf32": 43, "than": [6, 8, 19, 28, 33, 39, 43], "thei": [11, 17, 27, 35, 39, 42], "them": [1, 13, 18, 19, 22, 25, 26, 28, 30, 33, 34, 43], "therefor": [5, 6, 28, 35], "thi": [1, 2, 3, 5, 6, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 39, 42, 43, 44], "thing": 32, "thread": [22, 39, 43], "three": [2, 32], "threshold": [22, 42, 43], "through": [5, 6, 10, 22, 24, 33, 42], "throughput": [2, 36, 42], "throughput_data_config": 19, "thu": [2, 22, 33, 42], "timdettm": 18, "time": [5, 6, 10, 11, 13, 15, 16, 22, 23, 27, 30, 33, 35, 42, 43, 45], "timeout": [11, 42, 43], "tip": 43, "togeth": [2, 22, 42, 45], "token": [2, 5, 6, 7, 17, 18, 24, 39, 42, 
43], "token_type_id": 17, "tokenizer_dir": 33, "tokenizer_stream": [5, 6, 7], "toler": 43, "tone": [7, 8], "too": [11, 28, 42], "tool": [8, 10, 13, 22, 25, 26, 28, 33, 36, 42, 43], "tool_command": [22, 43], "tool_command_arg": [22, 43], "toolkit": [0, 23, 45], "tools_snp": 43, "top": [15, 20, 27, 42], "topic": 43, "torch": [30, 31, 34, 39, 42, 43], "torch_dtyp": [39, 43], "torch_metr": 42, "torch_tensorrt": 31, "torchmetr": 42, "torchtrt": 0, "torchtrtconvers": 42, "total_sequence_length": 18, "tp": 2, "tpe": [2, 10, 27, 39, 42, 44], "tpesampl": 44, "trade": 34, "train": [0, 8, 18, 22, 30, 34, 39, 42, 43], "train_data_config": [24, 43], "train_split": 39, "train_subset": 39, "trainabl": [22, 24, 43], "trainer": 43, "training_arg": [24, 43], "training_loop_func": [24, 43], "trainingargu": 43, "transform": [0, 5, 8, 17, 18, 23, 24, 28, 29, 30, 31, 34, 37, 42, 43, 45], "transformer_token_dummy_data": 9, "transformers_dummy_data_config": 17, "transformers_optim": 42, "transformersdummydatacontain": 17, "transformerspromptdummydatacontain": 17, "transformerstokendummydatacontain": [9, 17], "transit": 23, "transpos": [22, 43], "tree": [2, 42, 44], "trial": [2, 45], "trt": [0, 10], "trt_fp16": 10, "trt_fp16_enabl": [39, 43], "trt_perf_tun": 10, "trt_transformers_optim": 10, "trtmodul": 31, "true": [3, 10, 16, 18, 19, 22, 26, 27, 30, 32, 33, 35, 39, 42, 43, 44], "true_sequenti": 43, "trust": [17, 39], "trust_remote_cod": [5, 6, 7, 8, 17, 39], "try": [5, 6, 10, 42], "tune": [0, 2, 3, 7, 8, 24, 33, 34, 42, 43, 45], "tuning_criterion": 43, "tuningcriterion": 43, "turn": [2, 15, 36, 43], "tutori": [12, 22], "two": [2, 22, 32, 33, 35, 43], "txt": [17, 25, 27, 35], "type": [1, 2, 3, 9, 10, 11, 15, 17, 18, 20, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 35, 39, 41, 42, 43, 44], "type_": 3, "typic": [16, 35, 45], "u": 22, "ubuntu20": 42, "ubuntu22": [11, 35], "uint16": 39, "uint32": 39, "uint4": [39, 43], "uint8": [22, 39, 43], "uncas": [10, 17], "under": [10, 17, 42], 
"undergo": 23, "underneath": [22, 44], "underscor": 17, "understand": 28, "unet": 43, "uniform": 23, "union": [3, 42], "uniqu": [17, 42], "unit": 22, "unless": 43, "unset": 43, "unsign": 43, "unsqueez": 22, "unstructur": 24, "unsupport": 33, "until": 16, "unus": 22, "up": [5, 6, 7, 8, 10, 16, 18, 19, 24, 34, 38, 43], "updat": [7, 8, 30, 43], "update_shared_cach": 16, "upload": [3, 16, 27], "upon": 34, "url": [16, 42], "us": [1, 2, 3, 8, 9, 12, 13, 16, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 41, 42, 43, 44], "usabl": 33, "usag": [18, 24, 39], "use_audio_decod": 22, "use_dynamo_export": [39, 43], "use_enhanced_quant": 43, "use_external_data_format": 43, "use_forced_decoder_id": 43, "use_gpu": 43, "use_gqa": 43, "use_int4": 43, "use_logits_processor": 43, "use_model_build": 39, "use_ort_extens": [27, 41], "use_ort_genai": [5, 6, 7, 8, 39], "use_prefix_vocab_mask": 43, "use_qdq_encod": 39, "use_symbolic_shape_inf": 43, "use_temperatur": 43, "use_transpose_op": 43, "use_vocab_mask": 43, "user": [2, 3, 5, 6, 7, 8, 10, 11, 17, 22, 27, 28, 30, 32, 33, 34, 35, 36, 39, 41, 42, 43, 45], "user_config": [19, 42], "user_dir": 17, "user_input": 7, "user_script": [17, 19, 22, 24, 26, 33, 42, 43], "usernam": [5, 6, 8], "usual": 33, "util": [28, 42, 43], "v2": 26, "vae": 43, "vai_q_onnx": 43, "val_data_config": 43, "valid": [3, 10, 17, 22, 23, 41, 42, 43], "validation_func": 23, "valu": [1, 2, 3, 8, 10, 11, 13, 17, 18, 19, 22, 24, 27, 29, 30, 32, 33, 39, 42, 43], "value1": 39, "value2": 39, "value_info": 29, "var": 27, "variabl": [12, 25, 26, 35, 42], "varieti": 33, "variou": [22, 39], "vault": 18, "vector": 8, "vendor": 45, "verbos": 42, "verif": 12, "version": [11, 18, 20, 23, 27, 29, 35, 37, 39, 42, 43], "versu": 7, "vgg": 0, "via": [7, 11, 42, 43], "video": 33, "virtual": [5, 35, 37], "virtualenv": 35, "vision": 0, "visual": 23, "viti": [0, 42, 43], "vitis_ai_quant": 33, "vitisaiexecutionprovid": 39, "vitisaiquant": [33, 42], "vm": 
[13, 39], "vm_size": [13, 39], "vnni": 43, "vocab_mask": 43, "vpu": 43, "vscode": 42, "w": 43, "w_bit": [34, 43], "wa": 34, "wai": [16, 17, 19, 22, 24, 28, 33, 39, 43], "want": [7, 8, 11, 15, 16, 17, 18, 27, 30, 37, 42, 43], "warmup": 43, "warmup_ratio": 43, "warn": [39, 42], "we": [2, 5, 10, 12, 17, 22, 26, 28, 32, 36, 37, 42, 43], "web": 22, "webgpu": 30, "weight": [0, 2, 5, 6, 7, 8, 22, 23, 24, 26, 31, 33, 34, 39, 42, 43], "weight_correct": 43, "weight_onli": [33, 43], "weight_only_config": [33, 43], "weight_only_quant_config": 43, "weight_precis": 39, "weight_typ": [32, 33, 42, 43], "weightsfileformat": 43, "weightsymmetr": 43, "well": [17, 23, 43], "were": [27, 42, 43], "what": 43, "when": [1, 2, 3, 16, 18, 22, 23, 26, 27, 32, 35, 39, 42, 43, 44], "where": [1, 11, 13, 17, 22, 24, 27, 28, 30, 31, 35, 36, 39, 42, 43], "whether": [3, 17, 22, 27, 28, 32, 39, 42, 43], "which": [2, 3, 5, 6, 7, 10, 11, 17, 18, 19, 22, 23, 24, 25, 27, 30, 33, 34, 35, 39, 41, 42, 43, 45], "while": [3, 5, 6, 7, 22, 28, 30, 33, 43], "whisper": [0, 22, 41, 42, 43], "whisperbeamsearch": [42, 43], "whose": [29, 43], "width": [22, 24], "wikitext2": 24, "wikitext2_train": 34, "window": [5, 6, 16, 43], "wise": 43, "wish": 8, "with_replac": 44, "within": [11, 18], "without": [2, 15, 16, 34, 36, 39, 42, 44], "wonder": 7, "word_length": 42, "work": [34, 42], "workflow": [1, 2, 4, 11, 14, 16, 20, 27, 38, 39, 43, 45], "workflow_host": [15, 42], "workflow_id": [15, 42], "workspac": [2, 7, 11, 12, 13, 18, 20, 27, 35, 39, 42, 43], "workspace_nam": [7, 11, 13, 18, 20, 35, 39, 42], "workspaceblobstor": [15, 35], "world_siz": 42, "would": [7, 10, 17, 43], "write": [1, 10, 22, 23], "x": 43, "x86_64": [25, 43], "xilinx": 42, "xl": 0, "xxyyzzz": [7, 8], "y": [39, 43], "y1_uint8": 22, "yaml": [11, 35, 42, 45], "ycbcrtopixel": 22, "ye": 39, "yet": 22, "yml": 42, "you": [1, 5, 6, 7, 8, 9, 11, 12, 13, 15, 16, 17, 18, 19, 20, 22, 25, 26, 27, 29, 30, 32, 34, 35, 36, 37, 39, 42, 43, 45], "your": [0, 1, 4, 5, 6, 7, 
8, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 25, 26, 27, 34, 36, 37, 38, 39, 42, 43, 45], "yourself": 19, "yyyi": 15, "zero": [8, 29, 33, 43], "zero_point": [29, 43], "zip": [9, 26, 27], "zipfil": 9}, "titles": ["Examples", "Custom Scripts", "Design", "How to add new optimization Pass", "Extending Olive", "Getting started", "Auto Optimization", "Finetune", "Quantize", "Run Olive workflows", "How to use Automatic Optimizer", "Azure AI Integration", "Self-hosted Kubernetes cluster", "Azure ML scripts", "Azure AI", "Remote Workflow", "Shared Cache", "How To Configure Data", "Huggingface Integration", "How To Configure Metrics", "How To Set Model Path", "Model Configuration", "ONNX", "OpenVINO", "PyTorch", "QNN", "SNPE", "Packaging Olive artifacts", "Model Splitting", "ONNX Surgeon Classes Documentation", "ONNX", "PyTorch", "How to configure a Workflow Pass", "ONNX Quantization", "PyTorch Quantization", "How To Configure Systems", "How-to", "Installation", "Olive: The AI Model Optimization Toolkit for the ONNX Runtime", "Command Line Tools", "Reference", "OliveModels", "Olive Options", "Passes", "SearchAlgorithms", "Overview"], "titleterms": {"0": [5, 6, 7, 8, 9], "001c": [6, 7, 8, 9], "002": [6, 7, 8, 9], "002l": 9, "003": [6, 7, 8, 9], "003a1": 9, "004": [6, 7, 8, 9], "004l": [6, 7, 8, 9], "004l9": [6, 7, 8, 9], "004z": [6, 7, 8, 9], "005": 9, "006": [6, 7, 8, 9], "006a": [6, 7, 8, 9], "008": [6, 7, 8, 9], "009": [6, 7, 8, 9], "009l": [6, 7, 8, 9], "018": 9, "02": 9, "02l": 9, "034l": [6, 7, 8, 9], "036": [6, 7, 8, 9], "042": 9, "042z": 9, "045": 9, "047": [6, 7, 8, 9], "058": 9, "06": 9, "064": 9, "06l": 9, "06l2": [6, 7, 8, 9], "06zm6": 9, "074l3": [6, 7, 8, 9], "088": 9, "09": 9, "0em": [5, 6, 7, 8, 9], "0h": 9, "0h12": 9, "0h2a": [5, 7], "0l2": 9, "0v": [5, 7], "0v1": [5, 7], "0z": 9, "0zm8": 9, "1": [3, 5, 6, 7, 8, 9, 23], "10": [6, 7, 8, 9], "102a1": 9, "103": 9, "11": 9, "112": 9, "116": 9, "117": 9, "118": 9, "119": 9, "119l": 9, "11l1": 9, "12": [6, 7, 8, 
9], "123": 9, "123c1": [6, 7, 8, 9], "123zm3": 9, "124": 9, "125": 9, "125l3": 9, "127v1": 9, "12zm6": 9, "138": 9, "14": 9, "142a1": [6, 7, 8, 9], "143": 9, "145": 9, "145l": 9, "148a": 9, "15": 9, "157": [6, 7, 8, 9], "16": [5, 6, 7, 8, 9], "16h1": 9, "172": 9, "175l": 9, "176": 9, "177": 9, "178": 9, "186a7": 9, "186a8": 9, "186c15": 9, "186l": 9, "188": [6, 7, 8, 9], "2": [3, 5, 6, 7, 8, 9, 23], "201": 9, "201c": 9, "204c": 9, "207c0": 9, "213": 9, "213zm3": 9, "214": 9, "215": 9, "215l": 9, "216": 9, "218": 9, "22": [6, 7, 8, 9], "234": 9, "249": [6, 7, 8, 9], "25": [5, 6, 7, 8, 9], "25a": [5, 6, 7, 8, 9], "25a2": [5, 7], "25h": [5, 7, 9], "25h12": 9, "25v1": 9, "25v12": 9, "25v8h": [5, 7], "25v9": [5, 7], "25zm1": 9, "25zm7": 9, "26": 9, "266": 9, "266h3": 9, "273": 9, "275": 9, "286": [6, 7, 8, 9], "288": 9, "2a": 9, "2h6": [5, 7], "3": [3, 5, 6, 7, 8, 9], "302a1": [6, 7, 8, 9], "309a": 9, "31": 9, "314": 9, "315": 9, "317": 9, "326": 9, "334": 9, "336": [5, 7], "338": 9, "34": [6, 7, 8, 9], "346": [6, 7, 8, 9], "35": 9, "354h": [6, 7, 8, 9], "4": [6, 7, 8, 9], "414": [5, 7], "415": [6, 7, 8, 9], "416": 9, "418": [6, 7, 8, 9], "418l": 9, "429zm1": [6, 7, 8, 9], "437": [6, 7, 8, 9], "43a1": [6, 7, 8, 9], "458": 9, "458a7": 9, "458a8": 9, "458c": 9, "459": 9, "459c": 9, "462": [6, 7, 8, 9], "462c": [6, 7, 8, 9], "47": 9, "47a": 9, "499": 9, "49l": 9, "4h3": 9, "5": [5, 6, 7, 8, 9], "502": 9, "503": [6, 7, 8, 9], "504": [6, 7, 8, 9], "516": [6, 7, 8, 9], "521": 9, "528": 9, "53": 9, "538": [6, 7, 8, 9], "53a": 9, "548": 9, "56": 9, "564": 9, "56c": 9, "57": [6, 7, 8, 9], "571a": [6, 7, 8, 9], "583": 9, "59": 9, "598": 9, "5a": [5, 7, 9], "5a1": 9, "5a2": [5, 7], "5c": 9, "5c0": [5, 7, 9], "5c15": 9, "5h": [5, 7], "5h2": [6, 7, 8, 9], "5h3": [5, 6, 7, 8, 9], "5h7": [5, 7], "5l": 9, "5v": [5, 7], "5z": [5, 7], "5zm3": [5, 7], "6": [5, 6, 7, 8, 9], "608": 9, "616": 9, "623h": 9, "633": [6, 7, 8, 9], "641": 9, "665": 9, "671": 9, "678": 9, "683": [6, 7, 8, 9], 
"69": 9, "694": [6, 7, 8, 9], "7": [5, 6, 7, 8, 9], "707a": 9, "71": 9, "713": [6, 7, 8, 9], "713l10": [6, 7, 8, 9], "717a": 9, "723l5": [6, 7, 8, 9], "732": 9, "734": 9, "734l10": 9, "74": 9, "748": 9, "749": 9, "75": [5, 6, 7, 8, 9], "751": 9, "752": 9, "754": 9, "75a": [5, 6, 7, 8, 9], "75a1": 9, "75a2": [5, 7], "75c0": 9, "75h": [5, 7], "75h8": [5, 7], "75v": [5, 7, 9], "75v1": [5, 7], "75v12": 9, "75v2": [5, 7], "75v3": [5, 7], "75v5": [5, 7], "75v6": [5, 7], "75z": [5, 7], "75zm5": [5, 7], "76": 9, "766c": 9, "774": 9, "774a1": 9, "78": 9, "783": 9, "784": 9, "793a": [6, 7, 8, 9], "7h9": [6, 7, 8, 9], "8": [5, 6, 7, 8, 9], "816a1": 9, "833": 9, "871": [6, 7, 8, 9], "8l1": 9, "9": [5, 7, 9], "902": 9, "904": 9, "904v3": 9, "909": [6, 7, 8, 9], "935c": 9, "938": 9, "94": 9, "949": 9, "954": 9, "95zm12": 9, "979": 9, "979l": 9, "97a": 9, "982": [6, 7, 8, 9], "9h6": [6, 7, 8, 9], "The": 38, "To": [17, 19, 20, 35], "acceler": 35, "accuraci": 19, "adapt": [7, 22, 39], "add": [3, 12], "advanc": 36, "ai": [11, 14, 33, 38], "algorithm": 2, "altern": 16, "amd": 33, "aml": 12, "aml_config": 42, "aml_config_path": 42, "an": [11, 15], "append": 22, "appendprepostprocessingop": 43, "approach": 28, "arc": 12, "argument": [5, 6, 39], "aria": [5, 6, 7, 8, 9], "artifact": 27, "auto": [6, 7, 10, 28, 39], "autoawq": 34, "autoawqquant": 43, "autogptq": 34, "automat": [5, 10], "avail": 5, "awar": 24, "azur": [11, 12, 13, 14, 15, 16, 18, 42], "azureml": [11, 15, 18, 20, 35, 39, 42], "azureml_cli": 42, "azuremldata": 27, "azuremldeploy": 27, "azuremlmodel": 27, "beam": 22, "benefit": 45, "binari": 25, "blob": 16, "builder": 39, "built": 17, "cach": [16, 39], "candidatemodel": 27, "captur": 39, "capturesplitinfo": 43, "class": [3, 5, 6, 7, 8, 9, 29], "cli": [28, 36], "client": [11, 42], "cluster": 12, "code": 9, "command": 39, "compon": 17, "composit": 41, "compressor": 33, "comput": [11, 15], "conclus": 28, "config": [17, 18, 27, 42], "configur": [3, 10, 15, 16, 17, 19, 21, 22, 23, 
24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36, 41], "context": 25, "convers": [23, 25, 26, 30], "convert": [30, 39], "cost": [28, 39], "creat": 12, "curat": 11, "custom": [1, 17, 19], "d": [5, 6, 7, 8, 9], "data": [11, 17], "dataset": 18, "datastor": [11, 20], "datatyp": 30, "defin": [3, 9], "depend": [15, 16, 37], "dependabot": [5, 7], "descript": 29, "design": 2, "detail": [5, 6], "develop": 23, "distribut": 41, "docker": [18, 35], "dockerfil": 27, "document": 29, "download": [25, 26], "dynam": 30, "dynamictofixedshap": 43, "edit": 37, "engin": [2, 42], "environ": [18, 35], "evalu": [2, 42], "exampl": [0, 1, 3, 22, 23, 24, 25, 26, 29, 30, 31, 33, 34, 42], "execut": 2, "exhaustivesearchalgorithm": 44, "export": 39, "exposeoutput": 29, "exposequantizedoutput": 29, "extend": 4, "extens": 12, "extra": [15, 23], "extract": 22, "extractadapt": 43, "face": 5, "field": 42, "file": [9, 20, 27, 39], "finetun": [7, 8, 39], "fix": 30, "float16": 30, "flow": 42, "folder": 20, "from": [11, 23, 37], "function": 3, "gener": [17, 25, 28, 39], "get": 5, "gptqquantiz": 43, "graph": 39, "graphsurgeri": 43, "handler": 41, "height": [5, 6, 7, 8, 9], "hf": 41, "hftrainingargu": 43, "hidden": [5, 6, 7, 8, 9], "host": [11, 12, 42], "how": [3, 10, 17, 19, 20, 27, 32, 35, 36], "hub": 18, "hug": 5, "huggingfac": 18, "i": [27, 45], "id": 42, "implement": 3, "import": [16, 29], "incdynamicquant": 43, "incquant": 43, "incstaticquant": 43, "infer": [5, 6, 7, 27], "infershap": 29, "inform": [39, 42], "input": [18, 30, 39, 42], "insert": 22, "insertbeamsearch": 43, "instal": [5, 15, 16, 23, 37], "integr": [11, 18, 36], "intel": 33, "introduct": 18, "isol": 35, "job": [11, 20], "json": [27, 42], "kubernet": 12, "latenc": 19, "learn": [11, 12, 15], "librari": 25, "line": 39, "link": 12, "list": 13, "load": 18, "local": [12, 18, 20, 35], "loftq": [24, 43], "log": 5, "login": 18, "lora": [24, 39, 43], "m0": 9, "m14": 9, "m5": [5, 7], "m6": [5, 7], "m9": [6, 7, 8, 9], "machin": [11, 12, 15], "manag": 
35, "manage_compute_inst": 13, "map": 30, "matmulnbitstoqdq": 43, "mergeadapterweight": [24, 43], "metric": [18, 19, 27], "mix": 30, "mixedprecisionoverrid": 43, "ml": [11, 13, 18, 42], "model": [5, 6, 7, 11, 18, 20, 21, 23, 25, 26, 27, 28, 30, 33, 38, 39, 41, 42], "modelbuild": 43, "more": [5, 6], "multipl": 19, "name": [20, 39], "nativ": 35, "neural": 33, "new": 3, "note": [16, 29], "notebook": 5, "nvidia": 33, "octicon": [5, 6, 7, 8, 9], "oliv": [4, 5, 9, 15, 23, 25, 26, 27, 38, 42, 45], "olivemodel": 41, "onnx": [5, 6, 7, 8, 22, 29, 30, 33, 38, 39, 41, 43], "onnxconvers": 43, "onnxdynamicquant": 43, "onnxfloattofloat16": 43, "onnxiodatatypeconvert": 43, "onnxmatmul4quant": 43, "onnxopversionconvers": 43, "onnxpeepholeoptim": 43, "onnxquant": 43, "onnxruntim": [33, 39], "onnxstaticquant": 43, "op": 22, "openvino": [23, 41, 43], "openvinoconvers": 43, "openvinoquant": 43, "opt": 28, "optim": [3, 5, 6, 7, 8, 10, 21, 22, 33, 38, 39], "optimumconvers": 43, "optimummerg": 43, "option": [23, 37, 39, 42], "order": 2, "ort": [22, 35], "ortmixedprecis": 43, "ortsessionparamstun": 43, "orttransformersoptim": 43, "output": [11, 15, 20, 30], "overview": [16, 45], "packag": 27, "param": 39, "pass": [2, 3, 32, 42, 43], "path": [5, 6, 7, 8, 9, 20], "peeophol": 22, "perform": 22, "pip": [5, 37], "post": [22, 23, 26], "pre": [8, 22], "precis": 30, "prerequisit": [23, 25, 26, 35], "process": [8, 22], "provid": 39, "ptq": [23, 26], "pypi": 23, "python": [18, 35], "pytorch": [24, 31, 34, 39, 41, 43], "qlora": [24, 43], "qnn": [25, 43], "qnncontextbinarygener": 43, "qnnconvers": 43, "qnnmodellibgener": 43, "qnnpreprocess": 43, "qualcomm": 39, "quantiz": [8, 23, 24, 25, 26, 33, 34, 39], "quantizationawaretrain": 43, "quarot": [34, 43], "quickstart": [6, 7, 8, 9], "randomsearchalgorithm": 44, "rank": 27, "readymad": 35, "refer": 40, "regist": 11, "registri": 20, "remot": [15, 20], "removeinitializerfrominput": 29, "removeinput": 29, "removeshap": 29, "renameinput": 29, "renameoutput": 
29, "reorderinput": 29, "replaceerfwithtanh": 29, "rocket": 9, "run": [3, 9, 15, 39], "runtim": [5, 6, 7, 23, 38], "script": [1, 13, 39], "script_dir": 1, "sd": [5, 6, 7, 8, 9], "sdk": [25, 26, 39], "search": [2, 22], "searchalgorithm": 44, "self": 12, "session": 39, "set": [20, 36], "setup": 16, "shape": 30, "share": [16, 39], "slicegpt": [24, 43], "snpe": [26, 41, 43], "snpeconvers": 43, "snpequant": 43, "snpetoonnxconvers": 43, "sourc": 37, "sparsegpt": [24, 43], "spinquant": [34, 43], "split": [28, 39], "splitmodel": 43, "squar": 9, "start": 5, "storag": 16, "store": 11, "strategi": 2, "string": 20, "support": [8, 17], "surgeon": 29, "surgeri": 29, "svg": [5, 6, 7, 8, 9], "system": [2, 15, 18, 35, 42], "target": 11, "techniqu": 8, "templat": 17, "tensorrt": 33, "throughput": 19, "tool": [23, 39], "toolkit": 38, "torchtrtconvers": [31, 43], "tpesearchalgorithm": 44, "train": [23, 24, 26], "transform": 22, "true": [5, 6, 7, 8, 9], "try": 45, "tune": [22, 39], "type": 19, "unzip": [25, 26], "up": 36, "us": [5, 6, 7, 10, 11, 45], "usag": [13, 16], "user_script": 1, "version": [5, 6, 7, 8, 9], "viewbox": [5, 6, 7, 8, 9], "viti": 33, "vitisaiquant": 43, "what": [27, 45], "width": [5, 6, 7, 8, 9], "window": 33, "work": 36, "workflow": [9, 15, 32, 36, 42], "workspac": 15, "yaml": 9, "zap": [6, 7, 8, 9], "zerooutinput": 29, "zipfil": 27}}) \ No newline at end of file +Search.setIndex({"alltitles": {" Install with pip": [[5, "install-with-pip"]], "1. Define a new class": [[3, "define-a-new-class"]], "2. Define configuration": [[3, "define-configuration"]], "3. 
Implement the run function": [[3, "implement-the-run-function"]], " Define the workflow in a YAML file": [[9, "define-the-workflow-in-a-yaml-file"]], " Auto-Optimize the model and adapters": [[7, "auto-optimize-the-model-and-adapters"]], " Automatic model optimization with Olive": [[5, "automatic-model-optimization-with-olive"]], " Run the workflow": [[9, "run-the-workflow"]], " Quickstart": [[6, "quickstart"], [7, "quickstart"], [8, "quickstart"], [9, "quickstart"]], "Accelerator Configuration": [[35, "accelerator-configuration"]], "Accuracy Metric": [[19, "accuracy-metric"]], "Add AML extension to cluster": [[12, "add-aml-extension-to-cluster"]], "Add local Kubernetes cluster to Azure Arc": [[12, "add-local-kubernetes-cluster-to-azure-arc"]], "Alternative Configuration": [[16, "alternative-configuration"]], "Append Pre/Post Processing Ops": [[22, "append-pre-post-processing-ops"]], "AppendPrePostProcessingOps": [[43, "appendprepostprocessingops"]], "Approach": [[28, "approach"]], "Auto Optimization": [[6, null]], "Auto Optimizer Configuration": [[10, "auto-optimizer-configuration"]], "Auto-Optimization": [[39, "auto-optimization"]], "AutoAWQ": [[34, "autoawq"]], "AutoAWQQuantizer": [[43, "autoawqquantizer"]], "AutoGPTQ": [[34, "autogptq"]], "Azure AI": [[14, null]], "Azure AI Integration": [[11, null]], "Azure ML Client": [[42, "azure-ml-client"]], "Azure ML model": [[18, "azure-ml-model"]], "Azure ML scripts": [[13, null]], "Azure Machine Learning client": [[11, "azure-machine-learning-client"]], "AzureML": [[39, "azureml"]], "AzureML Datastore": [[20, "azureml-datastore"]], "AzureML Job Output": [[20, "azureml-job-output"]], "AzureML Model": [[20, "azureml-model"]], "AzureML Readymade Systems": [[35, "azureml-readymade-systems"]], "AzureML Registry Model": [[20, "azureml-registry-model"]], "AzureML System": [[35, "azureml-system"]], "AzureML system": [[18, "azureml-system"]], "AzureMLData": [[27, "azuremldata"]], "AzureMLDeployment": [[27, 
"azuremldeployment"]], "AzureMLModels": [[27, "azuremlmodels"]], "Benefits of using Olive": [[45, "benefits-of-using-olive"]], "CLI": [[28, "cli"]], "CandidateModels": [[27, "candidatemodels"]], "Capture Onnx Graph": [[39, "capture-onnx-graph"]], "CaptureSplitInfo": [[43, "capturesplitinfo"]], "Command Line Tools": [[39, null]], "Composite Model Handler": [[41, "composite-model-handler"]], "Conclusion": [[28, "conclusion"]], "Configs with built-in component": [[17, "configs-with-built-in-component"]], "Configs with customized component": [[17, "configs-with-customized-component"]], "Configurations": [[29, "configurations"], [29, "id3"], [29, "id12"], [29, "id17"], [29, "id20"], [29, "id23"], [29, "id26"]], "Configure Azure Blob Storage": [[16, "configure-azure-blob-storage"]], "Configure Olive QNN": [[25, "configure-olive-qnn"]], "Configure Olive SNPE": [[26, "configure-olive-snpe"]], "Configure Workflows (Advanced)": [[36, "configure-workflows-advanced"]], "Configure an AzureML system": [[15, "configure-an-azureml-system"]], "Configure multiple metrics": [[19, "configure-multiple-metrics"]], "Configure the Shared Cache": [[16, "configure-the-shared-cache"]], "Context Binary Generation": [[25, "context-binary-generation"]], "Convert Adapters": [[39, "convert-adapters"]], "Convert dynamic shape to fixed shape": [[30, "convert-dynamic-shape-to-fixed-shape"]], "Create Kubernetes cluster": [[12, "create-kubernetes-cluster"]], "Custom Metric": [[19, "custom-metric"]], "Custom Scripts": [[1, null]], "Datatype Mapping": [[30, "datatype-mapping"]], "Description": [[29, "description"], [29, "id2"], [29, "id5"], [29, "id7"], [29, "id9"], [29, "id11"], [29, "id14"], [29, "id16"], [29, "id19"], [29, "id22"], [29, "id25"]], "Design": [[2, null]], "Distributed Hf Model Handler": [[41, "distributed-hf-model-handler"]], "Distributed Onnx Model Handler": [[41, "distributed-onnx-model-handler"]], "Docker System": [[35, "docker-system"]], "Dockerfile": [[27, "dockerfile"]], "Download 
and unzip QNN SDK": [[25, "download-and-unzip-qnn-sdk"]], "Download and unzip SNPE SDK": [[26, "download-and-unzip-snpe-sdk"]], "DynamicToFixedShape": [[43, "dynamictofixedshape"]], "Editable install": [[37, "editable-install"]], "Engine": [[2, "engine"]], "Engine Information": [[42, "engine-information"]], "Evaluator": [[2, "evaluator"]], "Evaluators Information": [[42, "evaluators-information"]], "Example": [[3, "example"], [29, "example"], [29, "id1"], [29, "id4"], [29, "id6"], [29, "id8"], [29, "id10"], [29, "id13"], [29, "id15"], [29, "id18"], [29, "id21"], [29, "id24"], [29, "id27"], [42, "example"], [42, "id1"], [42, "id2"], [42, "id3"], [42, "id4"], [42, "id5"], [42, "id6"]], "Example Configuration": [[22, "example-configuration"], [22, "id1"], [22, "id2"], [22, "id3"], [22, "id4"], [22, "id5"], [23, "example-configuration"], [23, "id1"], [24, "example-configuration"], [24, "id1"], [24, "id2"], [24, "id3"], [24, "id4"], [24, "id5"], [25, "example-configuration"], [25, "id1"], [25, "id2"], [26, "example-configuration"], [26, "id1"], [30, "example-configuration"], [30, "id1"], [30, "id2"], [30, "id3"], [30, "id4"], [30, "id5"], [31, "example-configuration"], [33, "example-configuration"], [33, "id1"], [33, "id2"], [33, "id3"], [34, "example-configuration"], [34, "id1"], [34, "id2"], [34, "id3"]], "Examples": [[0, null], [1, "examples"], [1, "id1"]], "Execution Order": [[2, "execution-order"]], "ExhaustiveSearchAlgorithm": [[44, "exhaustivesearchalgorithm"]], "ExposeOutputs": [[29, "exposeoutputs"]], "ExposeQuantizedOutput": [[29, "exposequantizedoutput"]], "Extending Olive": [[4, null]], "Extract Adapters": [[22, "extract-adapters"]], "ExtractAdapters": [[43, "extractadapters"]], "Finetune": [[7, null], [39, "finetune"]], "Float16 Conversion": [[30, "float16-conversion"]], "Generate Adapters": [[39, "generate-adapters"]], "Generate Cost Model for Model Splitting": [[39, "generate-cost-model-for-model-splitting"]], "Generic Data Config": [[17, 
"generic-data-config"]], "Getting started": [[5, null]], "GptqQuantizer": [[43, "gptqquantizer"]], "GraphSurgeries": [[43, "graphsurgeries"]], "Hf Model Handler": [[41, "hf-model-handler"]], "How To Configure Data": [[17, null]], "How To Configure Metrics": [[19, null]], "How To Configure Systems": [[35, null]], "How To Set Model Path": [[20, null]], "How to add new optimization Pass": [[3, null]], "How to configure a Workflow Pass": [[32, null]], "How to package Olive artifacts": [[27, "how-to-package-olive-artifacts"]], "How to use Automatic Optimizer": [[10, null]], "How-to": [[36, null]], "Huggingface Hub model": [[18, "huggingface-hub-model"]], "Huggingface Integration": [[18, null]], "Huggingface datasets": [[18, "huggingface-datasets"]], "Huggingface login": [[18, "huggingface-login"]], "Huggingface metrics": [[18, "huggingface-metrics"]], "Important Note": [[29, "important-note"]], "Important Notes": [[16, "important-notes"]], "IncDynamicQuantization": [[43, "incdynamicquantization"]], "IncQuantization": [[43, "incquantization"]], "IncStaticQuantization": [[43, "incstaticquantization"]], "InferShapes": [[29, "infershapes"]], "Inference config file": [[27, "inference-config-file"]], "Inference model using ONNX Runtime": [[5, "inference-model-using-onnx-runtime"], [6, "inference-model-using-onnx-runtime"], [7, "inference-model-using-onnx-runtime"]], "Input Model": [[18, "input-model"]], "Input Model Information": [[42, "input-model-information"]], "Inputs/Outputs DataType Conversion": [[30, "inputs-outputs-datatype-conversion"]], "Insert Beam Search Op": [[22, "insert-beam-search-op"]], "InsertBeamSearch": [[43, "insertbeamsearch"]], "Install Dependencies": [[16, "install-dependencies"]], "Install Extra Dependencies": [[15, "install-extra-dependencies"]], "Install from source": [[37, "install-from-source"]], "Install with pip": [[37, "install-with-pip"]], "Installation": [[37, null]], "Integrations": [[36, "integrations"]], "Introduction": [[18, 
"introduction"]], "Isolated ORT System": [[35, "isolated-ort-system"]], "Latency Metric": [[19, "latency-metric"]], "Link Azure Arc Kubernetes cluster to Azure Machine Learning": [[12, "link-azure-arc-kubernetes-cluster-to-azure-machine-learning"]], "LoRA": [[24, "lora"], [43, "lora"]], "LoRA options": [[39, "lora-options"]], "LoRA/QLoRA/LoftQ HFTrainingArguments": [[43, "lora-qlora-loftq-hftrainingarguments"]], "Local File": [[20, "local-file"]], "Local Folder": [[20, "local-folder"]], "Local Model Path": [[20, "local-model-path"]], "Local System": [[35, "local-system"]], "Local model": [[18, "local-model"]], "Local system, docker system and Python environment system": [[18, "local-system-docker-system-and-python-environment-system"]], "LoftQ": [[24, "loftq"], [43, "loftq"]], "Log-in to Hugging Face": [[5, "log-in-to-hugging-face"]], "Managed AzureML System": [[35, "managed-azureml-system"]], "Managed Docker System": [[35, "managed-docker-system"]], "Managed Python Environment System": [[35, "managed-python-environment-system"]], "MatMulNBitsToQDQ": [[43, "matmulnbitstoqdq"]], "MergeAdapterWeights": [[24, "mergeadapterweights"], [43, "mergeadapterweights"]], "Metric Types": [[19, "metric-types"]], "Metrics file": [[27, "metrics-file"]], "Mixed Precision Conversion": [[30, "mixed-precision-conversion"]], "MixedPrecisionOverrides": [[43, "mixedprecisionoverrides"]], "Model Builder options": [[39, "model-builder-options"]], "Model Configuration": [[21, null], [41, "model-configuration"]], "Model Conversion": [[23, "model-conversion"], [26, "model-conversion"], [30, "model-conversion"]], "Model Conversion/Quantization": [[25, "model-conversion-quantization"]], "Model Library Generation": [[25, "model-library-generation"]], "Model Optimization": [[21, "model-optimization"]], "Model Script File Information": [[39, "model-script-file-information"]], "Model Splitting": [[28, null]], "Model config loading": [[18, "model-config-loading"]], "Model configuration file": [[27, 
"model-configuration-file"]], "ModelBuilder": [[43, "modelbuilder"]], "Models rank JSON file": [[27, "models-rank-json-file"]], "More details on arguments": [[5, "more-details-on-arguments"], [6, "more-details-on-arguments"]], "NVIDIA TensorRT Model Optimizer-Windows": [[33, "nvidia-tensorrt-model-optimizer-windows"]], "Named Arguments": [[39, "named-arguments"], [39, "named-arguments"], [39, "named-arguments"], [39, "named-arguments"], [39, "named-arguments"], [39, "named-arguments"], [39, "named-arguments"], [39, "named-arguments"], [39, "named-arguments"], [39, "named-arguments"], [39, "named-arguments"], [39, "named-arguments"]], "Native AzureML System": [[35, "native-azureml-system"]], "Native Docker System": [[35, "native-docker-system"]], "Native Python Environment System": [[35, "native-python-environment-system"]], "Notebook available!": [[5, "notebook-available"]], "ONNX": [[22, null], [30, null], [43, "onnx"]], "ONNX Model Handler": [[41, "onnx-model-handler"]], "ONNX Quantization": [[33, null]], "ONNX Surgeon Classes Documentation": [[29, null]], "ORT Performance Tuning": [[22, "ort-performance-tuning"]], "ORT Transformers Optimization": [[22, "ort-transformers-optimization"]], "Olive Options": [[42, null]], "Olive: The AI Model Optimization Toolkit for the ONNX Runtime": [[38, null]], "OliveModels": [[41, null]], "OnnxConversion": [[43, "onnxconversion"]], "OnnxDynamicQuantization": [[43, "onnxdynamicquantization"]], "OnnxFloatToFloat16": [[43, "onnxfloattofloat16"]], "OnnxIODataTypeConverter": [[43, "onnxiodatatypeconverter"]], "OnnxMatMul4Quantizer": [[43, "onnxmatmul4quantizer"]], "OnnxOpVersionConversion": [[43, "onnxopversionconversion"]], "OnnxPeepholeOptimizer": [[43, "onnxpeepholeoptimizer"]], "OnnxQuantization": [[43, "onnxquantization"]], "OnnxStaticQuantization": [[43, "onnxstaticquantization"]], "OpenVINO": [[23, null], [43, "openvino"]], "OpenVINO Model Handler": [[41, "openvino-model-handler"]], "OpenVINOConversion": [[43, 
"openvinoconversion"]], "OpenVINOQuantization": [[43, "openvinoquantization"]], "OptimumConversion": [[43, "optimumconversion"]], "OptimumMerging": [[43, "optimummerging"]], "Option 1: install Olive with OpenVINO extras": [[23, "option-1-install-olive-with-openvino-extras"]], "Option 2: Install OpenVINO Runtime and OpenVINO Development Tools from Pypi": [[23, "option-2-install-openvino-runtime-and-openvino-development-tools-from-pypi"]], "Optional Dependencies": [[37, "optional-dependencies"]], "OrtMixedPrecision": [[43, "ortmixedprecision"]], "OrtSessionParamsTuning": [[43, "ortsessionparamstuning"]], "OrtTransformersOptimization": [[43, "orttransformersoptimization"]], "Overview": [[16, "overview"], [45, null]], "Packaged files": [[27, "packaged-files"]], "Packaging Olive artifacts": [[27, null]], "Pass": [[2, "pass"]], "Pass Flows Information": [[42, "pass-flows-information"]], "Passes": [[43, null]], "Passes Information": [[42, "passes-information"]], "Peeophole Optimizer": [[22, "peeophole-optimizer"]], "Post Training Quantization (PTQ)": [[23, "post-training-quantization-ptq"], [26, "post-training-quantization-ptq"]], "Pre-processing for Finetuning": [[8, "pre-processing-for-finetuning"]], "Prerequisites": [[23, "prerequisites"], [25, "prerequisites"], [26, "prerequisites"], [35, "prerequisites"], [35, "id1"]], "Providing Input Models": [[39, "providing-input-models"]], "PyTorch": [[24, null], [31, null]], "PyTorch Exporter options": [[39, "pytorch-exporter-options"]], "PyTorch Model Handler": [[41, "pytorch-model-handler"]], "PyTorch Quantization": [[34, null]], "Python Environment System": [[35, "python-environment-system"]], "Pytorch": [[43, "pytorch"]], "QLoRA": [[24, "qlora"], [43, "qlora"]], "QNN": [[25, null], [43, "qnn"]], "QNNContextBinaryGenerator": [[43, "qnncontextbinarygenerator"]], "QNNConversion": [[43, "qnnconversion"]], "QNNModelLibGenerator": [[43, "qnnmodellibgenerator"]], "QNNPreprocess": [[43, "qnnpreprocess"]], "QuaRot": [[34, "quarot"], 
[43, "quarot"]], "Qualcomm SDK": [[39, "qualcomm-sdk"]], "Quantization": [[39, "quantization"]], "Quantization Aware Training": [[24, "quantization-aware-training"]], "Quantization with ONNX Optimizations": [[8, "quantization-with-onnx-optimizations"]], "QuantizationAwareTraining": [[43, "quantizationawaretraining"]], "Quantize": [[8, null]], "Quantize with AMD Vitis AI Quantizer": [[33, "quantize-with-amd-vitis-ai-quantizer"]], "Quantize with Intel\u00ae Neural Compressor": [[33, "quantize-with-intel-neural-compressor"]], "Quantize with onnxruntime": [[33, "quantize-with-onnxruntime"]], "RandomSearchAlgorithm": [[44, "randomsearchalgorithm"]], "Reference": [[40, null]], "Remote Model Path": [[20, "remote-model-path"]], "Remote Workflow": [[15, null]], "RemoveInitializerFromInputs": [[29, "removeinitializerfrominputs"]], "RemoveInputs": [[29, "removeinputs"]], "RemoveShapes": [[29, "removeshapes"]], "RenameInputs": [[29, "renameinputs"]], "RenameOutputs": [[29, "renameoutputs"]], "ReorderInputs": [[29, "reorderinputs"]], "ReplaceErfWithTanh": [[29, "replaceerfwithtanh"]], "Run": [[39, "run"]], "Run Olive Workflow": [[15, "run-olive-workflow"]], "Run Olive workflows": [[9, null]], "Running Olive Workflow Remotely on Azure Machine Learning workspace compute": [[15, "running-olive-workflow-remotely-on-azure-machine-learning-workspace-compute"]], "SNPE": [[26, null], [43, "snpe"]], "SNPE Model Handler": [[41, "snpe-model-handler"]], "SNPEConversion": [[43, "snpeconversion"]], "SNPEQuantization": [[43, "snpequantization"]], "SNPEtoONNXConversion": [[43, "snpetoonnxconversion"]], "Scripts list": [[13, "scripts-list"]], "Search Algorithm": [[2, "search-algorithm"]], "Search Strategy": [[2, "search-strategy"]], "SearchAlgorithms": [[44, null]], "Self-hosted Kubernetes cluster": [[12, null]], "Set-up": [[36, "set-up"]], "Setup and Usage": [[16, "setup-and-usage"]], "Shared Cache": [[16, null], [39, "shared-cache"]], "SliceGPT": [[24, "slicegpt"], [43, "slicegpt"]], 
"SparseGPT": [[24, "sparsegpt"], [43, "sparsegpt"]], "SpinQuant": [[34, "spinquant"], [43, "spinquant"]], "SplitModel": [[43, "splitmodel"]], "String Name": [[20, "string-name"]], "Supported Data Config Template": [[17, "supported-data-config-template"]], "Supported quantization techniques": [[8, "supported-quantization-techniques"]], "Surgeries": [[29, "surgeries"]], "System": [[2, "system"]], "Systems Information": [[42, "systems-information"]], "TPESearchAlgorithm": [[44, "tpesearchalgorithm"]], "Throughput Metric": [[19, "throughput-metric"]], "TorchTRTConversion": [[31, "torchtrtconversion"], [43, "torchtrtconversion"]], "Try Olive": [[45, "try-olive"]], "Tune OnnxRuntime Session Params": [[39, "tune-onnxruntime-session-params"]], "Usage": [[13, "usage"]], "Using Azure ML compute as host or target": [[11, "using-azure-ml-compute-as-host-or-target"]], "Using AzureML curated model": [[11, "using-azureml-curated-model"]], "Using AzureML registered model": [[11, "using-azureml-registered-model"]], "Using a model from an AzureML job output": [[11, "using-a-model-from-an-azureml-job-output"]], "Using data stored in AzureML datastore": [[11, "using-data-stored-in-azureml-datastore"]], "Using model stored in AzureML datastore": [[11, "using-model-stored-in-azureml-datastore"]], "VitisAIQuantization": [[43, "vitisaiquantization"]], "What is Olive Packaging": [[27, "what-is-olive-packaging"]], "What is Olive?": [[45, "what-is-olive"]], "Workflow Host": [[42, "workflow-host"]], "Workflow ID": [[42, "workflow-id"]], "Workflow outputs": [[15, "workflow-outputs"]], "Working with the CLI": [[36, "working-with-the-cli"]], "ZeroOutInput": [[29, "zerooutinput"]], "Zipfile": [[27, "zipfile"]], "aml_config.json:": [[42, "aml-config-json"]], "auto-opt": [[28, "auto-opt"]], "azureml_client with aml_config_path:": [[42, "azureml-client-with-aml-config-path"]], "azureml_client with azureml config fields:": [[42, "azureml-client-with-azureml-config-fields"]], "azureml_client:": [[42, 
"azureml-client"]], "generate-cost-model": [[28, "generate-cost-model"]], "manage_compute_instance": [[13, "manage-compute-instance"]], "script_dir": [[1, "script-dir"]], "user_script": [[1, "user-script"]]}, "docnames": ["examples", "extending/custom-scripts", "extending/design", "extending/how-to-add-optimization-pass", "extending/index", "getting-started/getting-started", "how-to/cli/cli-auto-opt", "how-to/cli/cli-finetune", "how-to/cli/cli-quantize", "how-to/cli/cli-run", "how-to/configure-workflows/auto-opt", "how-to/configure-workflows/azure-ai/azure-ai", "how-to/configure-workflows/azure-ai/azure-arc", "how-to/configure-workflows/azure-ai/azure-script", "how-to/configure-workflows/azure-ai/index", "how-to/configure-workflows/azure-ai/remote-workflow", "how-to/configure-workflows/azure-ai/shared-model-cache", "how-to/configure-workflows/how-to-configure-data", "how-to/configure-workflows/huggingface-integration", "how-to/configure-workflows/metrics-configuration", "how-to/configure-workflows/model-opt-and-transform/configure-model-path", "how-to/configure-workflows/model-opt-and-transform/index", "how-to/configure-workflows/model-opt-and-transform/onnx", "how-to/configure-workflows/model-opt-and-transform/openvino", "how-to/configure-workflows/model-opt-and-transform/pytorch", "how-to/configure-workflows/model-opt-and-transform/qnn", "how-to/configure-workflows/model-opt-and-transform/snpe", "how-to/configure-workflows/model-packaging", "how-to/configure-workflows/model-splitting", "how-to/configure-workflows/onnx-graph-surgeon", "how-to/configure-workflows/pass/convert-onnx", "how-to/configure-workflows/pass/convert-pytorch", "how-to/configure-workflows/pass/pass-configuration", "how-to/configure-workflows/pass/quantization-onnx", "how-to/configure-workflows/pass/quantization-pytorch", "how-to/configure-workflows/systems", "how-to/index", "how-to/installation", "index", "reference/cli", "reference/index", "reference/model", "reference/options", 
"reference/pass", "reference/search-algorithm", "why-olive"], "envversion": {"sphinx": 62, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1}, "filenames": ["examples.md", "extending/custom-scripts.md", "extending/design.md", "extending/how-to-add-optimization-pass.md", "extending/index.rst", "getting-started/getting-started.md", "how-to/cli/cli-auto-opt.md", "how-to/cli/cli-finetune.md", "how-to/cli/cli-quantize.md", "how-to/cli/cli-run.md", "how-to/configure-workflows/auto-opt.md", "how-to/configure-workflows/azure-ai/azure-ai.md", "how-to/configure-workflows/azure-ai/azure-arc.md", "how-to/configure-workflows/azure-ai/azure-script.md", "how-to/configure-workflows/azure-ai/index.rst", "how-to/configure-workflows/azure-ai/remote-workflow.md", "how-to/configure-workflows/azure-ai/shared-model-cache.md", "how-to/configure-workflows/how-to-configure-data.md", "how-to/configure-workflows/huggingface-integration.md", "how-to/configure-workflows/metrics-configuration.md", "how-to/configure-workflows/model-opt-and-transform/configure-model-path.md", "how-to/configure-workflows/model-opt-and-transform/index.rst", "how-to/configure-workflows/model-opt-and-transform/onnx.md", "how-to/configure-workflows/model-opt-and-transform/openvino.md", "how-to/configure-workflows/model-opt-and-transform/pytorch.md", "how-to/configure-workflows/model-opt-and-transform/qnn.md", "how-to/configure-workflows/model-opt-and-transform/snpe.md", "how-to/configure-workflows/model-packaging.md", "how-to/configure-workflows/model-splitting.md", "how-to/configure-workflows/onnx-graph-surgeon.md", "how-to/configure-workflows/pass/convert-onnx.md", "how-to/configure-workflows/pass/convert-pytorch.md", 
"how-to/configure-workflows/pass/pass-configuration.md", "how-to/configure-workflows/pass/quantization-onnx.md", "how-to/configure-workflows/pass/quantization-pytorch.md", "how-to/configure-workflows/systems.md", "how-to/index.rst", "how-to/installation.md", "index.md", "reference/cli.rst", "reference/index.rst", "reference/model.rst", "reference/options.md", "reference/pass.rst", "reference/search-algorithm.rst", "why-olive.md"], "indexentries": {"a_bits": [[43, "cmdoption-arg-a_bits", false]], "a_per_token": [[43, "cmdoption-arg-a_per_token", false]], "a_symmetric": [[43, "cmdoption-arg-a_symmetric", false]], "accuracy_level": [[43, "cmdoption-arg-accuracy_level", false]], "activation_type": [[43, "cmdoption-arg-180", false], [43, "cmdoption-arg-85", false], [43, "cmdoption-arg-activation_type", false]], "activationsymmetric": [[43, "cmdoption-arg-181", false], [43, "cmdoption-arg-62", false], [43, "cmdoption-arg-89", false], [43, "cmdoption-arg-ActivationSymmetric", false]], "add_zero_point": [[43, "cmdoption-arg-add_zero_point", false]], "addqdqpairtoweight": [[43, "cmdoption-arg-AddQDQPairToWeight", false]], "algorithm": [[43, "cmdoption-arg-algorithm", false]], "all_tensors_to_one_file": [[43, "cmdoption-arg-102", false], [43, "cmdoption-arg-108", false], [43, "cmdoption-arg-113", false], [43, "cmdoption-arg-118", false], [43, "cmdoption-arg-12", false], [43, "cmdoption-arg-125", false], [43, "cmdoption-arg-145", false], [43, "cmdoption-arg-166", false], [43, "cmdoption-arg-185", false], [43, "cmdoption-arg-191", false], [43, "cmdoption-arg-197", false], [43, "cmdoption-arg-2", false], [43, "cmdoption-arg-202", false], [43, "cmdoption-arg-207", false], [43, "cmdoption-arg-21", false], [43, "cmdoption-arg-217", false], [43, "cmdoption-arg-26", false], [43, "cmdoption-arg-280", false], [43, "cmdoption-arg-32", false], [43, "cmdoption-arg-37", false], [43, "cmdoption-arg-42", false], [43, "cmdoption-arg-47", false], [43, "cmdoption-arg-69", false], [43, 
"cmdoption-arg-7", false], [43, "cmdoption-arg-96", false], [43, "cmdoption-arg-all_tensors_to_one_file", false]], "allow_tf32": [[43, "cmdoption-arg-230", false], [43, "cmdoption-arg-241", false], [43, "cmdoption-arg-allow_tf32", false]], "append_first_op_types_to_quantize_list": [[43, "cmdoption-arg-54", false], [43, "cmdoption-arg-76", false], [43, "cmdoption-arg-append_first_op_types_to_quantize_list", false]], "approach": [[43, "cmdoption-arg-129", false], [43, "cmdoption-arg-149", false], [43, "cmdoption-arg-approach", false]], "atol": [[43, "cmdoption-arg-atol", false]], "backend": [[43, "cmdoption-arg-131", false], [43, "cmdoption-arg-151", false], [43, "cmdoption-arg-285", false], [43, "cmdoption-arg-backend", false]], "binary_file": [[43, "cmdoption-arg-binary_file", false]], "bits": [[43, "cmdoption-arg-bits", false]], "block_size": [[43, "cmdoption-arg-block_size", false]], "block_to_split": [[43, "cmdoption-arg-block_to_split", false]], "blocksize": [[43, "cmdoption-arg-blocksize", false]], "calibrate_method": [[43, "cmdoption-arg-178", false], [43, "cmdoption-arg-83", false], [43, "cmdoption-arg-calibrate_method", false]], "calibration_batch_size": [[43, "cmdoption-arg-calibration_batch_size", false]], "calibration_data_config": [[43, "cmdoption-arg-calibration_data_config", false]], "calibration_nsamples": [[43, "cmdoption-arg-calibration_nsamples", false]], "calibration_sampling_size": [[43, "cmdoption-arg-164", false], [43, "cmdoption-arg-calibration_sampling_size", false]], "checkpoint_path": [[43, "cmdoption-arg-checkpoint_path", false]], "command line option": [[43, "cmdoption-arg-0", false], [43, "cmdoption-arg-1", false], [43, "cmdoption-arg-10", false], [43, "cmdoption-arg-100", false], [43, "cmdoption-arg-101", false], [43, "cmdoption-arg-102", false], [43, "cmdoption-arg-103", false], [43, "cmdoption-arg-104", false], [43, "cmdoption-arg-105", false], [43, "cmdoption-arg-106", false], [43, "cmdoption-arg-107", false], [43, 
"cmdoption-arg-108", false], [43, "cmdoption-arg-109", false], [43, "cmdoption-arg-11", false], [43, "cmdoption-arg-110", false], [43, "cmdoption-arg-111", false], [43, "cmdoption-arg-112", false], [43, "cmdoption-arg-113", false], [43, "cmdoption-arg-114", false], [43, "cmdoption-arg-115", false], [43, "cmdoption-arg-116", false], [43, "cmdoption-arg-117", false], [43, "cmdoption-arg-118", false], [43, "cmdoption-arg-119", false], [43, "cmdoption-arg-12", false], [43, "cmdoption-arg-120", false], [43, "cmdoption-arg-121", false], [43, "cmdoption-arg-122", false], [43, "cmdoption-arg-123", false], [43, "cmdoption-arg-124", false], [43, "cmdoption-arg-125", false], [43, "cmdoption-arg-126", false], [43, "cmdoption-arg-127", false], [43, "cmdoption-arg-128", false], [43, "cmdoption-arg-129", false], [43, "cmdoption-arg-13", false], [43, "cmdoption-arg-130", false], [43, "cmdoption-arg-131", false], [43, "cmdoption-arg-132", false], [43, "cmdoption-arg-133", false], [43, "cmdoption-arg-134", false], [43, "cmdoption-arg-135", false], [43, "cmdoption-arg-136", false], [43, "cmdoption-arg-137", false], [43, "cmdoption-arg-138", false], [43, "cmdoption-arg-139", false], [43, "cmdoption-arg-14", false], [43, "cmdoption-arg-140", false], [43, "cmdoption-arg-141", false], [43, "cmdoption-arg-142", false], [43, "cmdoption-arg-143", false], [43, "cmdoption-arg-144", false], [43, "cmdoption-arg-145", false], [43, "cmdoption-arg-146", false], [43, "cmdoption-arg-147", false], [43, "cmdoption-arg-148", false], [43, "cmdoption-arg-149", false], [43, "cmdoption-arg-15", false], [43, "cmdoption-arg-150", false], [43, "cmdoption-arg-151", false], [43, "cmdoption-arg-152", false], [43, "cmdoption-arg-153", false], [43, "cmdoption-arg-154", false], [43, "cmdoption-arg-155", false], [43, "cmdoption-arg-156", false], [43, "cmdoption-arg-157", false], [43, "cmdoption-arg-158", false], [43, "cmdoption-arg-159", false], [43, "cmdoption-arg-16", false], [43, "cmdoption-arg-160", false], [43, 
"cmdoption-arg-161", false], [43, "cmdoption-arg-162", false], [43, "cmdoption-arg-163", false], [43, "cmdoption-arg-164", false], [43, "cmdoption-arg-165", false], [43, "cmdoption-arg-166", false], [43, "cmdoption-arg-167", false], [43, "cmdoption-arg-168", false], [43, "cmdoption-arg-169", false], [43, "cmdoption-arg-17", false], [43, "cmdoption-arg-170", false], [43, "cmdoption-arg-171", false], [43, "cmdoption-arg-172", false], [43, "cmdoption-arg-173", false], [43, "cmdoption-arg-174", false], [43, "cmdoption-arg-175", false], [43, "cmdoption-arg-176", false], [43, "cmdoption-arg-177", false], [43, "cmdoption-arg-178", false], [43, "cmdoption-arg-179", false], [43, "cmdoption-arg-18", false], [43, "cmdoption-arg-180", false], [43, "cmdoption-arg-181", false], [43, "cmdoption-arg-182", false], [43, "cmdoption-arg-183", false], [43, "cmdoption-arg-184", false], [43, "cmdoption-arg-185", false], [43, "cmdoption-arg-186", false], [43, "cmdoption-arg-187", false], [43, "cmdoption-arg-188", false], [43, "cmdoption-arg-189", false], [43, "cmdoption-arg-19", false], [43, "cmdoption-arg-190", false], [43, "cmdoption-arg-191", false], [43, "cmdoption-arg-192", false], [43, "cmdoption-arg-193", false], [43, "cmdoption-arg-194", false], [43, "cmdoption-arg-195", false], [43, "cmdoption-arg-196", false], [43, "cmdoption-arg-197", false], [43, "cmdoption-arg-198", false], [43, "cmdoption-arg-199", false], [43, "cmdoption-arg-2", false], [43, "cmdoption-arg-20", false], [43, "cmdoption-arg-200", false], [43, "cmdoption-arg-201", false], [43, "cmdoption-arg-202", false], [43, "cmdoption-arg-203", false], [43, "cmdoption-arg-204", false], [43, "cmdoption-arg-205", false], [43, "cmdoption-arg-206", false], [43, "cmdoption-arg-207", false], [43, "cmdoption-arg-208", false], [43, "cmdoption-arg-209", false], [43, "cmdoption-arg-21", false], [43, "cmdoption-arg-210", false], [43, "cmdoption-arg-211", false], [43, "cmdoption-arg-212", false], [43, "cmdoption-arg-213", false], [43, 
"cmdoption-arg-214", false], [43, "cmdoption-arg-215", false], [43, "cmdoption-arg-216", false], [43, "cmdoption-arg-217", false], [43, "cmdoption-arg-218", false], [43, "cmdoption-arg-219", false], [43, "cmdoption-arg-22", false], [43, "cmdoption-arg-220", false], [43, "cmdoption-arg-221", false], [43, "cmdoption-arg-222", false], [43, "cmdoption-arg-223", false], [43, "cmdoption-arg-224", false], [43, "cmdoption-arg-225", false], [43, "cmdoption-arg-226", false], [43, "cmdoption-arg-227", false], [43, "cmdoption-arg-228", false], [43, "cmdoption-arg-229", false], [43, "cmdoption-arg-23", false], [43, "cmdoption-arg-230", false], [43, "cmdoption-arg-231", false], [43, "cmdoption-arg-232", false], [43, "cmdoption-arg-233", false], [43, "cmdoption-arg-234", false], [43, "cmdoption-arg-235", false], [43, "cmdoption-arg-236", false], [43, "cmdoption-arg-237", false], [43, "cmdoption-arg-238", false], [43, "cmdoption-arg-239", false], [43, "cmdoption-arg-24", false], [43, "cmdoption-arg-240", false], [43, "cmdoption-arg-241", false], [43, "cmdoption-arg-242", false], [43, "cmdoption-arg-243", false], [43, "cmdoption-arg-244", false], [43, "cmdoption-arg-245", false], [43, "cmdoption-arg-246", false], [43, "cmdoption-arg-247", false], [43, "cmdoption-arg-248", false], [43, "cmdoption-arg-249", false], [43, "cmdoption-arg-25", false], [43, "cmdoption-arg-250", false], [43, "cmdoption-arg-251", false], [43, "cmdoption-arg-252", false], [43, "cmdoption-arg-253", false], [43, "cmdoption-arg-254", false], [43, "cmdoption-arg-255", false], [43, "cmdoption-arg-256", false], [43, "cmdoption-arg-257", false], [43, "cmdoption-arg-258", false], [43, "cmdoption-arg-259", false], [43, "cmdoption-arg-26", false], [43, "cmdoption-arg-260", false], [43, "cmdoption-arg-261", false], [43, "cmdoption-arg-262", false], [43, "cmdoption-arg-263", false], [43, "cmdoption-arg-264", false], [43, "cmdoption-arg-265", false], [43, "cmdoption-arg-266", false], [43, "cmdoption-arg-267", false], 
[43, "cmdoption-arg-268", false], [43, "cmdoption-arg-269", false], [43, "cmdoption-arg-27", false], [43, "cmdoption-arg-270", false], [43, "cmdoption-arg-271", false], [43, "cmdoption-arg-272", false], [43, "cmdoption-arg-273", false], [43, "cmdoption-arg-274", false], [43, "cmdoption-arg-275", false], [43, "cmdoption-arg-276", false], [43, "cmdoption-arg-277", false], [43, "cmdoption-arg-278", false], [43, "cmdoption-arg-279", false], [43, "cmdoption-arg-28", false], [43, "cmdoption-arg-280", false], [43, "cmdoption-arg-281", false], [43, "cmdoption-arg-282", false], [43, "cmdoption-arg-283", false], [43, "cmdoption-arg-284", false], [43, "cmdoption-arg-285", false], [43, "cmdoption-arg-286", false], [43, "cmdoption-arg-29", false], [43, "cmdoption-arg-3", false], [43, "cmdoption-arg-30", false], [43, "cmdoption-arg-31", false], [43, "cmdoption-arg-32", false], [43, "cmdoption-arg-33", false], [43, "cmdoption-arg-34", false], [43, "cmdoption-arg-35", false], [43, "cmdoption-arg-36", false], [43, "cmdoption-arg-37", false], [43, "cmdoption-arg-38", false], [43, "cmdoption-arg-39", false], [43, "cmdoption-arg-4", false], [43, "cmdoption-arg-40", false], [43, "cmdoption-arg-41", false], [43, "cmdoption-arg-42", false], [43, "cmdoption-arg-43", false], [43, "cmdoption-arg-44", false], [43, "cmdoption-arg-45", false], [43, "cmdoption-arg-46", false], [43, "cmdoption-arg-47", false], [43, "cmdoption-arg-48", false], [43, "cmdoption-arg-49", false], [43, "cmdoption-arg-5", false], [43, "cmdoption-arg-50", false], [43, "cmdoption-arg-51", false], [43, "cmdoption-arg-52", false], [43, "cmdoption-arg-53", false], [43, "cmdoption-arg-54", false], [43, "cmdoption-arg-55", false], [43, "cmdoption-arg-56", false], [43, "cmdoption-arg-57", false], [43, "cmdoption-arg-58", false], [43, "cmdoption-arg-59", false], [43, "cmdoption-arg-6", false], [43, "cmdoption-arg-60", false], [43, "cmdoption-arg-61", false], [43, "cmdoption-arg-62", false], [43, "cmdoption-arg-63", false], [43, 
"cmdoption-arg-64", false], [43, "cmdoption-arg-65", false], [43, "cmdoption-arg-66", false], [43, "cmdoption-arg-67", false], [43, "cmdoption-arg-68", false], [43, "cmdoption-arg-69", false], [43, "cmdoption-arg-7", false], [43, "cmdoption-arg-70", false], [43, "cmdoption-arg-71", false], [43, "cmdoption-arg-72", false], [43, "cmdoption-arg-73", false], [43, "cmdoption-arg-74", false], [43, "cmdoption-arg-75", false], [43, "cmdoption-arg-76", false], [43, "cmdoption-arg-77", false], [43, "cmdoption-arg-78", false], [43, "cmdoption-arg-79", false], [43, "cmdoption-arg-8", false], [43, "cmdoption-arg-80", false], [43, "cmdoption-arg-81", false], [43, "cmdoption-arg-82", false], [43, "cmdoption-arg-83", false], [43, "cmdoption-arg-84", false], [43, "cmdoption-arg-85", false], [43, "cmdoption-arg-86", false], [43, "cmdoption-arg-87", false], [43, "cmdoption-arg-88", false], [43, "cmdoption-arg-89", false], [43, "cmdoption-arg-9", false], [43, "cmdoption-arg-90", false], [43, "cmdoption-arg-91", false], [43, "cmdoption-arg-92", false], [43, "cmdoption-arg-93", false], [43, "cmdoption-arg-94", false], [43, "cmdoption-arg-95", false], [43, "cmdoption-arg-96", false], [43, "cmdoption-arg-97", false], [43, "cmdoption-arg-98", false], [43, "cmdoption-arg-99", false], [43, "cmdoption-arg-ActivationSymmetric", false], [43, "cmdoption-arg-AddQDQPairToWeight", false], [43, "cmdoption-arg-EnableSubgraph", false], [43, "cmdoption-arg-ForceQuantizeNoInputCheck", false], [43, "cmdoption-arg-MatMulConstBOnly", false], [43, "cmdoption-arg-WeightSymmetric", false], [43, "cmdoption-arg-a_bits", false], [43, "cmdoption-arg-a_per_token", false], [43, "cmdoption-arg-a_symmetric", false], [43, "cmdoption-arg-accuracy_level", false], [43, "cmdoption-arg-activation_type", false], [43, "cmdoption-arg-add_zero_point", false], [43, "cmdoption-arg-algorithm", false], [43, "cmdoption-arg-all_tensors_to_one_file", false], [43, "cmdoption-arg-allow_tf32", false], [43, 
"cmdoption-arg-append_first_op_types_to_quantize_list", false], [43, "cmdoption-arg-approach", false], [43, "cmdoption-arg-atol", false], [43, "cmdoption-arg-backend", false], [43, "cmdoption-arg-binary_file", false], [43, "cmdoption-arg-bits", false], [43, "cmdoption-arg-block_size", false], [43, "cmdoption-arg-block_to_split", false], [43, "cmdoption-arg-blocksize", false], [43, "cmdoption-arg-calibrate_method", false], [43, "cmdoption-arg-calibration_batch_size", false], [43, "cmdoption-arg-calibration_data_config", false], [43, "cmdoption-arg-calibration_nsamples", false], [43, "cmdoption-arg-calibration_sampling_size", false], [43, "cmdoption-arg-checkpoint_path", false], [43, "cmdoption-arg-components", false], [43, "cmdoption-arg-compress_to_fp16", false], [43, "cmdoption-arg-compute_dtype", false], [43, "cmdoption-arg-convert_attribute", false], [43, "cmdoption-arg-cost_model", false], [43, "cmdoption-arg-cpu_cores", false], [43, "cmdoption-arg-damp_percent", false], [43, "cmdoption-arg-data_config", false], [43, "cmdoption-arg-desc_act", false], [43, "cmdoption-arg-device", false], [43, "cmdoption-arg-dim_param", false], [43, "cmdoption-arg-dim_value", false], [43, "cmdoption-arg-do_validate", false], [43, "cmdoption-arg-domain", false], [43, "cmdoption-arg-double_quant", false], [43, "cmdoption-arg-duo_scaling", false], [43, "cmdoption-arg-dynamic", false], [43, "cmdoption-arg-dynamic_lora_r", false], [43, "cmdoption-arg-element_wise_binary_ops", false], [43, "cmdoption-arg-enable_cuda_graph", false], [43, "cmdoption-arg-enable_dpu", false], [43, "cmdoption-arg-enable_htp", false], [43, "cmdoption-arg-enable_profiling", false], [43, "cmdoption-arg-eval_data_config", false], [43, "cmdoption-arg-example_input_func", false], [43, "cmdoption-arg-exclude_embeds", false], [43, "cmdoption-arg-exclude_lm_head", false], [43, "cmdoption-arg-excluded_precisions", false], [43, "cmdoption-arg-execution_mode_list", false], [43, "cmdoption-arg-export_compatible", 
false], [43, "cmdoption-arg-external_data_name", false], [43, "cmdoption-arg-extra.Sigmoid.nnapi", false], [43, "cmdoption-arg-extra_args", false], [43, "cmdoption-arg-extra_configs", false], [43, "cmdoption-arg-extra_options", false], [43, "cmdoption-arg-extra_session_config", false], [43, "cmdoption-arg-final_orientation", false], [43, "cmdoption-arg-float16", false], [43, "cmdoption-arg-force_evaluate_other_eps", false], [43, "cmdoption-arg-force_fp16_inputs", false], [43, "cmdoption-arg-force_fp32_nodes", false], [43, "cmdoption-arg-force_fp32_ops", false], [43, "cmdoption-arg-fp16", false], [43, "cmdoption-arg-fuse_layernorm", false], [43, "cmdoption-arg-gpus", false], [43, "cmdoption-arg-group_size", false], [43, "cmdoption-arg-hidden_size", false], [43, "cmdoption-arg-htp_socs", false], [43, "cmdoption-arg-ignored_scope", false], [43, "cmdoption-arg-ignored_scope_type", false], [43, "cmdoption-arg-input", false], [43, "cmdoption-arg-input_dim", false], [43, "cmdoption-arg-input_int32", false], [43, "cmdoption-arg-input_layouts", false], [43, "cmdoption-arg-input_model_dtype", false], [43, "cmdoption-arg-input_name", false], [43, "cmdoption-arg-input_names", false], [43, "cmdoption-arg-input_nodes", false], [43, "cmdoption-arg-input_shape", false], [43, "cmdoption-arg-input_shapes", false], [43, "cmdoption-arg-input_types", false], [43, "cmdoption-arg-inputs_to_make_channel_last", false], [43, "cmdoption-arg-inside_layer_modules", false], [43, "cmdoption-arg-int4_accuracy_level", false], [43, "cmdoption-arg-int4_block_size", false], [43, "cmdoption-arg-inter_thread_num_list", false], [43, "cmdoption-arg-intra_thread_num_list", false], [43, "cmdoption-arg-io_bind", false], [43, "cmdoption-arg-is_symmetric", false], [43, "cmdoption-arg-keep_io_types", false], [43, "cmdoption-arg-layer_name_filter", false], [43, "cmdoption-arg-layers_block_name", false], [43, "cmdoption-arg-lib_name", false], [43, "cmdoption-arg-lib_targets", false], [43, 
"cmdoption-arg-loftq_iter", false], [43, "cmdoption-arg-logger", false], [43, "cmdoption-arg-lora_alpha", false], [43, "cmdoption-arg-lora_dropout", false], [43, "cmdoption-arg-lora_r", false], [43, "cmdoption-arg-make_inputs", false], [43, "cmdoption-arg-max_finite_val", false], [43, "cmdoption-arg-max_layer", false], [43, "cmdoption-arg-merge_adapter_weights", false], [43, "cmdoption-arg-metadata_only", false], [43, "cmdoption-arg-metric", false], [43, "cmdoption-arg-min_layer", false], [43, "cmdoption-arg-min_positive_val", false], [43, "cmdoption-arg-model_type", false], [43, "cmdoption-arg-modules_to_fuse", false], [43, "cmdoption-arg-modules_to_not_convert", false], [43, "cmdoption-arg-modules_to_save", false], [43, "cmdoption-arg-name_pattern", false], [43, "cmdoption-arg-need_layer_fusing", false], [43, "cmdoption-arg-no_repeat_ngram_size", false], [43, "cmdoption-arg-node_block_list", false], [43, "cmdoption-arg-nodes_to_exclude", false], [43, "cmdoption-arg-nodes_to_quantize", false], [43, "cmdoption-arg-num_epochs", false], [43, "cmdoption-arg-num_heads", false], [43, "cmdoption-arg-num_key_value_heads", false], [43, "cmdoption-arg-num_splits", false], [43, "cmdoption-arg-num_steps", false], [43, "cmdoption-arg-only_onnxruntime", false], [43, "cmdoption-arg-op_block_list", false], [43, "cmdoption-arg-op_type_dict", false], [43, "cmdoption-arg-op_types_to_quantize", false], [43, "cmdoption-arg-opt_level", false], [43, "cmdoption-arg-opt_level_list", false], [43, "cmdoption-arg-optimization_options", false], [43, "cmdoption-arg-optimize_model", false], [43, "cmdoption-arg-optional_inputs", false], [43, "cmdoption-arg-out_node", false], [43, "cmdoption-arg-output_model", false], [43, "cmdoption-arg-output_names", false], [43, "cmdoption-arg-output_nodes", false], [43, "cmdoption-arg-outputs_to_make_channel_last", false], [43, "cmdoption-arg-outside_layer_modules", false], [43, "cmdoption-arg-overrides_config", false], [43, "cmdoption-arg-parallel_jobs", 
false], [43, "cmdoption-arg-past_key_value_name", false], [43, "cmdoption-arg-per_channel", false], [43, "cmdoption-arg-percdamp", false], [43, "cmdoption-arg-post", false], [43, "cmdoption-arg-pre", false], [43, "cmdoption-arg-precision", false], [43, "cmdoption-arg-prepare_qnn_config", false], [43, "cmdoption-arg-preset", false], [43, "cmdoption-arg-provider_options_list", false], [43, "cmdoption-arg-providers_list", false], [43, "cmdoption-arg-ptl_data_module", false], [43, "cmdoption-arg-ptl_module", false], [43, "cmdoption-arg-q_group_size", false], [43, "cmdoption-arg-qconfig_func", false], [43, "cmdoption-arg-qnn_extra_options", false], [43, "cmdoption-arg-quant_format", false], [43, "cmdoption-arg-quant_level", false], [43, "cmdoption-arg-quant_mode", false], [43, "cmdoption-arg-quant_preprocess", false], [43, "cmdoption-arg-quant_type", false], [43, "cmdoption-arg-recipes", false], [43, "cmdoption-arg-reduce_range", false], [43, "cmdoption-arg-rotate_mode", false], [43, "cmdoption-arg-round_interval", false], [43, "cmdoption-arg-save_as_external_data", false], [43, "cmdoption-arg-save_format", false], [43, "cmdoption-arg-save_metadata_for_token_generation", false], [43, "cmdoption-arg-save_quant_config", false], [43, "cmdoption-arg-script_dir", false], [43, "cmdoption-arg-search", false], [43, "cmdoption-arg-seed", false], [43, "cmdoption-arg-size_threshold", false], [43, "cmdoption-arg-source_dtype", false], [43, "cmdoption-arg-sparsity", false], [43, "cmdoption-arg-static_groups", false], [43, "cmdoption-arg-strict", false], [43, "cmdoption-arg-surgeries", false], [43, "cmdoption-arg-sym", false], [43, "cmdoption-arg-target_device", false], [43, "cmdoption-arg-target_dtype", false], [43, "cmdoption-arg-target_modules", false], [43, "cmdoption-arg-target_opset", false], [43, "cmdoption-arg-tool_command", false], [43, "cmdoption-arg-tool_command_args", false], [43, "cmdoption-arg-torch_dtype", false], [43, "cmdoption-arg-train_data_config", false], [43, 
"cmdoption-arg-training_args", false], [43, "cmdoption-arg-training_loop_func", false], [43, "cmdoption-arg-trt_fp16_enable", false], [43, "cmdoption-arg-true_sequential", false], [43, "cmdoption-arg-tuning_criterion", false], [43, "cmdoption-arg-use_dynamo_exporter", false], [43, "cmdoption-arg-use_enhanced_quantizer", false], [43, "cmdoption-arg-use_external_data_format", false], [43, "cmdoption-arg-use_forced_decoder_ids", false], [43, "cmdoption-arg-use_gpu", false], [43, "cmdoption-arg-use_gqa", false], [43, "cmdoption-arg-use_int4", false], [43, "cmdoption-arg-use_logits_processor", false], [43, "cmdoption-arg-use_prefix_vocab_mask", false], [43, "cmdoption-arg-use_symbolic_shape_infer", false], [43, "cmdoption-arg-use_temperature", false], [43, "cmdoption-arg-use_transpose_op", false], [43, "cmdoption-arg-use_vocab_mask", false], [43, "cmdoption-arg-user_script", false], [43, "cmdoption-arg-val_data_config", false], [43, "cmdoption-arg-version", false], [43, "cmdoption-arg-w_bit", false], [43, "cmdoption-arg-weight_only_config", false], [43, "cmdoption-arg-weight_only_quant_configs", false], [43, "cmdoption-arg-weight_type", false], [43, "cmdoption-arg-workspace", false], [43, "cmdoption-arg-zero_point", false], [44, "cmdoption-arg-0", false], [44, "cmdoption-arg-1", false], [44, "cmdoption-arg-group", false], [44, "cmdoption-arg-multivariate", false], [44, "cmdoption-arg-num_samples", false], [44, "cmdoption-arg-seed", false], [44, "cmdoption-arg-with_replacement", false]], "components": [[43, "cmdoption-arg-components", false]], "compositemodelhandler (class in olive.model)": [[41, "olive.model.CompositeModelHandler", false]], "compress_to_fp16": [[43, "cmdoption-arg-compress_to_fp16", false]], "compute_dtype": [[43, "cmdoption-arg-234", false], [43, "cmdoption-arg-compute_dtype", false]], "convert_attribute": [[43, "cmdoption-arg-10", false], [43, "cmdoption-arg-105", false], [43, "cmdoption-arg-111", false], [43, "cmdoption-arg-116", false], [43, 
"cmdoption-arg-121", false], [43, "cmdoption-arg-128", false], [43, "cmdoption-arg-148", false], [43, "cmdoption-arg-15", false], [43, "cmdoption-arg-169", false], [43, "cmdoption-arg-188", false], [43, "cmdoption-arg-194", false], [43, "cmdoption-arg-200", false], [43, "cmdoption-arg-205", false], [43, "cmdoption-arg-210", false], [43, "cmdoption-arg-220", false], [43, "cmdoption-arg-24", false], [43, "cmdoption-arg-283", false], [43, "cmdoption-arg-29", false], [43, "cmdoption-arg-35", false], [43, "cmdoption-arg-40", false], [43, "cmdoption-arg-45", false], [43, "cmdoption-arg-5", false], [43, "cmdoption-arg-50", false], [43, "cmdoption-arg-72", false], [43, "cmdoption-arg-99", false], [43, "cmdoption-arg-convert_attribute", false]], "cost_model": [[43, "cmdoption-arg-cost_model", false]], "cpu_cores": [[43, "cmdoption-arg-cpu_cores", false]], "damp_percent": [[43, "cmdoption-arg-damp_percent", false]], "data_config": [[43, "cmdoption-arg-106", false], [43, "cmdoption-arg-142", false], [43, "cmdoption-arg-162", false], [43, "cmdoption-arg-171", false], [43, "cmdoption-arg-249", false], [43, "cmdoption-arg-258", false], [43, "cmdoption-arg-261", false], [43, "cmdoption-arg-266", false], [43, "cmdoption-arg-271", false], [43, "cmdoption-arg-275", false], [43, "cmdoption-arg-60", false], [43, "cmdoption-arg-82", false], [43, "cmdoption-arg-data_config", false]], "desc_act": [[43, "cmdoption-arg-desc_act", false]], "device": [[43, "cmdoption-arg-122", false], [43, "cmdoption-arg-130", false], [43, "cmdoption-arg-150", false], [43, "cmdoption-arg-18", false], [43, "cmdoption-arg-215", false], [43, "cmdoption-arg-248", false], [43, "cmdoption-arg-device", false]], "dim_param": [[43, "cmdoption-arg-dim_param", false]], "dim_value": [[43, "cmdoption-arg-dim_value", false]], "distributedhfmodelhandler (class in olive.model)": [[41, "olive.model.DistributedHfModelHandler", false]], "distributedonnxmodelhandler (class in olive.model)": [[41, 
"olive.model.DistributedOnnxModelHandler", false]], "do_validate": [[43, "cmdoption-arg-do_validate", false]], "domain": [[43, "cmdoption-arg-132", false], [43, "cmdoption-arg-152", false], [43, "cmdoption-arg-domain", false]], "double_quant": [[43, "cmdoption-arg-double_quant", false]], "duo_scaling": [[43, "cmdoption-arg-duo_scaling", false]], "dynamic": [[43, "cmdoption-arg-dynamic", false]], "dynamic_lora_r": [[43, "cmdoption-arg-dynamic_lora_r", false]], "element_wise_binary_ops": [[43, "cmdoption-arg-element_wise_binary_ops", false]], "enable_cuda_graph": [[43, "cmdoption-arg-221", false], [43, "cmdoption-arg-enable_cuda_graph", false]], "enable_dpu": [[43, "cmdoption-arg-enable_dpu", false]], "enable_htp": [[43, "cmdoption-arg-enable_htp", false]], "enable_profiling": [[43, "cmdoption-arg-enable_profiling", false]], "enablesubgraph": [[43, "cmdoption-arg-64", false], [43, "cmdoption-arg-91", false], [43, "cmdoption-arg-EnableSubgraph", false]], "eval_data_config": [[43, "cmdoption-arg-232", false], [43, "cmdoption-arg-243", false], [43, "cmdoption-arg-eval_data_config", false]], "evaluation_strategy (olive.passes.pytorch.lora.hftrainingarguments attribute)": [[43, "olive.passes.pytorch.lora.HFTrainingArguments.evaluation_strategy", false]], "example_input_func": [[43, "cmdoption-arg-example_input_func", false]], "exclude_embeds": [[43, "cmdoption-arg-222", false], [43, "cmdoption-arg-exclude_embeds", false]], "exclude_lm_head": [[43, "cmdoption-arg-223", false], [43, "cmdoption-arg-exclude_lm_head", false]], "excluded_precisions": [[43, "cmdoption-arg-137", false], [43, "cmdoption-arg-157", false], [43, "cmdoption-arg-excluded_precisions", false]], "execution_mode_list": [[43, "cmdoption-arg-execution_mode_list", false]], "export_compatible": [[43, "cmdoption-arg-export_compatible", false]], "external_data_name": [[43, "cmdoption-arg-103", false], [43, "cmdoption-arg-109", false], [43, "cmdoption-arg-114", false], [43, "cmdoption-arg-119", false], [43, 
"cmdoption-arg-126", false], [43, "cmdoption-arg-13", false], [43, "cmdoption-arg-146", false], [43, "cmdoption-arg-167", false], [43, "cmdoption-arg-186", false], [43, "cmdoption-arg-192", false], [43, "cmdoption-arg-198", false], [43, "cmdoption-arg-203", false], [43, "cmdoption-arg-208", false], [43, "cmdoption-arg-218", false], [43, "cmdoption-arg-22", false], [43, "cmdoption-arg-27", false], [43, "cmdoption-arg-281", false], [43, "cmdoption-arg-3", false], [43, "cmdoption-arg-33", false], [43, "cmdoption-arg-38", false], [43, "cmdoption-arg-43", false], [43, "cmdoption-arg-48", false], [43, "cmdoption-arg-70", false], [43, "cmdoption-arg-8", false], [43, "cmdoption-arg-97", false], [43, "cmdoption-arg-external_data_name", false]], "extra.sigmoid.nnapi": [[43, "cmdoption-arg-61", false], [43, "cmdoption-arg-88", false], [43, "cmdoption-arg-extra.Sigmoid.nnapi", false]], "extra_args": [[43, "cmdoption-arg-274", false], [43, "cmdoption-arg-276", false], [43, "cmdoption-arg-284", false], [43, "cmdoption-arg-286", false], [43, "cmdoption-arg-extra_args", false]], "extra_configs": [[43, "cmdoption-arg-273", false], [43, "cmdoption-arg-extra_configs", false]], "extra_options": [[43, "cmdoption-arg-183", false], [43, "cmdoption-arg-67", false], [43, "cmdoption-arg-94", false], [43, "cmdoption-arg-extra_options", false]], "extra_session_config": [[43, "cmdoption-arg-extra_session_config", false]], "final_orientation": [[43, "cmdoption-arg-final_orientation", false]], "float16": [[43, "cmdoption-arg-265", false], [43, "cmdoption-arg-float16", false]], "force_evaluate_other_eps": [[43, "cmdoption-arg-force_evaluate_other_eps", false]], "force_fp16_inputs": [[43, "cmdoption-arg-force_fp16_inputs", false]], "force_fp32_nodes": [[43, "cmdoption-arg-force_fp32_nodes", false]], "force_fp32_ops": [[43, "cmdoption-arg-force_fp32_ops", false]], "forcequantizenoinputcheck": [[43, "cmdoption-arg-65", false], [43, "cmdoption-arg-92", false], [43, 
"cmdoption-arg-ForceQuantizeNoInputCheck", false]], "fp16": [[43, "cmdoption-arg-214", false], [43, "cmdoption-arg-fp16", false]], "fuse_layernorm": [[43, "cmdoption-arg-fuse_layernorm", false]], "gpus": [[43, "cmdoption-arg-gpus", false]], "group": [[44, "cmdoption-arg-group", false]], "group_size": [[43, "cmdoption-arg-group_size", false]], "hfmodelhandler (class in olive.model)": [[41, "olive.model.HfModelHandler", false]], "hidden_size": [[43, "cmdoption-arg-hidden_size", false]], "htp_socs": [[43, "cmdoption-arg-htp_socs", false]], "ignored_scope": [[43, "cmdoption-arg-ignored_scope", false]], "ignored_scope_type": [[43, "cmdoption-arg-ignored_scope_type", false]], "input": [[43, "cmdoption-arg-input", false]], "input_dim": [[43, "cmdoption-arg-input_dim", false]], "input_int32": [[43, "cmdoption-arg-input_int32", false]], "input_layouts": [[43, "cmdoption-arg-input_layouts", false]], "input_model_dtype": [[43, "cmdoption-arg-input_model_dtype", false]], "input_name": [[43, "cmdoption-arg-input_name", false]], "input_names": [[43, "cmdoption-arg-input_names", false]], "input_nodes": [[43, "cmdoption-arg-input_nodes", false]], "input_shape": [[43, "cmdoption-arg-input_shape", false]], "input_shapes": [[43, "cmdoption-arg-input_shapes", false]], "input_types": [[43, "cmdoption-arg-input_types", false]], "inputs_to_make_channel_last": [[43, "cmdoption-arg-inputs_to_make_channel_last", false]], "inside_layer_modules": [[43, "cmdoption-arg-inside_layer_modules", false]], "int4_accuracy_level": [[43, "cmdoption-arg-int4_accuracy_level", false]], "int4_block_size": [[43, "cmdoption-arg-int4_block_size", false]], "inter_thread_num_list": [[43, "cmdoption-arg-inter_thread_num_list", false]], "intra_thread_num_list": [[43, "cmdoption-arg-intra_thread_num_list", false]], "io_bind": [[43, "cmdoption-arg-io_bind", false]], "is_symmetric": [[43, "cmdoption-arg-is_symmetric", false]], "keep_io_types": [[43, "cmdoption-arg-19", false], [43, "cmdoption-arg-keep_io_types", 
false]], "layer_name_filter": [[43, "cmdoption-arg-264", false], [43, "cmdoption-arg-layer_name_filter", false]], "layers_block_name": [[43, "cmdoption-arg-layers_block_name", false]], "learning_rate (olive.passes.pytorch.lora.hftrainingarguments attribute)": [[43, "olive.passes.pytorch.lora.HFTrainingArguments.learning_rate", false]], "lib_name": [[43, "cmdoption-arg-lib_name", false]], "lib_targets": [[43, "cmdoption-arg-lib_targets", false]], "loftq_iter": [[43, "cmdoption-arg-loftq_iter", false]], "logger": [[43, "cmdoption-arg-logger", false]], "lora_alpha": [[43, "cmdoption-arg-226", false], [43, "cmdoption-arg-237", false], [43, "cmdoption-arg-lora_alpha", false]], "lora_dropout": [[43, "cmdoption-arg-227", false], [43, "cmdoption-arg-238", false], [43, "cmdoption-arg-lora_dropout", false]], "lora_r": [[43, "cmdoption-arg-225", false], [43, "cmdoption-arg-236", false], [43, "cmdoption-arg-lora_r", false]], "lr_scheduler_type (olive.passes.pytorch.lora.hftrainingarguments attribute)": [[43, "olive.passes.pytorch.lora.HFTrainingArguments.lr_scheduler_type", false]], "make_inputs": [[43, "cmdoption-arg-make_inputs", false]], "matmulconstbonly": [[43, "cmdoption-arg-66", false], [43, "cmdoption-arg-93", false], [43, "cmdoption-arg-MatMulConstBOnly", false]], "max_finite_val": [[43, "cmdoption-arg-max_finite_val", false]], "max_layer": [[43, "cmdoption-arg-263", false], [43, "cmdoption-arg-max_layer", false]], "merge_adapter_weights": [[43, "cmdoption-arg-merge_adapter_weights", false]], "metadata_only": [[43, "cmdoption-arg-metadata_only", false]], "metric": [[43, "cmdoption-arg-139", false], [43, "cmdoption-arg-159", false], [43, "cmdoption-arg-metric", false]], "min_layer": [[43, "cmdoption-arg-262", false], [43, "cmdoption-arg-min_layer", false]], "min_positive_val": [[43, "cmdoption-arg-min_positive_val", false]], "model_type": [[43, "cmdoption-arg-272", false], [43, "cmdoption-arg-model_type", false]], "modelconfig (class in olive.model)": [[41, 
"olive.model.ModelConfig", false]], "modules_to_fuse": [[43, "cmdoption-arg-modules_to_fuse", false]], "modules_to_not_convert": [[43, "cmdoption-arg-modules_to_not_convert", false]], "modules_to_save": [[43, "cmdoption-arg-228", false], [43, "cmdoption-arg-239", false], [43, "cmdoption-arg-modules_to_save", false]], "multivariate": [[44, "cmdoption-arg-multivariate", false]], "name_pattern": [[43, "cmdoption-arg-name_pattern", false]], "need_layer_fusing": [[43, "cmdoption-arg-need_layer_fusing", false]], "no_repeat_ngram_size": [[43, "cmdoption-arg-no_repeat_ngram_size", false]], "node_block_list": [[43, "cmdoption-arg-node_block_list", false]], "nodes_to_exclude": [[43, "cmdoption-arg-100", false], [43, "cmdoption-arg-175", false], [43, "cmdoption-arg-56", false], [43, "cmdoption-arg-78", false], [43, "cmdoption-arg-nodes_to_exclude", false]], "nodes_to_quantize": [[43, "cmdoption-arg-174", false], [43, "cmdoption-arg-55", false], [43, "cmdoption-arg-77", false], [43, "cmdoption-arg-nodes_to_quantize", false]], "num_epochs": [[43, "cmdoption-arg-num_epochs", false]], "num_heads": [[43, "cmdoption-arg-num_heads", false]], "num_key_value_heads": [[43, "cmdoption-arg-num_key_value_heads", false]], "num_samples": [[44, "cmdoption-arg-0", false], [44, "cmdoption-arg-num_samples", false]], "num_splits": [[43, "cmdoption-arg-num_splits", false]], "num_steps": [[43, "cmdoption-arg-num_steps", false]], "only_onnxruntime": [[43, "cmdoption-arg-only_onnxruntime", false]], "onnxmodelhandler (class in olive.model)": [[41, "olive.model.ONNXModelHandler", false]], "op_block_list": [[43, "cmdoption-arg-30", false], [43, "cmdoption-arg-op_block_list", false]], "op_type_dict": [[43, "cmdoption-arg-141", false], [43, "cmdoption-arg-161", false], [43, "cmdoption-arg-op_type_dict", false]], "op_types_to_quantize": [[43, "cmdoption-arg-173", false], [43, "cmdoption-arg-53", false], [43, "cmdoption-arg-75", false], [43, "cmdoption-arg-op_types_to_quantize", false]], 
"openvinomodelhandler (class in olive.model)": [[41, "olive.model.OpenVINOModelHandler", false]], "opt_level": [[43, "cmdoption-arg-opt_level", false]], "opt_level_list": [[43, "cmdoption-arg-opt_level_list", false]], "optim (olive.passes.pytorch.lora.hftrainingarguments attribute)": [[43, "olive.passes.pytorch.lora.HFTrainingArguments.optim", false]], "optimization_options": [[43, "cmdoption-arg-optimization_options", false]], "optimize_model": [[43, "cmdoption-arg-optimize_model", false]], "optional_inputs": [[43, "cmdoption-arg-optional_inputs", false]], "out_node": [[43, "cmdoption-arg-out_node", false]], "output_model": [[43, "cmdoption-arg-output_model", false]], "output_names": [[43, "cmdoption-arg-output_names", false]], "output_nodes": [[43, "cmdoption-arg-output_nodes", false]], "outputs_to_make_channel_last": [[43, "cmdoption-arg-outputs_to_make_channel_last", false]], "outside_layer_modules": [[43, "cmdoption-arg-outside_layer_modules", false]], "overrides_config": [[43, "cmdoption-arg-overrides_config", false]], "overwrite_output_dir (olive.passes.pytorch.lora.hftrainingarguments attribute)": [[43, "olive.passes.pytorch.lora.HFTrainingArguments.overwrite_output_dir", false]], "parallel_jobs": [[43, "cmdoption-arg-parallel_jobs", false]], "past_key_value_name": [[43, "cmdoption-arg-past_key_value_name", false]], "per_channel": [[43, "cmdoption-arg-176", false], [43, "cmdoption-arg-57", false], [43, "cmdoption-arg-79", false], [43, "cmdoption-arg-per_channel", false]], "percdamp": [[43, "cmdoption-arg-percdamp", false]], "post": [[43, "cmdoption-arg-post", false]], "pre": [[43, "cmdoption-arg-pre", false]], "precision": [[43, "cmdoption-arg-precision", false]], "prepare_qnn_config": [[43, "cmdoption-arg-86", false], [43, "cmdoption-arg-prepare_qnn_config", false]], "preset": [[43, "cmdoption-arg-preset", false]], "provider_options_list": [[43, "cmdoption-arg-provider_options_list", false]], "providers_list": [[43, "cmdoption-arg-providers_list", false]], 
"ptl_data_module": [[43, "cmdoption-arg-ptl_data_module", false]], "ptl_module": [[43, "cmdoption-arg-ptl_module", false]], "pytorchmodelhandler (class in olive.model)": [[41, "olive.model.PyTorchModelHandler", false]], "q_group_size": [[43, "cmdoption-arg-q_group_size", false]], "qconfig_func": [[43, "cmdoption-arg-qconfig_func", false]], "qnn_extra_options": [[43, "cmdoption-arg-87", false], [43, "cmdoption-arg-qnn_extra_options", false]], "quant_format": [[43, "cmdoption-arg-143", false], [43, "cmdoption-arg-163", false], [43, "cmdoption-arg-179", false], [43, "cmdoption-arg-84", false], [43, "cmdoption-arg-quant_format", false]], "quant_level": [[43, "cmdoption-arg-136", false], [43, "cmdoption-arg-156", false], [43, "cmdoption-arg-quant_level", false]], "quant_mode": [[43, "cmdoption-arg-170", false], [43, "cmdoption-arg-51", false], [43, "cmdoption-arg-73", false], [43, "cmdoption-arg-quant_mode", false]], "quant_preprocess": [[43, "cmdoption-arg-177", false], [43, "cmdoption-arg-59", false], [43, "cmdoption-arg-81", false], [43, "cmdoption-arg-quant_preprocess", false]], "quant_type": [[43, "cmdoption-arg-quant_type", false]], "recipes": [[43, "cmdoption-arg-134", false], [43, "cmdoption-arg-154", false], [43, "cmdoption-arg-recipes", false]], "reduce_range": [[43, "cmdoption-arg-123", false], [43, "cmdoption-arg-135", false], [43, "cmdoption-arg-155", false], [43, "cmdoption-arg-58", false], [43, "cmdoption-arg-80", false], [43, "cmdoption-arg-reduce_range", false]], "resume_from_checkpoint (olive.passes.pytorch.lora.hftrainingarguments attribute)": [[43, "olive.passes.pytorch.lora.HFTrainingArguments.resume_from_checkpoint", false]], "rotate_mode": [[43, "cmdoption-arg-254", false], [43, "cmdoption-arg-rotate_mode", false]], "round_interval": [[43, "cmdoption-arg-round_interval", false]], "save_as_external_data": [[43, "cmdoption-arg-1", false], [43, "cmdoption-arg-101", false], [43, "cmdoption-arg-107", false], [43, "cmdoption-arg-11", false], [43, 
"cmdoption-arg-112", false], [43, "cmdoption-arg-117", false], [43, "cmdoption-arg-124", false], [43, "cmdoption-arg-144", false], [43, "cmdoption-arg-165", false], [43, "cmdoption-arg-184", false], [43, "cmdoption-arg-190", false], [43, "cmdoption-arg-196", false], [43, "cmdoption-arg-20", false], [43, "cmdoption-arg-201", false], [43, "cmdoption-arg-206", false], [43, "cmdoption-arg-216", false], [43, "cmdoption-arg-25", false], [43, "cmdoption-arg-279", false], [43, "cmdoption-arg-31", false], [43, "cmdoption-arg-36", false], [43, "cmdoption-arg-41", false], [43, "cmdoption-arg-46", false], [43, "cmdoption-arg-6", false], [43, "cmdoption-arg-68", false], [43, "cmdoption-arg-95", false], [43, "cmdoption-arg-save_as_external_data", false]], "save_format": [[43, "cmdoption-arg-save_format", false]], "save_metadata_for_token_generation": [[43, "cmdoption-arg-save_metadata_for_token_generation", false]], "save_quant_config": [[43, "cmdoption-arg-235", false], [43, "cmdoption-arg-save_quant_config", false]], "script_dir": [[43, "cmdoption-arg-17", false], [43, "cmdoption-arg-212", false], [43, "cmdoption-arg-246", false], [43, "cmdoption-arg-257", false], [43, "cmdoption-arg-260", false], [43, "cmdoption-arg-268", false], [43, "cmdoption-arg-270", false], [43, "cmdoption-arg-script_dir", false]], "search": [[43, "cmdoption-arg-search", false]], "seed": [[43, "cmdoption-arg-250", false], [43, "cmdoption-arg-252", false], [43, "cmdoption-arg-253", false], [43, "cmdoption-arg-seed", false], [44, "cmdoption-arg-1", false], [44, "cmdoption-arg-seed", false]], "size_threshold": [[43, "cmdoption-arg-104", false], [43, "cmdoption-arg-110", false], [43, "cmdoption-arg-115", false], [43, "cmdoption-arg-120", false], [43, "cmdoption-arg-127", false], [43, "cmdoption-arg-14", false], [43, "cmdoption-arg-147", false], [43, "cmdoption-arg-168", false], [43, "cmdoption-arg-187", false], [43, "cmdoption-arg-193", false], [43, "cmdoption-arg-199", false], [43, "cmdoption-arg-204", 
false], [43, "cmdoption-arg-209", false], [43, "cmdoption-arg-219", false], [43, "cmdoption-arg-23", false], [43, "cmdoption-arg-28", false], [43, "cmdoption-arg-282", false], [43, "cmdoption-arg-34", false], [43, "cmdoption-arg-39", false], [43, "cmdoption-arg-4", false], [43, "cmdoption-arg-44", false], [43, "cmdoption-arg-49", false], [43, "cmdoption-arg-71", false], [43, "cmdoption-arg-9", false], [43, "cmdoption-arg-98", false], [43, "cmdoption-arg-size_threshold", false]], "snpemodelhandler (class in olive.model)": [[41, "olive.model.SNPEModelHandler", false]], "source_dtype": [[43, "cmdoption-arg-source_dtype", false]], "sparsity": [[43, "cmdoption-arg-251", false], [43, "cmdoption-arg-sparsity", false]], "static_groups": [[43, "cmdoption-arg-static_groups", false]], "strict": [[43, "cmdoption-arg-strict", false]], "surgeries": [[43, "cmdoption-arg-surgeries", false]], "sym": [[43, "cmdoption-arg-sym", false]], "target_device": [[43, "cmdoption-arg-277", false], [43, "cmdoption-arg-target_device", false]], "target_dtype": [[43, "cmdoption-arg-target_dtype", false]], "target_modules": [[43, "cmdoption-arg-target_modules", false]], "target_opset": [[43, "cmdoption-arg-0", false], [43, "cmdoption-arg-189", false], [43, "cmdoption-arg-213", false], [43, "cmdoption-arg-278", false], [43, "cmdoption-arg-target_opset", false]], "tool_command": [[43, "cmdoption-arg-tool_command", false]], "tool_command_args": [[43, "cmdoption-arg-tool_command_args", false]], "torch_dtype": [[43, "cmdoption-arg-224", false], [43, "cmdoption-arg-229", false], [43, "cmdoption-arg-240", false], [43, "cmdoption-arg-torch_dtype", false]], "train_data_config": [[43, "cmdoption-arg-231", false], [43, "cmdoption-arg-242", false], [43, "cmdoption-arg-247", false], [43, "cmdoption-arg-train_data_config", false]], "training_args": [[43, "cmdoption-arg-233", false], [43, "cmdoption-arg-244", false], [43, "cmdoption-arg-255", false], [43, "cmdoption-arg-training_args", false]], 
"training_loop_func": [[43, "cmdoption-arg-training_loop_func", false]], "trt_fp16_enable": [[43, "cmdoption-arg-trt_fp16_enable", false]], "true_sequential": [[43, "cmdoption-arg-true_sequential", false]], "tuning_criterion": [[43, "cmdoption-arg-138", false], [43, "cmdoption-arg-158", false], [43, "cmdoption-arg-tuning_criterion", false]], "use_dynamo_exporter": [[43, "cmdoption-arg-use_dynamo_exporter", false]], "use_enhanced_quantizer": [[43, "cmdoption-arg-use_enhanced_quantizer", false]], "use_external_data_format": [[43, "cmdoption-arg-use_external_data_format", false]], "use_forced_decoder_ids": [[43, "cmdoption-arg-use_forced_decoder_ids", false]], "use_gpu": [[43, "cmdoption-arg-195", false], [43, "cmdoption-arg-use_gpu", false]], "use_gqa": [[43, "cmdoption-arg-use_gqa", false]], "use_int4": [[43, "cmdoption-arg-use_int4", false]], "use_logits_processor": [[43, "cmdoption-arg-use_logits_processor", false]], "use_prefix_vocab_mask": [[43, "cmdoption-arg-use_prefix_vocab_mask", false]], "use_symbolic_shape_infer": [[43, "cmdoption-arg-use_symbolic_shape_infer", false]], "use_temperature": [[43, "cmdoption-arg-use_temperature", false]], "use_transpose_op": [[43, "cmdoption-arg-use_transpose_op", false]], "use_vocab_mask": [[43, "cmdoption-arg-use_vocab_mask", false]], "user_script": [[43, "cmdoption-arg-16", false], [43, "cmdoption-arg-211", false], [43, "cmdoption-arg-245", false], [43, "cmdoption-arg-256", false], [43, "cmdoption-arg-259", false], [43, "cmdoption-arg-267", false], [43, "cmdoption-arg-269", false], [43, "cmdoption-arg-user_script", false]], "val_data_config": [[43, "cmdoption-arg-val_data_config", false]], "version": [[43, "cmdoption-arg-version", false]], "w_bit": [[43, "cmdoption-arg-w_bit", false]], "warmup_ratio (olive.passes.pytorch.lora.hftrainingarguments attribute)": [[43, "olive.passes.pytorch.lora.HFTrainingArguments.warmup_ratio", false]], "weight_only_config": [[43, "cmdoption-arg-140", false], [43, "cmdoption-arg-160", false], 
[43, "cmdoption-arg-weight_only_config", false]], "weight_only_quant_configs": [[43, "cmdoption-arg-weight_only_quant_configs", false]], "weight_type": [[43, "cmdoption-arg-172", false], [43, "cmdoption-arg-52", false], [43, "cmdoption-arg-74", false], [43, "cmdoption-arg-weight_type", false]], "weightsymmetric": [[43, "cmdoption-arg-182", false], [43, "cmdoption-arg-63", false], [43, "cmdoption-arg-90", false], [43, "cmdoption-arg-WeightSymmetric", false]], "with_replacement": [[44, "cmdoption-arg-with_replacement", false]], "workspace": [[43, "cmdoption-arg-133", false], [43, "cmdoption-arg-153", false], [43, "cmdoption-arg-workspace", false]], "zero_point": [[43, "cmdoption-arg-zero_point", false]]}, "objects": {"": [[43, 3, 1, "cmdoption-arg-ActivationSymmetric", "ActivationSymmetric"], [43, 3, 1, "cmdoption-arg-AddQDQPairToWeight", "AddQDQPairToWeight"], [43, 3, 1, "cmdoption-arg-EnableSubgraph", "EnableSubgraph"], [43, 3, 1, "cmdoption-arg-ForceQuantizeNoInputCheck", "ForceQuantizeNoInputCheck"], [43, 3, 1, "cmdoption-arg-MatMulConstBOnly", "MatMulConstBOnly"], [43, 3, 1, "cmdoption-arg-WeightSymmetric", "WeightSymmetric"], [43, 3, 1, "cmdoption-arg-a_bits", "a_bits"], [43, 3, 1, "cmdoption-arg-a_per_token", "a_per_token"], [43, 3, 1, "cmdoption-arg-a_symmetric", "a_symmetric"], [43, 3, 1, "cmdoption-arg-accuracy_level", "accuracy_level"], [43, 3, 1, "cmdoption-arg-activation_type", "activation_type"], [43, 3, 1, "cmdoption-arg-add_zero_point", "add_zero_point"], [43, 3, 1, "cmdoption-arg-algorithm", "algorithm"], [43, 3, 1, "cmdoption-arg-all_tensors_to_one_file", "all_tensors_to_one_file"], [43, 3, 1, "cmdoption-arg-allow_tf32", "allow_tf32"], [43, 3, 1, "cmdoption-arg-append_first_op_types_to_quantize_list", "append_first_op_types_to_quantize_list"], [43, 3, 1, "cmdoption-arg-approach", "approach"], [43, 3, 1, "cmdoption-arg-atol", "atol"], [43, 3, 1, "cmdoption-arg-backend", "backend"], [43, 3, 1, "cmdoption-arg-binary_file", "binary_file"], [43, 3, 1, 
"cmdoption-arg-bits", "bits"], [43, 3, 1, "cmdoption-arg-block_size", "block_size"], [43, 3, 1, "cmdoption-arg-block_to_split", "block_to_split"], [43, 3, 1, "cmdoption-arg-blocksize", "blocksize"], [43, 3, 1, "cmdoption-arg-calibrate_method", "calibrate_method"], [43, 3, 1, "cmdoption-arg-calibration_batch_size", "calibration_batch_size"], [43, 3, 1, "cmdoption-arg-calibration_data_config", "calibration_data_config"], [43, 3, 1, "cmdoption-arg-calibration_nsamples", "calibration_nsamples"], [43, 3, 1, "cmdoption-arg-calibration_sampling_size", "calibration_sampling_size"], [43, 3, 1, "cmdoption-arg-checkpoint_path", "checkpoint_path"], [43, 3, 1, "cmdoption-arg-components", "components"], [43, 3, 1, "cmdoption-arg-compress_to_fp16", "compress_to_fp16"], [43, 3, 1, "cmdoption-arg-compute_dtype", "compute_dtype"], [43, 3, 1, "cmdoption-arg-convert_attribute", "convert_attribute"], [43, 3, 1, "cmdoption-arg-cost_model", "cost_model"], [43, 3, 1, "cmdoption-arg-cpu_cores", "cpu_cores"], [43, 3, 1, "cmdoption-arg-damp_percent", "damp_percent"], [43, 3, 1, "cmdoption-arg-data_config", "data_config"], [43, 3, 1, "cmdoption-arg-desc_act", "desc_act"], [43, 3, 1, "cmdoption-arg-device", "device"], [43, 3, 1, "cmdoption-arg-dim_param", "dim_param"], [43, 3, 1, "cmdoption-arg-dim_value", "dim_value"], [43, 3, 1, "cmdoption-arg-do_validate", "do_validate"], [43, 3, 1, "cmdoption-arg-domain", "domain"], [43, 3, 1, "cmdoption-arg-double_quant", "double_quant"], [43, 3, 1, "cmdoption-arg-duo_scaling", "duo_scaling"], [43, 3, 1, "cmdoption-arg-dynamic", "dynamic"], [43, 3, 1, "cmdoption-arg-dynamic_lora_r", "dynamic_lora_r"], [43, 3, 1, "cmdoption-arg-element_wise_binary_ops", "element_wise_binary_ops"], [43, 3, 1, "cmdoption-arg-enable_cuda_graph", "enable_cuda_graph"], [43, 3, 1, "cmdoption-arg-enable_dpu", "enable_dpu"], [43, 3, 1, "cmdoption-arg-enable_htp", "enable_htp"], [43, 3, 1, "cmdoption-arg-enable_profiling", "enable_profiling"], [43, 3, 1, 
"cmdoption-arg-eval_data_config", "eval_data_config"], [43, 3, 1, "cmdoption-arg-example_input_func", "example_input_func"], [43, 3, 1, "cmdoption-arg-exclude_embeds", "exclude_embeds"], [43, 3, 1, "cmdoption-arg-exclude_lm_head", "exclude_lm_head"], [43, 3, 1, "cmdoption-arg-excluded_precisions", "excluded_precisions"], [43, 3, 1, "cmdoption-arg-execution_mode_list", "execution_mode_list"], [43, 3, 1, "cmdoption-arg-export_compatible", "export_compatible"], [43, 3, 1, "cmdoption-arg-external_data_name", "external_data_name"], [43, 3, 1, "cmdoption-arg-extra_args", "extra_args"], [43, 3, 1, "cmdoption-arg-extra_configs", "extra_configs"], [43, 3, 1, "cmdoption-arg-extra_options", "extra_options"], [43, 3, 1, "cmdoption-arg-extra_session_config", "extra_session_config"], [43, 3, 1, "cmdoption-arg-final_orientation", "final_orientation"], [43, 3, 1, "cmdoption-arg-float16", "float16"], [43, 3, 1, "cmdoption-arg-force_evaluate_other_eps", "force_evaluate_other_eps"], [43, 3, 1, "cmdoption-arg-force_fp16_inputs", "force_fp16_inputs"], [43, 3, 1, "cmdoption-arg-force_fp32_nodes", "force_fp32_nodes"], [43, 3, 1, "cmdoption-arg-force_fp32_ops", "force_fp32_ops"], [43, 3, 1, "cmdoption-arg-fp16", "fp16"], [43, 3, 1, "cmdoption-arg-fuse_layernorm", "fuse_layernorm"], [43, 3, 1, "cmdoption-arg-gpus", "gpus"], [44, 3, 1, "cmdoption-arg-group", "group"], [43, 3, 1, "cmdoption-arg-group_size", "group_size"], [43, 3, 1, "cmdoption-arg-hidden_size", "hidden_size"], [43, 3, 1, "cmdoption-arg-htp_socs", "htp_socs"], [43, 3, 1, "cmdoption-arg-ignored_scope", "ignored_scope"], [43, 3, 1, "cmdoption-arg-ignored_scope_type", "ignored_scope_type"], [43, 3, 1, "cmdoption-arg-input", "input"], [43, 3, 1, "cmdoption-arg-input_dim", "input_dim"], [43, 3, 1, "cmdoption-arg-input_int32", "input_int32"], [43, 3, 1, "cmdoption-arg-input_layouts", "input_layouts"], [43, 3, 1, "cmdoption-arg-input_model_dtype", "input_model_dtype"], [43, 3, 1, "cmdoption-arg-input_name", "input_name"], [43, 3, 1, 
"cmdoption-arg-input_names", "input_names"], [43, 3, 1, "cmdoption-arg-input_nodes", "input_nodes"], [43, 3, 1, "cmdoption-arg-input_shape", "input_shape"], [43, 3, 1, "cmdoption-arg-input_shapes", "input_shapes"], [43, 3, 1, "cmdoption-arg-input_types", "input_types"], [43, 3, 1, "cmdoption-arg-inputs_to_make_channel_last", "inputs_to_make_channel_last"], [43, 3, 1, "cmdoption-arg-inside_layer_modules", "inside_layer_modules"], [43, 3, 1, "cmdoption-arg-int4_accuracy_level", "int4_accuracy_level"], [43, 3, 1, "cmdoption-arg-int4_block_size", "int4_block_size"], [43, 3, 1, "cmdoption-arg-inter_thread_num_list", "inter_thread_num_list"], [43, 3, 1, "cmdoption-arg-intra_thread_num_list", "intra_thread_num_list"], [43, 3, 1, "cmdoption-arg-io_bind", "io_bind"], [43, 3, 1, "cmdoption-arg-is_symmetric", "is_symmetric"], [43, 3, 1, "cmdoption-arg-keep_io_types", "keep_io_types"], [43, 3, 1, "cmdoption-arg-layer_name_filter", "layer_name_filter"], [43, 3, 1, "cmdoption-arg-layers_block_name", "layers_block_name"], [43, 3, 1, "cmdoption-arg-lib_name", "lib_name"], [43, 3, 1, "cmdoption-arg-lib_targets", "lib_targets"], [43, 3, 1, "cmdoption-arg-loftq_iter", "loftq_iter"], [43, 3, 1, "cmdoption-arg-logger", "logger"], [43, 3, 1, "cmdoption-arg-lora_alpha", "lora_alpha"], [43, 3, 1, "cmdoption-arg-lora_dropout", "lora_dropout"], [43, 3, 1, "cmdoption-arg-lora_r", "lora_r"], [43, 3, 1, "cmdoption-arg-make_inputs", "make_inputs"], [43, 3, 1, "cmdoption-arg-max_finite_val", "max_finite_val"], [43, 3, 1, "cmdoption-arg-max_layer", "max_layer"], [43, 3, 1, "cmdoption-arg-merge_adapter_weights", "merge_adapter_weights"], [43, 3, 1, "cmdoption-arg-metadata_only", "metadata_only"], [43, 3, 1, "cmdoption-arg-metric", "metric"], [43, 3, 1, "cmdoption-arg-min_layer", "min_layer"], [43, 3, 1, "cmdoption-arg-min_positive_val", "min_positive_val"], [43, 3, 1, "cmdoption-arg-model_type", "model_type"], [43, 3, 1, "cmdoption-arg-modules_to_fuse", "modules_to_fuse"], [43, 3, 1, 
"cmdoption-arg-modules_to_not_convert", "modules_to_not_convert"], [43, 3, 1, "cmdoption-arg-modules_to_save", "modules_to_save"], [44, 3, 1, "cmdoption-arg-multivariate", "multivariate"], [43, 3, 1, "cmdoption-arg-name_pattern", "name_pattern"], [43, 3, 1, "cmdoption-arg-need_layer_fusing", "need_layer_fusing"], [43, 3, 1, "cmdoption-arg-no_repeat_ngram_size", "no_repeat_ngram_size"], [43, 3, 1, "cmdoption-arg-node_block_list", "node_block_list"], [43, 3, 1, "cmdoption-arg-nodes_to_exclude", "nodes_to_exclude"], [43, 3, 1, "cmdoption-arg-nodes_to_quantize", "nodes_to_quantize"], [43, 3, 1, "cmdoption-arg-num_epochs", "num_epochs"], [43, 3, 1, "cmdoption-arg-num_heads", "num_heads"], [43, 3, 1, "cmdoption-arg-num_key_value_heads", "num_key_value_heads"], [44, 3, 1, "cmdoption-arg-num_samples", "num_samples"], [43, 3, 1, "cmdoption-arg-num_splits", "num_splits"], [43, 3, 1, "cmdoption-arg-num_steps", "num_steps"], [43, 3, 1, "cmdoption-arg-only_onnxruntime", "only_onnxruntime"], [43, 3, 1, "cmdoption-arg-op_block_list", "op_block_list"], [43, 3, 1, "cmdoption-arg-op_type_dict", "op_type_dict"], [43, 3, 1, "cmdoption-arg-op_types_to_quantize", "op_types_to_quantize"], [43, 3, 1, "cmdoption-arg-opt_level", "opt_level"], [43, 3, 1, "cmdoption-arg-opt_level_list", "opt_level_list"], [43, 3, 1, "cmdoption-arg-optimization_options", "optimization_options"], [43, 3, 1, "cmdoption-arg-optimize_model", "optimize_model"], [43, 3, 1, "cmdoption-arg-optional_inputs", "optional_inputs"], [43, 3, 1, "cmdoption-arg-out_node", "out_node"], [43, 3, 1, "cmdoption-arg-output_model", "output_model"], [43, 3, 1, "cmdoption-arg-output_names", "output_names"], [43, 3, 1, "cmdoption-arg-output_nodes", "output_nodes"], [43, 3, 1, "cmdoption-arg-outputs_to_make_channel_last", "outputs_to_make_channel_last"], [43, 3, 1, "cmdoption-arg-outside_layer_modules", "outside_layer_modules"], [43, 3, 1, "cmdoption-arg-overrides_config", "overrides_config"], [43, 3, 1, "cmdoption-arg-parallel_jobs", 
"parallel_jobs"], [43, 3, 1, "cmdoption-arg-past_key_value_name", "past_key_value_name"], [43, 3, 1, "cmdoption-arg-per_channel", "per_channel"], [43, 3, 1, "cmdoption-arg-percdamp", "percdamp"], [43, 3, 1, "cmdoption-arg-post", "post"], [43, 3, 1, "cmdoption-arg-pre", "pre"], [43, 3, 1, "cmdoption-arg-precision", "precision"], [43, 3, 1, "cmdoption-arg-prepare_qnn_config", "prepare_qnn_config"], [43, 3, 1, "cmdoption-arg-preset", "preset"], [43, 3, 1, "cmdoption-arg-provider_options_list", "provider_options_list"], [43, 3, 1, "cmdoption-arg-providers_list", "providers_list"], [43, 3, 1, "cmdoption-arg-ptl_data_module", "ptl_data_module"], [43, 3, 1, "cmdoption-arg-ptl_module", "ptl_module"], [43, 3, 1, "cmdoption-arg-q_group_size", "q_group_size"], [43, 3, 1, "cmdoption-arg-qconfig_func", "qconfig_func"], [43, 3, 1, "cmdoption-arg-qnn_extra_options", "qnn_extra_options"], [43, 3, 1, "cmdoption-arg-quant_format", "quant_format"], [43, 3, 1, "cmdoption-arg-quant_level", "quant_level"], [43, 3, 1, "cmdoption-arg-quant_mode", "quant_mode"], [43, 3, 1, "cmdoption-arg-quant_preprocess", "quant_preprocess"], [43, 3, 1, "cmdoption-arg-quant_type", "quant_type"], [43, 3, 1, "cmdoption-arg-recipes", "recipes"], [43, 3, 1, "cmdoption-arg-reduce_range", "reduce_range"], [43, 3, 1, "cmdoption-arg-rotate_mode", "rotate_mode"], [43, 3, 1, "cmdoption-arg-round_interval", "round_interval"], [43, 3, 1, "cmdoption-arg-save_as_external_data", "save_as_external_data"], [43, 3, 1, "cmdoption-arg-save_format", "save_format"], [43, 3, 1, "cmdoption-arg-save_metadata_for_token_generation", "save_metadata_for_token_generation"], [43, 3, 1, "cmdoption-arg-save_quant_config", "save_quant_config"], [43, 3, 1, "cmdoption-arg-script_dir", "script_dir"], [43, 3, 1, "cmdoption-arg-search", "search"], [43, 3, 1, "cmdoption-arg-seed", "seed"], [43, 3, 1, "cmdoption-arg-size_threshold", "size_threshold"], [43, 3, 1, "cmdoption-arg-source_dtype", "source_dtype"], [43, 3, 1, "cmdoption-arg-sparsity", 
"sparsity"], [43, 3, 1, "cmdoption-arg-static_groups", "static_groups"], [43, 3, 1, "cmdoption-arg-strict", "strict"], [43, 3, 1, "cmdoption-arg-surgeries", "surgeries"], [43, 3, 1, "cmdoption-arg-sym", "sym"], [43, 3, 1, "cmdoption-arg-target_device", "target_device"], [43, 3, 1, "cmdoption-arg-target_dtype", "target_dtype"], [43, 3, 1, "cmdoption-arg-target_modules", "target_modules"], [43, 3, 1, "cmdoption-arg-target_opset", "target_opset"], [43, 3, 1, "cmdoption-arg-tool_command", "tool_command"], [43, 3, 1, "cmdoption-arg-tool_command_args", "tool_command_args"], [43, 3, 1, "cmdoption-arg-torch_dtype", "torch_dtype"], [43, 3, 1, "cmdoption-arg-train_data_config", "train_data_config"], [43, 3, 1, "cmdoption-arg-training_args", "training_args"], [43, 3, 1, "cmdoption-arg-training_loop_func", "training_loop_func"], [43, 3, 1, "cmdoption-arg-trt_fp16_enable", "trt_fp16_enable"], [43, 3, 1, "cmdoption-arg-true_sequential", "true_sequential"], [43, 3, 1, "cmdoption-arg-tuning_criterion", "tuning_criterion"], [43, 3, 1, "cmdoption-arg-use_dynamo_exporter", "use_dynamo_exporter"], [43, 3, 1, "cmdoption-arg-use_enhanced_quantizer", "use_enhanced_quantizer"], [43, 3, 1, "cmdoption-arg-use_external_data_format", "use_external_data_format"], [43, 3, 1, "cmdoption-arg-use_forced_decoder_ids", "use_forced_decoder_ids"], [43, 3, 1, "cmdoption-arg-use_gpu", "use_gpu"], [43, 3, 1, "cmdoption-arg-use_gqa", "use_gqa"], [43, 3, 1, "cmdoption-arg-use_int4", "use_int4"], [43, 3, 1, "cmdoption-arg-use_logits_processor", "use_logits_processor"], [43, 3, 1, "cmdoption-arg-use_prefix_vocab_mask", "use_prefix_vocab_mask"], [43, 3, 1, "cmdoption-arg-use_symbolic_shape_infer", "use_symbolic_shape_infer"], [43, 3, 1, "cmdoption-arg-use_temperature", "use_temperature"], [43, 3, 1, "cmdoption-arg-use_transpose_op", "use_transpose_op"], [43, 3, 1, "cmdoption-arg-use_vocab_mask", "use_vocab_mask"], [43, 3, 1, "cmdoption-arg-user_script", "user_script"], [43, 3, 1, 
"cmdoption-arg-val_data_config", "val_data_config"], [43, 3, 1, "cmdoption-arg-version", "version"], [43, 3, 1, "cmdoption-arg-w_bit", "w_bit"], [43, 3, 1, "cmdoption-arg-weight_only_config", "weight_only_config"], [43, 3, 1, "cmdoption-arg-weight_only_quant_configs", "weight_only_quant_configs"], [43, 3, 1, "cmdoption-arg-weight_type", "weight_type"], [44, 3, 1, "cmdoption-arg-with_replacement", "with_replacement"], [43, 3, 1, "cmdoption-arg-workspace", "workspace"], [43, 3, 1, "cmdoption-arg-zero_point", "zero_point"]], "extra.Sigmoid": [[43, 3, 1, "cmdoption-arg-extra.Sigmoid.nnapi", "nnapi"]], "olive.model": [[41, 0, 1, "", "CompositeModelHandler"], [41, 0, 1, "", "DistributedHfModelHandler"], [41, 0, 1, "", "DistributedOnnxModelHandler"], [41, 0, 1, "", "HfModelHandler"], [41, 0, 1, "", "ModelConfig"], [41, 0, 1, "", "ONNXModelHandler"], [41, 0, 1, "", "OpenVINOModelHandler"], [41, 0, 1, "", "PyTorchModelHandler"], [41, 0, 1, "", "SNPEModelHandler"]], "olive.passes.pytorch.lora": [[43, 1, 1, "", "HFTrainingArguments"]], "olive.passes.pytorch.lora.HFTrainingArguments": [[43, 2, 1, "", "evaluation_strategy"], [43, 2, 1, "", "learning_rate"], [43, 2, 1, "", "lr_scheduler_type"], [43, 2, 1, "", "optim"], [43, 2, 1, "", "overwrite_output_dir"], [43, 2, 1, "", "resume_from_checkpoint"], [43, 2, 1, "", "warmup_ratio"]]}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "pydantic_settings", "Python settings"], "2": ["py", "pydantic_field", "Python field"], "3": ["std", "cmdoption", "program option"]}, "objtypes": {"0": "py:class", "1": "py:pydantic_settings", "2": "py:pydantic_field", "3": "std:cmdoption"}, "terms": {"": [1, 8, 17, 18, 28, 31, 42, 43, 45], "0": [1, 10, 11, 13, 17, 18, 19, 22, 23, 24, 26, 27, 29, 35, 39, 42, 43], "000": 29, "0002": [24, 43], "00456": 43, "00774": 43, "01": [1, 10, 19, 23, 42, 43], "03": 43, "04": [11, 35, 42], "05": 43, "06": 43, "07": 43, "0_onnxconvers": 27, "1": [1, 10, 11, 17, 18, 19, 20, 22, 26, 27, 29, 30, 35, 39, 
42, 43, 44], "10": [27, 30, 42, 43], "100": [7, 8, 10, 43], "10000": 43, "1001": 26, "1024": [22, 39, 43], "12": [5, 35, 42, 43], "120": [13, 39], "128": [17, 39, 43], "13": [18, 30, 42], "14": [27, 30, 43], "1431c563dcfda9c9c3bf26c5d61ef58": 27, "15": [26, 39], "15024": 43, "15531": 43, "16": [19, 22, 24, 28, 39, 43], "16406": 43, "17": [39, 42, 43], "175b": 24, "176b": 24, "18": [26, 43], "1_orttransformersoptim": 27, "1b": [5, 6, 7, 8, 9], "1e": 43, "2": [10, 13, 18, 19, 22, 24, 27, 28, 31, 33, 34, 35, 39, 42, 43], "20": [10, 19, 42], "200": [5, 6, 29], "2023": [23, 43], "203": 29, "2048": 18, "2147483648": 22, "22": 43, "224": [17, 30, 43], "2301": 43, "2309": 43, "2313": 27, "24": 35, "2401": 43, "240101": 26, "2404": 43, "2405": 43, "256": [39, 43], "299": 26, "2_onnxquant": 27, "2gb": [28, 43], "3": [11, 17, 23, 24, 25, 26, 27, 28, 29, 30, 33, 34, 39, 42, 43], "3000": [11, 42], "3072": 18, "32": [23, 28, 33, 39, 43], "32bit": 26, "36": 27, "360m": [5, 6], "3_ortsessionparamstun": 27, "3x": [8, 34], "4": [0, 1, 3, 5, 11, 22, 24, 27, 31, 33, 34, 35, 39, 42, 43], "40": [9, 35, 36, 45], "4000": [11, 42], "42": 43, "44": 5, "4b": 43, "4k": [33, 39], "5": [3, 11, 24, 27, 28, 42, 43], "50": 43, "512": 18, "53fc6781998a4624b61959bb064622c": 27, "6": [3, 26, 35, 39], "60": 24, "60sec": [5, 6], "63442": 43, "64": [27, 39, 43], "7": 43, "768": 42, "7a320d6d630bced3548f242238392730": 27, "7b": [18, 42], "8": [17, 18, 23, 24, 25, 26, 33, 34, 35, 39, 43], "80": 43, "8602941176470589": 27, "9": 42, "96": 35, "99": [27, 43], "A": [2, 8, 13, 25, 27, 29, 35, 39, 43], "As": [8, 22, 33, 35], "At": 10, "But": 17, "By": [42, 43], "For": [3, 5, 6, 9, 10, 11, 17, 18, 22, 27, 28, 30, 32, 35, 36, 39, 42, 43, 45], "If": [3, 5, 7, 8, 10, 11, 13, 15, 16, 17, 18, 19, 20, 22, 25, 26, 27, 30, 33, 35, 37, 39, 42, 43, 44], "In": [2, 5, 6, 9, 10, 11, 12, 17, 19, 22, 28, 29, 30, 33, 35, 42, 43], "It": [2, 3, 5, 6, 12, 15, 18, 22, 23, 24, 27, 32, 34, 35, 39, 42, 43], "Its": 42, "NOT": [10, 43], 
"No": [22, 27], "On": 42, "One": 43, "Such": 28, "That": 43, "The": [2, 3, 5, 6, 7, 8, 9, 10, 11, 13, 15, 16, 17, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 33, 34, 35, 36, 39, 41, 42, 43, 44, 45], "Then": [11, 12, 15, 17, 18], "There": [17, 27, 33, 35, 39, 42], "These": [22, 28, 33, 35, 39], "To": [2, 3, 8, 15, 16, 22, 32, 38, 42, 43], "Will": 39, "With": [5, 6, 10, 16, 18, 28, 34, 37], "_": [17, 27], "__getitem__": 1, "__init__": 1, "__len__": 1, "__model_input__": 22, "__model_output__": 22, "_default_config": 3, "_dummy_input": 39, "_io_config": 39, "_model_file_format": 39, "_model_load": 39, "_run_for_config": 3, "a843d77ae4964c04e145b83567fb5b05": 27, "a_bit": [34, 43], "a_per_token": 43, "a_symmetr": 43, "aarch64": 43, "ab": 43, "abil": [1, 33], "abl": [12, 42], "abort": [5, 6], "about": [3, 4, 9, 16, 18, 22, 23, 24, 25, 26, 30, 33, 34, 38, 40, 42, 43, 44], "abov": [10, 17, 18, 24, 30, 35, 42], "absolut": [23, 43], "acceler": [3, 5, 9, 10, 22, 24, 25, 26, 27, 30, 31, 33, 39, 42, 43, 45], "accelerator_spec": [3, 27], "accelerator_typ": 3, "acceleratorspec": 3, "accept": [3, 39, 43], "access": [5, 6, 12, 18, 42], "accompani": 45, "accord": 29, "account": [11, 16, 18, 39, 42], "account_nam": [16, 39], "accur": 8, "accuraci": [1, 2, 10, 18, 27, 30, 33, 34, 36, 39, 42, 43, 45], "accuracy_custom": 19, "accuracy_data_config": [1, 19, 42], "accuracy_level": 43, "accuracy_scor": [1, 19, 42], "accuracylevel": 43, "achiev": [2, 33, 45], "across": [5, 22, 27, 33], "activ": [2, 8, 10, 23, 24, 33, 34, 37, 39, 43], "activation_typ": [33, 43], "activationsymmetr": 43, "actord": 43, "actual": 23, "ad": [1, 4, 18, 22, 42, 43], "adamw": 43, "adapt": [8, 24, 36, 42, 43], "adapter_format": 39, "adapter_path": [7, 8, 39, 41], "adapter_weight": 7, "adaptor": 43, "add": [4, 15, 16, 17, 18, 20, 22, 27, 29, 30, 39, 42, 43], "add_external_initi": [22, 43], "add_initi": 22, "add_output": 29, "add_qdq_pair_to_weight": 43, "add_qtype_convert": 43, "add_special_token": 39, 
"add_zero_output_0": 29, "add_zero_point": 43, "addit": [17, 20, 23, 33, 37, 43], "addnod": 29, "addqdqpairtoweight": 43, "adjust": 43, "advanc": [33, 39, 45], "advantag": [31, 43], "advent": 28, "affect": [16, 43], "after": [5, 6, 7, 11, 22, 24, 28, 29, 39, 42, 43], "again": 43, "against": [35, 43], "ai": [0, 5, 6, 7, 15, 16, 22, 23, 25, 26, 30, 35, 36, 37, 42, 43, 45], "algorithm": [8, 24, 28, 33, 34, 39, 40, 42, 43, 44, 45], "alia": [35, 42], "all": [0, 2, 5, 7, 8, 10, 11, 17, 27, 29, 30, 32, 39, 41, 42, 43], "all_tensors_to_one_fil": [9, 33, 43], "allow": [8, 9, 15, 16, 17, 18, 22, 23, 30, 43], "allow_tf32": 43, "alon": 30, "along": [2, 3, 17, 24, 39], "alpha": [39, 43], "alreadi": [27, 28, 43], "also": [2, 3, 16, 17, 18, 20, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 37, 39, 41, 42, 43], "alter": 42, "altern": [9, 19, 27, 30], "alwai": 43, "amd": [0, 5, 6, 42, 45], "aml": [11, 13, 18, 27, 39, 42], "aml_comput": [7, 11, 35, 39, 42], "aml_config": 13, "aml_config_path": [11, 13, 39], "aml_docker_config": [11, 35, 42], "aml_environment_config": 35, "aml_system": [11, 15, 18, 42], "among": [28, 42], "amper": 43, "ampl": 2, "an": [0, 2, 3, 5, 6, 7, 8, 13, 16, 17, 18, 19, 20, 22, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 39, 41, 42, 43, 45], "analysi": 43, "analyz": 22, "android": 43, "ani": [2, 3, 9, 11, 16, 17, 30, 32, 33, 36, 41, 42, 43], "anoth": [3, 10, 30, 42, 43], "answer": 39, "anywher": 23, "ao": [5, 6, 43], "apart": 43, "api": [5, 6, 34, 39, 43], "app": [5, 6, 7], "appear": 43, "append": [1, 43], "append_first_op_types_to_quantize_list": 43, "append_pre_post_processing_op": 43, "appendprepostprocessingop": [22, 42], "appli": [18, 22, 24, 27, 29, 33, 42, 43], "applic": [5, 6, 31, 43], "approach": [23, 24, 33, 43], "appropri": 43, "approxim": 29, "ar": [2, 3, 5, 6, 8, 11, 13, 16, 18, 22, 23, 24, 27, 28, 30, 33, 34, 35, 39, 41, 42, 43, 44, 45], "arc": [11, 14], "architectur": [2, 4, 25, 28, 30], "arena_extend_strategi": 22, "argmax": 22, "argument": [13, 
17, 42, 43], "aris": 43, "arithmet": [23, 28], "around": [5, 6], "art": 8, "articl": 7, "articul": 8, "artifact": [7, 15, 35, 36, 42], "arxiv": 43, "ask": [5, 6], "asset": [11, 13, 14, 15, 27, 42], "assign": [28, 32, 42, 43], "assist": [5, 6, 7, 8, 42], "associ": [39, 43], "assum": [18, 42, 43], "asym": 43, "asymmetr": 39, "atol": 43, "attach": 12, "attent": [28, 42, 43], "attention_mask": [17, 18], "attribut": [1, 16, 18, 27, 35, 43], "audio": 0, "auroc": [1, 19, 42], "auth": [11, 42], "authent": 42, "auto": [2, 3, 5, 8, 36, 37, 43, 45], "auto_optimizer_config": 10, "autoawq": 43, "autoawqquant": [34, 42], "autogptq": 43, "autom": 28, "automat": [1, 6, 8, 11, 15, 18, 20, 22, 28, 36, 39, 42, 43, 45], "avail": [3, 6, 9, 10, 17, 18, 25, 26, 28, 29, 30, 32, 35, 37, 39, 41, 42, 43, 44], "averag": 43, "avg": [10, 19, 27, 42], "avoid": [17, 43], "awai": 15, "awar": [8, 10, 34, 42, 43], "awq": [8, 33, 34, 36, 39, 42, 43], "awq_lit": 33, "ax": [22, 42, 43], "axi": [42, 43], "az": [7, 16], "azur": [5, 6, 7, 20, 27, 35, 36, 38, 39, 45], "azureml": [0, 2, 7, 12, 13, 27], "azureml_cli": [11, 18, 20, 35], "azureml_client_config": 35, "azureml_datastor": [11, 20, 42], "azureml_job_output": [11, 20], "azureml_model": [11, 20], "azureml_registry_model": [11, 18, 20], "azureml_system": 35, "azuremlbatch": 27, "azuremlonlin": 27, "azuremlsystem": 2, "azurend12ssystem": 35, "azurend24rssystem": 35, "azurend24ssystem": 35, "azurend6ssystem": 35, "azurend96a100system": 35, "azurend96asystem": 35, "azurendv2system": [35, 42], "b": [5, 6, 22, 24, 30, 33, 43], "backend": [10, 18, 25, 33, 42, 43], "backpropag": 24, "bandwidth": [5, 6, 33], "base": [3, 5, 6, 7, 8, 10, 17, 22, 24, 27, 34, 35, 42, 43], "base_environment_id": 27, "base_imag": [11, 35, 42], "basic": [22, 30, 39, 43], "basic_quantization_flow": 43, "batch": [17, 19, 27, 30, 39, 42, 43], "batch_siz": [1, 10, 17, 18, 19, 22, 30, 39, 42, 43], "batchdeploy": 27, "batchnorm": 22, "beam": [42, 43], "beamsearch": [42, 43], "becaus": 
[8, 43], "becom": 28, "been": [2, 16, 22], "befor": [12, 13, 16, 17, 33, 39, 42, 43], "begin_of_text": [5, 6], "behavior": [29, 43], "being": [35, 39, 43], "belong": 3, "below": [5, 6, 7, 35, 43], "benefit": 38, "bert": [0, 10, 17, 22, 42, 43], "bert_gpu": 10, "bert_kera": 43, "bert_tf": 43, "besid": [17, 30, 41], "best": [2, 10, 19, 22, 27, 30, 32, 36, 42, 43, 45], "bestcandidatemodel": 27, "bestcandidatemodel_1": 27, "bestcandidatemodel_2": 27, "bestcandidatemodel_k": 27, "better": [10, 22, 42, 43], "between": [2, 10, 11, 18, 22, 28, 30, 34, 39, 42, 43], "bf16": 43, "bfloat16": [24, 39, 43], "bgr": 22, "bia": [22, 43], "bias": [22, 26], "bias_bitwidth": 43, "big": [34, 43], "bin": [35, 43], "binari": [42, 43], "binary_fil": 43, "bind": 43, "bit": [0, 8, 23, 24, 25, 26, 33, 34, 42, 43], "bitsandbyt": 43, "bitwidth": 8, "blob": 17, "block": [2, 39, 43], "block_siz": [39, 43], "block_to_split": 43, "blocksiz": 43, "blockwis": 39, "blog": [24, 28, 43], "bloom": [24, 43], "bn": 22, "bnb": 37, "bnb4": [8, 39], "bool": [10, 16, 27, 41, 42, 43, 44], "boolean": [35, 42], "both": [10, 19, 20, 30, 33, 35, 43], "broad": [5, 33], "broadcast": 22, "build": [2, 27, 35, 37, 43], "build_context_path": 35, "builder": 43, "built": [10, 22, 42, 45], "bundl": 43, "byte": [39, 43], "c": [5, 6, 13, 24, 25, 33, 39, 42, 43], "c499e39e42693aaab050820afd31e0c3": 27, "cach": [2, 9, 10, 14, 15, 25, 27, 42, 43, 45], "cache_config": 16, "cache_dir": [9, 10, 16, 27, 42], "calcul": [8, 19, 33, 43], "calib_data_config": [22, 23, 32, 33], "calib_data_coonfig": 42, "calibr": [8, 25, 33, 34, 43], "calibrate_method": [33, 43], "calibration_batch_s": 43, "calibration_data_config": [24, 43], "calibration_data_read": 43, "calibration_nsampl": 43, "calibration_sampling_s": [33, 43], "calibrationmethod": 43, "call": [5, 6, 7, 9, 10, 17, 19, 22, 24, 33, 34, 45], "callabl": [3, 16, 41, 43], "can": [1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 
34, 35, 36, 37, 39, 42, 43, 45], "cancel": 43, "candid": [2, 27], "cannot": [5, 6, 22, 25, 26, 30, 35, 42, 43], "capabl": [5, 22, 36, 38], "captur": [36, 37, 42, 43, 45], "capturesplitinfo": [39, 42], "cascad": 28, "case": [10, 17, 25, 30, 32, 35, 39, 42, 43], "cast": [22, 39, 43], "catalog": [5, 6, 18, 36], "catch": 10, "categor": [3, 35, 43], "categori": 3, "caus": [11, 42, 43], "causal": 42, "cb1_uint8": 22, "cd": 37, "centercrop": 22, "certain": [2, 18, 30, 32, 43], "chain": [8, 22], "chang": [5, 6, 22, 43], "channel": 43, "charact": 43, "chat": [5, 6, 39], "chat_templ": [5, 6], "check": [15, 16, 22, 24, 26, 32, 33, 34, 43], "checkpoint": 43, "checkpoint_path": 43, "child": 41, "children": [39, 43], "chipset": 25, "choic": [39, 43], "choos": [6, 22, 28, 43], "chose": [2, 32], "cifar": 42, "cl": 3, "clang": [25, 43], "class": [1, 17, 41, 42, 43], "classif": [10, 17], "classmethod": 3, "clean": 42, "clean_cach": 42, "clean_evaluation_cach": 42, "cli": [5, 6, 7, 8, 18, 39, 40, 45], "client": 35, "clip": 43, "clone": 37, "close": 15, "cloud": 39, "cluster": [7, 11, 15, 35, 42], "code": [5, 6, 7, 17, 24, 27, 32, 37, 39, 42, 43], "code_fold": 27, "collect": [1, 41, 42, 43], "column": [24, 39, 43], "com": [11, 17, 22, 35, 37, 42, 43], "combin": [2, 10, 29, 32, 43], "come": 34, "comma": [39, 43], "command": [5, 6, 7, 8, 9, 12, 13, 15, 18, 22, 28, 36, 37, 43], "common": [22, 43, 45], "common_evalu": [10, 17, 27, 42], "common_pass": 17, "compact": 8, "compar": [8, 33, 34], "comparison": 10, "compat": [22, 33, 43], "compil": [25, 31, 42, 43], "complet": [7, 8, 16, 17, 18, 24, 30, 42], "complex": [33, 36], "compliant": 33, "compon": [2, 22, 28, 36, 41, 43, 45], "compos": [2, 17, 42], "composit": [42, 43], "compositemodel": 41, "compositemodelhandl": [41, 42, 43], "compress": [8, 28, 33, 43, 45], "compress_to_fp16": 43, "compressor": [0, 8, 9, 42, 43], "compris": 8, "comput": [0, 2, 5, 7, 8, 12, 13, 18, 22, 23, 26, 27, 29, 33, 35, 36, 38, 39, 42, 43], "compute_dtyp": [24, 
43], "compute_logit": [5, 6, 7], "compute_nam": [7, 13, 39], "compute_param": 42, "concat": 22, "concatfromsequ": 22, "concept": [2, 35], "cond": 22, "conda": [5, 11, 35, 37, 42], "conda_file_path": [11, 35, 42], "condit": [3, 22, 30, 43], "conditionaldefault": [3, 43], "conduc": 34, "config": [2, 3, 9, 10, 11, 12, 13, 15, 20, 22, 23, 24, 25, 26, 30, 32, 33, 34, 35, 37, 39, 41, 43], "config_fil": [10, 15], "configu": 15, "configur": [1, 2, 5, 6, 9, 11, 18, 20, 39, 40, 42, 43, 44], "confirm": 39, "conflict": 17, "connect": [14, 28], "consecut": [22, 29], "conserv": 43, "consid": [28, 43], "consist": [2, 43], "consol": [5, 6], "consolid": [33, 34], "const": [22, 43], "constant": [22, 29, 43], "constant_inputs_file_nam": 41, "constraint": 43, "construct": [2, 45], "consum": [5, 6, 39, 43], "consumpt": 33, "contain": [2, 9, 16, 17, 23, 26, 27, 28, 32, 35, 39, 41, 42, 43], "container_nam": [16, 39], "content": [25, 43], "context": [5, 6, 24, 35, 42, 43], "continu": [15, 16, 43], "contrib": [39, 42, 43], "contribut": 37, "control": [5, 6, 30, 43], "conv": [22, 43], "conveni": 10, "convers": [0, 2, 9, 10, 36, 38, 43], "conversion_devic": 39, "convert": [5, 6, 7, 8, 9, 17, 22, 23, 25, 26, 28, 31, 33, 34, 42, 43], "convert_attribut": 43, "convert_float_to_float16": 43, "convertbgrtoimag": 22, "convertimagetobgr": 22, "copi": [5, 6, 7, 43], "core": [2, 16, 25, 26, 39, 43], "coreml": 30, "correct": 43, "correctli": [39, 43], "correspond": [22, 26, 30, 39, 43], "cosin": 43, "cost": [33, 43], "cost_model": [39, 43], "costsplit": 28, "could": [3, 35, 42, 43], "count": [22, 28, 35], "coupl": 13, "cover": [17, 22, 45], "cpu": [0, 3, 5, 6, 7, 8, 9, 11, 24, 27, 31, 34, 35, 37, 39, 42, 43, 45], "cpu_1": 27, "cpu_cor": [39, 43], "cpu_spr": 43, "cpuexecutionprovid": [3, 5, 6, 7, 8, 9, 22, 27, 35, 39, 43], "cr1_uint8": 22, "creat": [1, 2, 3, 5, 6, 7, 8, 13, 17, 18, 22, 27, 28, 34, 35, 36, 37, 39, 41, 42, 43], "create_dataload": 42, "create_stream": [5, 6, 7], "creation": [11, 27, 42], 
"credenti": [11, 42], "cricket": 7, "criteria": 39, "critic": 39, "cross": [5, 6, 22, 23, 30, 33], "csv": [28, 39, 43], "cuda": [0, 3, 5, 6, 10, 39, 43], "cuda11": 35, "cuda_perf_tun": 10, "cuda_transformers_optim": 10, "cudaexecutionprovid": [3, 5, 6, 10, 22, 28, 35, 39], "cudnn8": 35, "cudnn_conv_algo_search": 22, "curat": 20, "current": [2, 8, 10, 16, 17, 20, 28, 35, 39, 42, 43], "custom": [0, 2, 4, 16, 22, 24, 36, 39, 42, 43], "custom_io": 43, "customized_dataload": 17, "customized_huggingface_dataset": 17, "customized_huggingface_pre_process": 17, "customized_post_process": 17, "cut": [8, 36, 45], "cv": 43, "d": [13, 33, 39], "d1": 43, "d2": 43, "d_": 43, "damp": 43, "damp_perc": 43, "dampen": 43, "data": [1, 3, 15, 18, 19, 27, 30, 33, 36, 39, 42, 43], "data2": 43, "data_config": [1, 9, 10, 17, 18, 19, 22, 23, 26, 32, 33, 34, 42, 43], "data_dir": [11, 17, 19, 42], "data_fil": [17, 39], "data_nam": [7, 8, 10, 17, 18, 39], "data_typ": [22, 29, 43], "dataclass": 3, "dataconfig": 43, "datacontain": 17, "dataload": [1, 17, 43], "dataloader_config": [1, 10, 17, 22, 24, 26, 33, 42], "datamodul": 43, "dataset": [1, 4, 8, 17, 24, 25, 33, 34, 37, 39, 43], "dataset_1": 17, "dataset_2": 17, "datastor": [15, 35, 42], "datastore_nam": [11, 20, 42], "date": 15, "datetim": 43, "dd": 15, "dead": 22, "deadend": 22, "deberta": 0, "debug": [39, 43], "decid": [32, 42], "decis": 28, "decod": [5, 6, 7, 22, 41, 43], "decoder_input_id": 43, "decoder_model": 43, "decoder_with_past": 43, "decoder_with_past_model": 43, "decompos": 44, "decomposit": 24, "decreas": 30, "dedic": 43, "dedicated_qdq_pair": 43, "deep": [17, 23, 24, 26, 31, 33, 34], "def": [1, 3, 17, 19, 39], "default": [0, 2, 3, 5, 6, 10, 11, 12, 13, 15, 16, 17, 18, 24, 27, 30, 32, 33, 35, 37, 39, 42, 43], "default_auth_param": [11, 42], "default_dataload": 17, "default_valu": [3, 32, 33, 43, 44], "defaultazurecredenti": [11, 42], "defaultlook": 43, "defin": [1, 2, 11, 17, 19, 27, 28, 32, 33, 36, 39, 43, 45], "definit": [30, 
39, 42], "degrad": [1, 10, 19, 42], "delet": [13, 39], "deliv": 33, "depend": [3, 5, 6, 32, 35, 39, 42, 43], "deploi": [12, 23, 27, 28], "deploy": [2, 27, 36, 45], "deployment_config": 27, "deployment_nam": 27, "deploymentconfig": 27, "deprec": 43, "dequant": [42, 43], "dequantizelinear": [29, 43], "desc_act": 43, "describ": [19, 20, 22, 24, 32], "descript": [3, 8, 22, 27, 30, 42, 43, 44], "design": [4, 38], "desir": [2, 25, 33, 37, 39, 43], "detail": [7, 9, 11, 12, 13, 15, 18, 19, 22, 23, 24, 25, 26, 29, 30, 31, 33, 34, 35, 36, 38, 39, 42, 43, 44], "detect": 43, "determin": [10, 19, 28, 43], "develop": [10, 25, 26, 33, 37, 43], "devic": [2, 5, 6, 7, 8, 9, 10, 19, 25, 28, 35, 39, 42, 43, 45], "device_id": 22, "device_typ": 39, "diag": 43, "diagon": 43, "diagram": 2, "dict": [1, 3, 11, 27, 41, 42, 43], "dictionari": [2, 17, 32, 35, 39, 42, 43], "differ": [1, 2, 5, 6, 8, 10, 19, 22, 24, 25, 26, 27, 28, 35, 36, 39, 42, 43, 45], "difficult": 43, "diffus": [0, 43], "dim": [29, 39], "dim_index": 22, "dim_param": 43, "dim_valu": [30, 43], "dimens": [22, 30, 42, 43], "direct": 25, "directli": [1, 16, 17, 20, 39, 42, 43], "directml": [0, 3, 5, 6, 33, 35, 37, 43], "directori": [1, 2, 16, 17, 18, 19, 20, 26, 39, 42, 43], "directx": 5, "disabl": [10, 16, 43], "disable_al": 43, "disable_auto_optim": 10, "disable_force_evaluate_other_ep": 39, "disable_search": 32, "discov": [11, 18], "discuss": 2, "disk": [5, 6], "distil": [8, 33], "distilbert": 43, "distribut": [23, 42, 43], "distributedhfmodel": 43, "distributedhfmodelhandl": [41, 42, 43], "distributedonnxmodelhandl": [41, 42, 43], "div": 43, "dive": 17, "divid": [28, 43], "dlc": [26, 42, 43], "dml": [3, 33], "dmlexecutionprovid": [5, 6, 35, 39], "dn": 43, "do": [3, 18, 43], "do_copy_in_default_stream": 22, "do_valid": 43, "doc": [10, 43], "docker": [2, 12, 27, 42], "dockerfil": 35, "dockersystem": 2, "document": [5, 6, 11, 12, 18, 19, 20, 22, 26, 32, 42, 43], "doe": [10, 16, 17, 22, 35, 42, 43, 44], "doesn": [20, 27, 35], 
"domain": 43, "don": [3, 11, 22, 30, 42, 43], "done": [7, 22], "doubl": [11, 42], "double_qu": 43, "down": [13, 15, 43], "downcast": 43, "download": [5, 6, 7, 11, 12, 16, 18, 24, 35, 42], "dpu": [0, 43], "dq": 43, "driven": 33, "driver": 5, "drop": 34, "drop_typ": 23, "dropout": [22, 43], "dsp": 26, "dtype": [39, 42, 43], "due": [8, 28], "dummi": [17, 18, 39, 41], "dummy_data_config_templ": 17, "dummy_input": 39, "dummy_inputs_func": [16, 41], "dummydatacontain": 17, "duo_scal": 43, "duplic": 22, "dure": [8, 22, 23, 24, 37, 39, 42, 43], "dynam": [0, 8, 9, 33, 39, 42, 43], "dynamic_ax": [18, 42], "dynamic_lora_r": 43, "dynamic_shap": 18, "dynamic_to_fixed_shape_dim_param": 39, "dynamic_to_fixed_shape_dim_valu": 39, "dynamictofixedshap": [30, 42], "dynamo_export": [39, 43], "e": [5, 8, 10, 22, 27, 33, 37, 39, 43], "each": [2, 3, 11, 17, 22, 24, 27, 28, 30, 35, 39, 42, 43, 44, 45], "easi": [10, 28, 33, 34, 45], "easili": [8, 16, 17, 36, 42, 45], "edg": [8, 28, 36, 45], "effect": 43, "effici": [6, 22, 23, 24, 30, 33, 43], "egg": 37, "either": [1, 5, 6, 7, 33, 35, 39, 42, 43], "element": 43, "element_wise_binary_op": 43, "elimin": 22, "els": [42, 43], "embed": [28, 39, 43], "embedlayernorm": 30, "emploi": [2, 45], "empti": [5, 6, 42, 43], "en": [43, 44], "enabl": [5, 6, 8, 10, 16, 18, 30, 36, 37, 39, 42, 43, 45], "enable_al": 43, "enable_bas": 43, "enable_cpu_fallback": 43, "enable_cuda_graph": [10, 39, 43], "enable_dpu": 43, "enable_extend": 43, "enable_htp": [26, 43], "enable_profil": [22, 39, 42, 43], "enable_search": 39, "enable_trt_fp16": 10, "enablesubgraph": 43, "encapsul": [8, 43], "encod": [5, 6, 7, 22, 39, 41, 43], "end": [5, 6, 7, 22, 42, 43], "end_header_id": [5, 6, 7, 8], "endpoint": 27, "endpoint_nam": 27, "engin": [3, 10, 11, 12, 25, 26, 27, 31, 33, 35], "enhanc": 43, "enough": 15, "ensur": [33, 43], "entir": [5, 6, 30, 42, 43], "entiremodel": 39, "entrei": 39, "entri": [39, 43], "entropi": 43, "env": 27, "environ": [2, 5, 12, 15, 22, 25, 26, 27, 30, 33, 
37, 39, 42], "environment_vari": [27, 35], "eot_id": [5, 6, 7, 8], "ep": [0, 2, 10, 33, 35, 39, 43], "epoch": 43, "equal": [28, 42], "equival": [29, 42], "erf": 29, "erf_output": 29, "error": [5, 6, 35, 39, 42, 43, 45], "especi": 43, "estim": [2, 42, 44], "etc": [3, 10, 22, 33, 35, 42, 43], "eval_accuraci": 19, "eval_data_config": 43, "eval_dataset": 43, "eval_split": 39, "eval_subset": 39, "evalu": [1, 10, 11, 16, 17, 18, 19, 27, 35, 39, 41, 43, 45], "evaluate_func": [1, 19, 42], "evaluate_func_kwarg": [19, 42], "evaluate_input_model": 42, "evaluation_strategi": 43, "even": [8, 28, 34, 39, 43], "everi": 43, "everywher": 33, "ex": 43, "exactli": 22, "exampl": [5, 6, 7, 10, 11, 16, 17, 18, 19, 28, 32, 35, 38, 39, 41, 43, 45], "example_input_func": 43, "except": [5, 6], "exclud": [11, 22, 42, 43], "exclude_emb": [39, 43], "exclude_lm_head": [39, 43], "exclude_managed_identity_credenti": 42, "excluded_precis": 43, "exclus": [13, 30, 39], "execut": [3, 5, 6, 7, 8, 9, 10, 16, 22, 25, 26, 27, 33, 35, 36, 39, 41, 42, 43, 45], "execution_mod": 27, "execution_mode_list": [39, 43], "execution_ord": [2, 10, 27, 42], "execution_provid": [3, 9, 10, 19, 27, 35, 42, 43], "exhast": 39, "exhaust": [2, 22, 39, 42, 44], "exist": [13, 16, 18, 22, 27, 28, 30, 35, 39, 43], "exit": [5, 6], "exllama": 43, "expand": 22, "expect": [10, 24, 43], "experi": [8, 33, 36], "experiment": 45, "expert": 10, "explan": 29, "explicitli": [20, 43], "explor": 40, "export": [15, 27, 28, 35, 43], "export_compat": 43, "export_in_mlflow_format": 27, "expos": [29, 30], "express": 22, "extend": [38, 43], "extens": [22, 31, 42, 43], "extern": [22, 43], "external_data_nam": 43, "external_initializers_file_nam": 41, "extra": [27, 35, 37, 39, 43], "extra_arg": [25, 42, 43], "extra_config": [27, 43], "extra_opt": 43, "extra_session_config": [27, 39, 43], "extract": [42, 43], "extractadapt": [22, 42], "extractedadapt": 39, "f": [5, 6, 7], "f1": [10, 18, 42], "f1_score": [1, 19, 42], "face": [6, 7, 8, 9, 17, 24, 31, 
36, 42, 43, 45], "factor": [10, 16, 43], "fail": [11, 42], "failur": 43, "fake": 24, "falcon": 0, "fals": [5, 6, 7, 10, 16, 18, 22, 27, 32, 35, 39, 41, 42, 43, 44], "famili": 28, "fast": [5, 6, 43], "fast_bias_correct": 43, "faster": [16, 25, 33, 34], "fastgelu": 30, "fatal": 42, "favorit": [8, 34], "featur": [16, 33, 37, 38, 40, 43], "fetch": 18, "few": 28, "field": [3, 10, 16, 17, 19, 39, 43], "file": [1, 3, 5, 6, 7, 10, 11, 12, 13, 15, 16, 18, 22, 24, 25, 26, 33, 34, 35, 42, 43], "fill": [12, 43], "filter": 35, "final": [2, 3, 16, 28, 35, 42, 43], "final_orient": 43, "find": [2, 11, 15, 18, 22, 24, 30, 36, 38, 42], "fine": [0, 7, 8, 22, 24, 30, 39, 42, 43], "finetun": [5, 24, 36, 37, 45], "first": [2, 9, 17, 18, 27, 28, 42, 43], "first_conv_or_matmul_quant": 43, "firstli": [27, 43], "five": [2, 35], "fix": [2, 25, 26, 32, 33, 39, 42, 43], "flag": [12, 13, 35, 43], "flatten": 22, "flexibl": [10, 15, 22, 30, 33], "float": [8, 23, 29, 33, 39, 42, 43], "float16": [10, 39, 42, 43], "float32": [30, 39, 42, 43], "floattoimagebyt": 22, "flop": [28, 39, 43], "flow": 10, "flush": [5, 6, 7], "focus": [23, 33], "fold": 22, "folder": [3, 11, 27, 39, 42, 43], "follow": [1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 15, 17, 18, 19, 22, 26, 27, 28, 34, 35, 37, 41, 42, 43, 44], "footprint": [7, 23, 24, 33], "forc": [39, 43], "force_evaluate_other_ep": 43, "force_fp16_input": 43, "force_fp32_nod": 43, "force_fp32_op": 43, "forcequantizenoinputcheck": 43, "form": [22, 25, 42], "format": [5, 6, 7, 8, 9, 22, 25, 27, 30, 33, 39, 42, 43], "found": [13, 16, 18, 22, 24, 25, 26, 30, 42, 43], "four": [17, 28], "fp16": [0, 5, 6, 8, 10, 28, 31, 34, 39, 43], "fp32": [0, 5, 6, 10, 39, 43], "fp4": [39, 43], "fp8": 39, "fraction": 43, "framework": [23, 24, 25, 26, 33, 42, 43], "free": 12, "freez": 24, "friendli": [33, 34], "from": [1, 2, 3, 5, 6, 7, 8, 9, 13, 16, 17, 18, 20, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 39, 41, 42, 43, 44], "from_pretrain": 42, "frontier": [2, 42], "frozen": 24, 
"frustrat": 45, "ft": [7, 8], "full": [8, 22, 32], "fulli": [32, 36], "function": [1, 17, 19, 22, 29, 39, 41, 42, 43], "further": 43, "fuse": [22, 33, 42, 43], "fuse_layernorm": 43, "fusion": [30, 43], "fusion_opt": 43, "fusionopt": 43, "futur": 18, "g": [5, 8, 10, 22, 27, 33, 39, 43], "gate": [5, 6, 18, 43], "gather": 22, "gemm": [22, 43], "gemm_to_matmul": 43, "gemv": 43, "genai": 43, "gener": [5, 6, 7, 8, 18, 20, 22, 26, 27, 30, 33, 35, 41, 42, 43, 44], "generate_next_token": [5, 6, 7], "generation_config": [24, 43], "generatorparam": [5, 6, 7], "get": [6, 18, 28, 35, 38, 39, 41, 43, 45], "get_model_compon": 41, "get_next_token": [5, 6, 7], "get_qnn_qdq_config": 43, "gigabyt": 28, "git": 37, "github": [17, 22, 37, 39, 42], "give": [30, 42], "given": [10, 17, 39, 42, 43], "glue": [10, 17], "go": [5, 6], "goal": [1, 2, 10, 19, 33, 42], "gpt": 24, "gpt2": 43, "gpt_neox": 43, "gptj": 0, "gptq": [8, 33, 34, 39, 42, 43], "gptqquantiz": [34, 42], "gpu": [0, 3, 5, 6, 7, 8, 10, 24, 30, 31, 33, 34, 35, 37, 39, 43, 45], "gpu_mem_limit": 22, "gradient": 24, "grain": 30, "grant": 18, "graph": [5, 6, 22, 25, 28, 29, 30, 33, 36, 37, 41, 42, 43, 45], "graph_optimization_level": [27, 43], "graphsurgeri": [29, 42], "greatli": 7, "grid": 44, "group": [7, 11, 13, 35, 39, 42, 43, 44], "group_siz": 43, "groupqueryattent": 43, "guanaco": 18, "guid": [5, 18, 28], "h": [39, 43], "ha": [2, 3, 5, 7, 8, 9, 10, 16, 20, 22, 24, 28, 30, 37, 39, 41, 42, 43, 45], "hadamard": [34, 43], "half": 43, "handl": [5, 6, 43], "handler": 43, "hang": [11, 42], "happen": 23, "hardwar": [0, 2, 5, 6, 8, 22, 23, 30, 33, 34, 35, 42, 43, 45], "hash": 39, "have": [1, 2, 5, 7, 8, 11, 18, 19, 22, 24, 27, 28, 30, 35, 41, 42, 43], "head": [28, 39, 42, 43], "header": [39, 43], "height": 22, "help": [5, 6, 10, 13, 23], "helper": 30, "here": [1, 10, 12, 16, 17, 18, 22, 30, 32, 35, 42, 43], "hessian": 43, "hexagon": 26, "hf": [18, 42, 43], "hf_model_nam": 39, "hf_token": [18, 42], "hfloadkwarg": 41, "hfmodel": [9, 10, 
18, 24, 31, 39, 42, 43], "hfmodelhandl": [41, 42, 43], "hftrainingargu": 24, "hh": 15, "hidden": [42, 43], "hidden_s": [42, 43], "high": [8, 22], "higher": [28, 33, 39, 42], "higher_is_bett": [19, 27, 42], "histori": [27, 43], "hold": 3, "home": [12, 35], "host": [2, 3, 9, 10, 18, 27, 35, 36], "how": [4, 7, 11, 12, 16, 18, 22, 28, 38, 39, 42, 43], "howev": 28, "hqq": [39, 43], "html": [22, 39, 43, 44], "htp": 43, "htp_soc": 43, "http": [11, 16, 17, 22, 37, 39, 42, 43, 44], "hub": [20, 28, 42], "hug": [6, 7, 8, 9, 17, 24, 31, 36, 42, 43, 45], "huggingfac": [5, 6, 8, 17, 24, 28, 34, 39, 42, 43], "huggingface_data_config": 17, "huggingface_dataset": 17, "huggingface_metr": [10, 18, 42], "huggingface_pre_process": 17, "huggingfacecontain": [10, 17, 18], "huggingfacetb": [5, 6], "hw": 22, "hyperparamet": [33, 34], "i": [1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 20, 22, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 34, 35, 37, 39, 41, 42, 43, 44], "id": [5, 6, 8, 11, 13, 20, 27, 35, 39, 43], "ideal": [2, 33], "ident": [11, 18, 22, 35, 42], "identifi": 22, "idl": [13, 39], "idle_time_before_scale_down": [13, 39], "ignor": [3, 13, 17, 27, 39, 43], "ignored_scop": 43, "ignored_scope_typ": 43, "ignorescopetypeenum": 43, "illustr": 2, "imag": [22, 27, 35, 43], "image_format": 22, "image_nam": 35, "imagebytestofloat": 22, "immedi": 16, "impact": 8, "implement": [2, 22, 24, 26, 28, 33, 34, 39, 42], "import": [1, 3, 5, 6, 7, 17, 18, 43], "improv": [10, 19, 22, 23, 24, 28, 30, 33, 34, 42, 43, 45], "inc": [8, 25, 43], "inc_dynam": 39, "inc_quant": 33, "incdynamicquant": [9, 33, 42], "incept": 0, "inceptionv3": 26, "includ": [3, 5, 8, 11, 15, 16, 17, 22, 27, 28, 30, 33, 35, 39, 43, 45], "include_runtime_packag": 27, "incompat": 22, "incquant": [33, 42], "increas": [11, 24, 33, 42], "incstaticquant": [33, 42], "independ": 2, "index": [29, 42], "indic": [13, 17, 29, 35], "individu": [39, 45], "infer": [8, 9, 17, 19, 20, 22, 23, 24, 25, 26, 28, 29, 30, 31, 33, 34, 35, 36, 39, 
42, 43], "inferenc": 27, "inference_set": [27, 41, 42], "inferenceserverconfig": 27, "inferencing_serv": 27, "influenc": 16, "info": [27, 29, 39, 42], "inform": [3, 10, 18, 23, 24, 29, 43, 44], "informs": 39, "infrastructur": 12, "infus": [5, 6], "init_overrid": 43, "initi": [2, 11, 22, 24, 29, 42, 43], "inject": 24, "inlin": 22, "input": [2, 3, 5, 6, 8, 9, 10, 11, 16, 17, 22, 23, 24, 26, 27, 29, 33, 35, 41, 43, 45], "input0": 43, "input0_chanfirst": 43, "input1": [29, 43], "input2": [29, 43], "input_col": [10, 17, 39], "input_dim": [30, 43], "input_dir": 17, "input_id": [5, 6, 7, 17, 18, 42], "input_idx": 29, "input_index": 22, "input_int32": 43, "input_layout": 43, "input_list": [25, 42, 43], "input_model": [9, 10, 18, 20, 42], "input_model_dtyp": 43, "input_nam": [17, 18, 26, 30, 41, 42, 43], "input_nod": 43, "input_ord": 17, "input_order_fil": 17, "input_shap": [17, 18, 26, 30, 41, 42, 43], "input_suffix": 17, "input_token": [5, 6], "input_typ": [17, 18, 42, 43], "inputlayout": 43, "inputs_to_make_channel_last": 43, "inputtyp": 43, "insensit": [32, 42], "insert": [23, 33, 42, 43], "insertbeamsearch": [22, 42], "inside_layer_modul": 43, "instal": [2, 3, 12, 18, 27, 35, 36, 38, 43], "instanc": [13, 22, 27, 35, 39, 43], "instance_count": [27, 35], "instance_typ": 27, "instead": [16, 25, 26, 30, 35, 39, 43], "instruct": [5, 6, 7, 8, 9, 12, 15, 18, 26, 28, 33, 36, 39], "int": [3, 10, 11, 27, 41, 42, 43, 44], "int16": 39, "int32": [39, 43], "int4": [0, 5, 6, 10, 30, 39, 43], "int4_accuracy_level": [39, 43], "int4_block_s": [39, 43], "int4_quantization_mod": 39, "int64": [17, 18], "int8": [0, 5, 6, 10, 33, 39, 43], "integ": [22, 23, 30, 43], "integr": [0, 7, 14, 22, 30, 33, 34, 39, 45], "intel": [0, 8, 9, 10, 17, 23, 42, 43, 45], "intellisens": 42, "intend": 28, "intens": 28, "inter": [39, 43], "inter_op_num_thread": 27, "inter_thread_num_list": [39, 43], "interf": 27, "interfac": [3, 5, 6, 13, 22, 30, 33], "intermedi": [2, 16, 29, 43], "intern": 43, "internet": 16, 
"interpret": 1, "interrupt": 15, "interv": [11, 42, 43], "intra": [39, 43], "intra_op_num_thread": 27, "intra_thread_num_list": [39, 43], "introduc": [3, 22, 28], "invalid": [3, 22, 43], "invoc": 27, "invok": [3, 39, 43], "involv": [22, 29], "io": [12, 18, 39, 41, 42, 43, 44], "io_bind": [9, 10, 27, 39, 43], "io_config": [16, 18, 39, 41, 42], "io_map": 22, "iobind": [39, 43], "ioconfig": 41, "iomapentri": 22, "ir": [10, 23, 42], "is_don": [5, 6, 7], "is_generative_model": 39, "is_symmetr": 43, "isolatedort": 35, "isolatedortsystem": 2, "issu": [11, 42], "item": [10, 17, 42], "iter": [2, 41, 42, 43], "its": [2, 18, 19, 22, 23, 24, 25, 26, 29, 30, 33, 34, 35, 42, 43, 44], "itself": 22, "job": [7, 12, 18, 36, 42, 43], "job_id": [7, 11, 20], "joint": [2, 10, 27, 42], "jsexecutionprovid": 39, "json": [1, 9, 10, 13, 15, 17, 35, 39, 43, 45], "json_config": 17, "jupyt": 5, "just": [17, 43], "k": 27, "k8": 12, "keep": [5, 6, 10, 43], "keep_io_typ": [30, 43], "kei": [1, 17, 18, 22, 39, 42, 43], "keras2onnx": 43, "kernel": 43, "key1": 39, "key2": 39, "keyboardinterrupt": [5, 6], "keyvault": [18, 39, 42], "keyvault_nam": [18, 39, 42], "keyword": [17, 42], "kind": [12, 22], "knextpoweroftwo": 22, "knob": 22, "know": 28, "knowledg": [10, 22], "known": [26, 27], "kube": 12, "kubeconfig": 12, "kv_cach": [17, 42], "kwarg": [17, 42], "l35": 17, "label": [10, 17, 35], "label_col": [10, 17], "languag": [5, 6, 8, 28, 34, 39, 42, 43], "larg": [11, 24, 28, 42, 43], "last": [12, 43], "last_conv_or_matmul_quant": 43, "latenc": [2, 10, 23, 27, 36, 41, 42, 43, 45], "latency_data_config": 19, "latent": 43, "later": 25, "latest": [8, 22, 37, 43], "layer": [22, 24, 28, 31, 39, 42, 43], "layer_name_filt": 43, "layernorm": [30, 43], "layers_block_nam": 43, "layout": [22, 43], "lead": 8, "learn": [4, 5, 13, 16, 18, 20, 22, 23, 24, 26, 27, 30, 31, 33, 34, 35, 36, 38, 40, 43], "learning_r": [24, 43], "least": [17, 24], "leav": [30, 43], "left": 43, "length": [5, 6, 30, 39, 42, 43], "less": [28, 33], 
"let": [1, 17, 28, 45], "letter": 17, "level": [15, 20, 28, 30, 39, 42, 43], "leverag": [17, 22], "lib": [42, 43], "lib_nam": 43, "lib_target": [25, 43], "librari": [5, 8, 22, 24, 30, 31, 33, 34, 42, 43], "light": [2, 5, 6], "lightn": [0, 24, 43], "lightning_modul": 43, "lightningdatamodul": 43, "lightningmodul": 43, "like": [4, 7, 11, 13, 16, 17, 22, 24, 27, 28, 31, 33, 39, 42, 43], "limit": [28, 39], "line": [8, 13, 18, 22, 28, 42, 43], "linear": [31, 42, 43], "link": [0, 24, 43], "linux": [5, 25, 43], "list": [2, 17, 18, 22, 27, 29, 32, 35, 37, 39, 41, 42, 43, 44], "live": 45, "ll": [5, 6, 7, 8, 9], "llama": [0, 5, 6, 7, 8, 9, 18, 42, 43], "llama2": 0, "llava": 43, "llm": [28, 34], "load": [1, 4, 5, 6, 7, 17, 19, 22, 24, 25, 26, 28, 39, 41, 42, 43], "load_dataset": 17, "load_dataset_config": [1, 10, 17, 18], "load_kwarg": [41, 42], "load_param": 42, "loader": 39, "local": [0, 2, 5, 6, 7, 15, 16, 33, 36, 39, 42], "local_cach": 16, "local_dataset": 17, "local_docker_config": 35, "local_system": [9, 10, 27, 35, 42], "localsystem": [2, 9, 10, 35, 42], "locat": [13, 16, 39, 42], "loftq": 42, "loftq_it": 43, "log": [15, 16, 18, 39, 42], "log_level": [5, 6, 7, 8, 39], "log_severity_level": [9, 42], "log_to_fil": 42, "logger": 43, "logic": [1, 43], "login": [5, 6, 16, 39], "logit": [18, 19, 30, 43], "logits_processor": 43, "logsoftmax": 22, "long": [11, 15, 42, 43], "look": [10, 16, 17], "loop": [0, 24, 43], "lora": [7, 8, 22, 36, 42, 45], "lora_alpha": [24, 39, 43], "lora_dropout": 43, "lora_r": [39, 43], "loss": [8, 43], "low": [24, 43], "lower": [6, 8, 24, 27], "lr_scheduler_typ": 43, "lunch": 12, "m": [2, 15, 25, 26, 28, 39, 43], "mac": 5, "machin": [2, 5, 7, 8, 13, 14, 18, 20, 22, 27, 30, 33, 35, 36, 43], "made": 28, "mai": [8, 12, 16, 22, 30, 37, 43], "main": [2, 17, 37, 41], "main_export": 43, "maintain": 2, "make": [10, 12, 16, 18, 24, 28, 30, 33, 34, 43], "make_input": [22, 43], "manag": [2, 11, 13, 14, 18, 39, 42], "managedonlinedeploy": 27, "mandatori": 35, 
"mani": [5, 6, 7, 8, 16, 28, 30, 43], "manipul": [29, 36], "manual": [10, 18, 45], "map": 43, "mask": 43, "master": 24, "match": [16, 43], "matmul": [8, 22, 39, 42, 43], "matmul4": 39, "matmulconstbonli": 43, "matmulnbit": [39, 42, 43], "matmulnbitstoqdq": 42, "matric": [22, 24], "matrix": 43, "max": [1, 5, 6, 10, 19, 28, 42, 43], "max_drop": 23, "max_finite_v": 43, "max_it": 42, "max_lay": 43, "max_length": [5, 6, 7], "max_nod": [13, 39], "max_operation_retri": [11, 42], "max_sampl": [10, 17, 39], "max_seq_len": [18, 39], "max_step": [7, 8], "max_tim": 42, "max_trial": 43, "maximum": [11, 13, 39, 42, 43], "maxpool": 43, "mcr": [11, 35, 42], "mean": [6, 18, 33, 42, 43], "measur": [42, 43], "mechan": 45, "meet": [2, 17, 33], "member": [3, 43], "memori": [8, 23, 24, 28, 33, 34, 39], "merg": [0, 24, 42, 43], "merge_adapter_weight": 43, "mergeadapterweight": 42, "met": 42, "meta": [5, 6, 7, 8, 9, 18, 42], "metadata": 43, "metadata_onli": 43, "method": [3, 6, 7, 8, 23, 24, 33, 38, 39, 42, 43], "metric": [1, 2, 10, 36, 42, 43, 45], "metric_1": 27, "metric_2": 27, "metric_3": 27, "metric_config": 42, "metric_func": [1, 19, 42], "metric_func_kwarg": 42, "microsoft": [11, 12, 17, 28, 33, 35, 37, 39, 42, 43], "might": [2, 28, 33, 39, 43], "migraphxexecutionprovid": [5, 6, 39], "min": [10, 19, 42, 43], "min_lay": 43, "min_nod": [13, 39], "min_positive_v": 43, "mini": [27, 28, 33, 39], "mini_batch_s": 27, "minim": 30, "minimum": [13, 39, 42, 43], "minmax": 43, "minms": 43, "minut": [12, 38], "miss": 29, "mistral": 0, "mit": 34, "mix": [39, 42, 43], "mixed_precision_overrides_config": 39, "mixed_precision_overrides_util": 43, "mixedprecisionoverrid": [39, 42], "mixin": 41, "mixtral": 43, "ml": [7, 15, 20, 25, 27, 35], "mlclient": 20, "mlflow": 27, "mm": 15, "mnb_to_qdq": 39, "mnist_requir": 35, "mobil": 30, "mobilenet": 0, "mode": [22, 37, 39, 42, 43], "model": [0, 2, 3, 4, 8, 9, 10, 14, 16, 19, 22, 24, 29, 31, 34, 35, 36, 40, 43, 45], "model_attribut": [27, 41], 
"model_attributes_kei": 27, "model_attributes_valu": 27, "model_build": 43, "model_compon": 41, "model_component_nam": 41, "model_config": 27, "model_dir": [11, 20], "model_file_format": [39, 41], "model_fold": [5, 6], "model_hash": 39, "model_load": [16, 41], "model_nam": [11, 17, 20, 27, 39], "model_name_or_path": [5, 6, 7, 8, 39], "model_name_pattern": 41, "model_output": 19, "model_packag": 27, "model_path": [9, 10, 11, 18, 20, 27, 41, 42], "model_path_nam": 43, "model_rank": 27, "model_script": [39, 41], "model_typ": [18, 22, 28, 42, 43], "model_vers": [11, 27], "modelbuild": [30, 42], "modelconfig": 41, "modeldtyp": 43, "modelfileformat": 41, "modelpackageconfig": 27, "models_rank": 27, "modeltypeenum": 43, "modif": 29, "modifi": [22, 28], "modul": [0, 1, 17, 28, 30, 31, 39, 42, 43], "modular": 2, "modules_to_fus": 43, "modules_to_not_convert": 43, "modules_to_sav": 43, "monoton": 22, "more": [2, 7, 8, 9, 10, 11, 13, 15, 16, 18, 19, 22, 23, 24, 25, 26, 30, 31, 33, 34, 35, 36, 38, 39, 42, 43, 44], "most": [10, 22, 30, 34, 35, 42, 43], "move": 23, "mrpc": [10, 17], "mse": 43, "msi": 18, "much": 43, "mul": [29, 43], "mul_0": 29, "multi": [2, 7, 45], "multiheadattent": 43, "multipl": [0, 16, 22, 27, 28, 29, 30, 36, 39, 41, 42, 43], "multipli": 22, "multivari": 44, "must": [2, 3, 17, 18, 22, 25, 32, 33, 35, 39, 42, 43], "mutual": [13, 39], "mxnet": 33, "my_dataload": 1, "my_datastor": [11, 20], "my_job_id": [11, 20], "my_keyvault_nam": 18, "my_model": [11, 20], "my_model_dir": 20, "my_modul": 1, "my_olive_project": 1, "my_output_nam": [11, 20], "my_post_process": 1, "my_resource_group": [11, 20], "my_script": 1, "my_subscription_id": [11, 20], "my_val": 1, "my_workspac": [11, 20], "myaccountnam": 16, "mycontainernam": 16, "mydataload": 1, "myenv": 35, "n": [7, 8, 39, 43], "name": [1, 3, 5, 6, 7, 9, 10, 11, 13, 15, 16, 17, 18, 19, 22, 23, 27, 29, 35, 42, 43, 44], "name_pattern": 43, "nc_workspac": 43, "ndarrai": 43, "necessari": [15, 18, 33, 35, 43], "necessarili": 
27, "need": [2, 5, 6, 7, 8, 10, 11, 15, 16, 17, 18, 19, 22, 26, 28, 33, 36, 37, 39, 42, 43], "need_layer_fus": 43, "neglig": 24, "neighbor": 43, "ner_huggingface_preprocess": 17, "ner_post_process": 17, "nest": 43, "net": 16, "network": [5, 6, 11, 23, 24, 26, 33, 42, 43], "neural": [0, 8, 9, 23, 26, 42, 43], "new": [4, 5, 6, 7, 12, 13, 18, 24, 29, 35, 36, 39], "new_nam": [29, 43], "new_token": [5, 6, 7], "newoptimizationtrick": 3, "next": [3, 5, 6, 16, 27], "nf4": [24, 39, 43], "ngram": 43, "nlp": [0, 43], "nn": [31, 42, 43], "nnapi": [30, 43], "nnx": 45, "no_auto_batch_dataload": 17, "no_repeat_ngram_s": [22, 43], "node": [13, 22, 29, 30, 39, 42, 43], "node_block_list": 43, "node_nam": 29, "nodes_to_exclud": 43, "nodes_to_quant": 43, "non": [22, 43], "none": [10, 17, 19, 27, 35, 39, 41, 42, 43], "nonoverflow": [33, 43], "nop": 22, "normal": [3, 15], "note": [3, 11, 12, 18, 20, 23, 24, 25, 26, 27, 28, 30, 33, 35, 37, 42, 43], "now": [10, 17, 23, 28, 42, 43], "np": [5, 6, 7], "npu": [0, 5, 6, 25, 35, 39, 45], "nsplit": 28, "null": [9, 17, 27, 42], "num": [28, 39], "num_attention_head": 42, "num_byt": [22, 39, 43], "num_cpu": 35, "num_epoch": [24, 43], "num_flop": [39, 43], "num_gpu": 35, "num_head": [42, 43], "num_hidden_lay": 42, "num_key_value_head": 43, "num_param": [39, 43], "num_rank": 41, "num_sampl": [10, 27, 42, 44], "num_split": [39, 43], "num_step": 43, "number": [11, 13, 16, 17, 22, 27, 28, 39, 42, 43, 44], "numpi": [5, 6, 7, 39, 43], "nvidia": [5, 6, 8, 43, 45], "nvmo": [8, 39], "nvmodeloptquant": 33, "o": [2, 5, 6, 28, 35, 39, 45], "oasst1_train": 18, "object": [1, 2, 3, 22, 33, 43], "object_detect": 43, "objectclass": 3, "occur": [22, 43], "odd": 39, "off": [15, 24, 34, 43], "offlin": [22, 34, 43], "often": 30, "og": [5, 6, 7], "old_nam": [29, 43], "oliv": [1, 2, 3, 6, 7, 8, 10, 11, 13, 14, 16, 17, 18, 19, 20, 22, 24, 28, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44], "olive_ignored_param_valu": 43, "olive_invalid_param_valu": 43, "olive_managed_env": 
35, "olive_output_model": 27, "olive_output_model_cpu": 27, "olivemodelhandl": [3, 41, 43], "oliveol": 43, "olivepass": 33, "omit": 20, "onc": [5, 7, 8, 16, 23, 33, 43], "one": [0, 2, 3, 5, 6, 8, 13, 19, 22, 24, 26, 27, 28, 35, 39, 43], "ones": 33, "onli": [0, 1, 2, 3, 5, 10, 13, 16, 17, 19, 24, 26, 27, 31, 33, 34, 35, 39, 41, 42, 43], "only_onnxruntim": 43, "onnx": [0, 2, 9, 10, 21, 25, 26, 27, 28, 34, 35, 36, 37, 42, 45], "onnx_adapt": [7, 39, 43], "onnx_convers": 42, "onnx_dynam": 39, "onnx_file_nam": 41, "onnx_model_path": 39, "onnx_quant": [33, 42], "onnxconfig": 18, "onnxconvers": [9, 10, 18, 30, 42], "onnxdynamicquant": [33, 42], "onnxepvalidatemixin": 41, "onnxfloattofloat16": [30, 42], "onnxgraphmixin": 41, "onnxiodatatypeconvert": [30, 42], "onnxmatmul4quant": 42, "onnxmodel": 27, "onnxmodelhandl": [3, 23, 41, 42, 43], "onnxoptim": 22, "onnxopversionconvers": [30, 42], "onnxpeepholeoptim": [22, 42], "onnxquant": [32, 33, 42], "onnxrt_cuda_ep": 43, "onnxrt_trt_ep": 43, "onnxruntim": [3, 22, 27, 30, 35, 37, 42, 43], "onnxruntime_genai": [5, 6, 7], "onnxruntimepackag": 27, "onnxscript": [22, 37], "onnxstaticquant": [33, 42], "onnxtransformersoptim": 42, "op": [30, 42, 43], "op_block_list": [30, 43], "op_typ": 29, "op_type_dict": 43, "op_types_to_quant": 43, "open": [0, 22, 23, 30, 33], "openassist": 18, "openmpi4": [11, 35, 42], "openvino": [0, 5, 6, 10, 21, 35, 42], "openvino_docs_ov_converter_ug_conversion_opt": 43, "openvinoconvers": [23, 42], "openvinoexecutionprovid": [5, 6, 35, 39], "openvinomodelhandl": [41, 42, 43], "openvinoquant": [1, 23, 42], "openvinoquantizationwithaccuraci": 23, "oper": [2, 8, 11, 22, 23, 25, 27, 29, 30, 33, 39, 41, 42, 43], "operation_retry_interv": [11, 42], "opportun": 22, "opset": [30, 39, 43], "opt": [5, 6, 7, 8, 24, 36, 37, 39, 43, 45], "opt_level": [10, 43], "opt_level_list": [39, 43], "optim": [0, 2, 4, 9, 17, 18, 23, 24, 28, 30, 31, 34, 35, 36, 40, 42, 43, 45], "optimization_opt": 43, "optimize_model": 43, "optimum": 
[0, 18, 42, 43], "optimumconvers": 42, "optimummerg": 42, "option": [2, 3, 5, 9, 10, 17, 22, 25, 28, 30, 32, 35, 40, 43, 44], "optional_input": 43, "optuna": 44, "optyp": 43, "optypes_to_exclude_output_qu": 43, "orchestr": 45, "order": [22, 27, 29, 42, 43, 45], "org": 43, "organ": [12, 17, 42], "orient": 43, "origin": [10, 23, 24, 29, 34, 43], "ort": [5, 6, 9, 33, 39, 42, 43], "ort_log_severity_level": 42, "ort_past_key_nam": 42, "ort_past_value_nam": 42, "ort_present_key_nam": 42, "ort_present_value_nam": 42, "ort_py_log_severity_level": 42, "orthogon": 24, "ortmixedprecis": [30, 42], "ortperftun": 10, "ortsessionparamstun": [9, 18, 22, 42], "orttransformersoptim": [10, 22, 42], "other": [2, 3, 5, 12, 16, 17, 18, 22, 30, 32, 33, 43], "otherwis": [5, 6, 35, 39, 42, 43], "our": 42, "out": [12, 22, 24, 26, 32, 33, 34, 35, 43], "out_1": 43, "out_2": 43, "out_nod": 43, "outlin": [11, 18], "output": [1, 2, 3, 5, 6, 7, 8, 9, 10, 16, 17, 22, 24, 27, 29, 39, 42, 43, 45], "output0": 43, "output0_chanfirst": 43, "output1": [29, 43], "output2": [29, 43], "output_dir": [9, 10, 27, 42, 43], "output_format": 22, "output_index": 22, "output_model": [39, 43], "output_model_num": 42, "output_model_path": 3, "output_nam": [11, 18, 20, 26, 29, 41, 42, 43], "output_nod": 43, "output_path": [5, 6, 7, 8, 39], "output_shap": [26, 41], "outputmodel": [9, 27], "outputs_to_make_channel_last": 43, "outside_layer_modul": 43, "ov_model": 43, "over": [2, 42, 43, 44], "overrid": [18, 27, 39, 43], "overridden": 32, "overrides_config": 43, "overview": [12, 29, 38], "overwrit": 43, "overwrite_cache_record": 43, "overwrite_output_dir": 43, "overwritten": 43, "own": [1, 2, 4, 5, 14, 15, 17, 18, 19, 20, 22, 36, 38, 39, 42], "p": [28, 39], "pack": [22, 43], "pack_input": 22, "packag": [2, 3, 5, 6, 34, 35, 36, 37, 38, 39, 42, 45], "package_config": 39, "packaging_config": [9, 27, 42], "packaging_config_nam": 27, "packagingconfig": [27, 42], "packagingtyp": 27, "pad": 22, "pad_to_max_len": 18, "page": 
[12, 16, 18], "paged_adamw_32bit": 43, "pair": 43, "pajama": 0, "paper": [24, 34], "parallel": [39, 43], "parallel_job": 43, "param": [1, 3, 5, 6, 7, 17, 22, 42], "param1": 3, "param2": 3, "param3": 3, "param4": 3, "param5": 3, "param6": 3, "paramcategori": 3, "paramet": [2, 3, 8, 11, 17, 22, 23, 24, 25, 26, 28, 30, 32, 33, 34, 39, 40, 42, 43, 44, 45], "parent": [3, 42, 43], "pareto": [2, 42], "pars": 42, "part": 22, "particular": 43, "particularli": 30, "parzen": [2, 42, 44], "pass": [1, 4, 9, 10, 11, 12, 16, 17, 18, 22, 23, 24, 25, 26, 27, 29, 30, 31, 33, 34, 35, 36, 39, 40, 45], "pass_flow": [10, 42], "passconfigparam": 3, "past": [5, 6, 7, 18, 39, 41, 42, 43], "past_key_valu": [39, 42, 43], "past_key_value_nam": [39, 43], "past_kv_dynamic_axi": 42, "past_present_share_buff": [5, 6, 7], "past_seq_len": 17, "past_sequence_length": 42, "path": [1, 3, 11, 12, 13, 15, 17, 18, 21, 25, 26, 27, 35, 39, 41, 42, 43], "pathlib": 43, "pattern": [7, 22, 24, 30, 31, 42, 43], "pc": 33, "pca": 43, "pdf": 43, "peft": [7, 8, 22, 24, 39], "per": [2, 28, 43], "per_channel": [32, 33, 43], "percdamp": 43, "percent": [10, 19, 42, 43], "percentag": [42, 43], "percentil": 43, "perchannel": 43, "perform": [2, 8, 24, 27, 29, 30, 33, 34, 42, 43, 45], "permut": 29, "perplex": [24, 43], "phi": [28, 33, 39], "phi2": 0, "phi3": 33, "phrase": [5, 6, 7, 8], "phrase_classif": [7, 8], "phrase_classifi": 7, "pile": 43, "pip": [15, 16, 23, 35, 38], "pipelin": [2, 15, 42], "pixelstoycbcr": 22, "place": [1, 39, 43], "placehold": [7, 22], "platform": [2, 5, 6, 8, 22, 23, 30, 33, 35], "platform_sdk": 43, "pleas": [3, 9, 11, 12, 15, 18, 19, 22, 23, 24, 25, 26, 27, 28, 30, 31, 33, 34, 35, 37, 42, 43], "plot": 42, "plot_pareto_fronti": 42, "plu": [5, 6, 27], "png": 22, "point": [2, 3, 8, 22, 23, 25, 26, 33, 39, 42, 43, 44], "pool": 22, "poor": 30, "popul": 29, "popular": [17, 28, 33], "posit": [17, 42], "position_id": 18, "possibl": [2, 22, 28, 30, 32, 34, 35, 39], "post": [0, 1, 8, 17, 24, 28, 36, 42, 
43], "post_process": [1, 17, 42], "post_process_data": 17, "post_process_data_config": [1, 17, 42], "postprocess": 43, "potenti": [16, 43], "power": [10, 15, 28, 33], "pre": [0, 24, 28, 32, 33, 35, 36, 42, 43], "pre_post_process_quant": 43, "pre_process": 17, "pre_process_data": 17, "pre_process_data_config": [10, 17, 18], "precis": [5, 6, 8, 10, 23, 28, 31, 33, 36, 39, 42, 43, 45], "pred": [19, 42], "predefin": [42, 43], "predict": [17, 26, 42], "predict_with_kv_cach": 39, "prefer": [30, 43], "prefix": [27, 42], "prefix_vocab_mask": 43, "prepar": [18, 43], "prepare_qnn_config": 43, "prepend": 35, "prepend_to_path": 35, "prepostprocessor": [22, 43], "prepostprocessorinput": [22, 43], "preprocess": [17, 33, 39, 42, 43], "prerequisit": [39, 43], "present": [42, 43], "present_kv_dynamic_axi": 42, "preserv": 43, "preset": 43, "presetenum": 43, "press": [5, 6], "pretrain": 24, "previou": 39, "primari": 33, "primit": 36, "print": [1, 5, 6, 7], "prioriti": [1, 10, 18, 19, 27, 39, 42], "priority_hint": 43, "privat": 39, "probabl": 43, "problem": 43, "proce": 16, "process": [0, 1, 5, 6, 15, 16, 17, 18, 23, 24, 26, 28, 33, 36, 39, 42, 43], "produc": [7, 28, 29, 39, 45], "product": 45, "profil": [39, 43], "program": [5, 6], "prompt": [5, 6, 17, 39], "propag": [22, 35], "proper": [22, 24], "proprietari": 39, "proto": [22, 43], "protobuf": 30, "provid": [1, 2, 3, 5, 6, 7, 8, 10, 11, 13, 15, 16, 17, 18, 19, 22, 24, 25, 26, 27, 28, 29, 32, 33, 35, 36, 41, 42, 43, 45], "provider_opt": 27, "provider_options_list": 43, "providers_list": [22, 39, 43], "prune": [8, 24, 33, 43], "pt": [11, 20, 39], "ptl_data_modul": [24, 43], "ptl_modul": [24, 43], "ptldatamodul": 24, "ptlmodul": 24, "public": 37, "publicresourc": 43, "pull": [5, 6], "purpos": 18, "py": [1, 5, 6, 7, 17, 19, 22, 24, 26, 27, 33, 39, 42, 43], "py_vers": [25, 26, 39], "pydant": 43, "pypi": 37, "python": [1, 2, 5, 6, 7, 13, 15, 17, 25, 26, 27, 37, 39, 42, 43], "python_environment_path": 35, "python_system": 35, 
"pythonenviron": [35, 42], "pythonenvironmentsystem": 2, "pytorch": [0, 6, 7, 8, 21, 25, 28, 30, 33, 36, 42, 45], "pytorch_entire_model": 41, "pytorch_lightn": 43, "pytorchmodel": 20, "pytorchmodelhandl": [23, 39, 41, 42, 43], "q": 36, "q_group_siz": 43, "qat": [0, 24, 43], "qconfig": [24, 43], "qconfig_func": 43, "qdq": [0, 33, 39, 42, 43], "qint8": [33, 43], "qkv": 22, "qlora": [0, 22, 39, 42], "qnn": [0, 21, 30, 33, 39, 42], "qnn_backend": 25, "qnn_extra_opt": 43, "qnn_model": 43, "qnn_sdk_root": 25, "qnncontextbinarygener": [25, 42], "qnnconvers": [25, 42], "qnnexecutionprovid": [5, 6, 39], "qnnmodelhandl": 43, "qnnmodellibgener": [25, 42], "qnnpreprocess": [33, 42], "qoper": 43, "qualcomm": [0, 5, 6, 25, 26, 43, 45], "qualifi": 42, "qualiti": [6, 34, 45], "quant": 43, "quant_data_config": 26, "quant_format": [33, 43], "quant_level": 43, "quant_mod": 43, "quant_preprocess": 43, "quant_typ": [24, 43], "quantiz": [0, 2, 5, 6, 9, 28, 29, 36, 38, 42, 43, 45], "quantizationawaretrain": [24, 42], "quantize_int4": 39, "quantized_tensor": 29, "quantizelinear": 43, "quanttyp": 43, "quarot": 42, "queri": 22, "question": [28, 39], "quick": 10, "quickstart": 5, "quint8": [32, 33, 42, 43], "quot": 43, "r": [39, 43], "rais": [28, 35], "random": [2, 39, 42, 43, 44], "randomli": 2, "rang": [5, 22, 43], "rank": [19, 24], "rapid": 33, "rate": 43, "rather": [8, 43], "ratio": 43, "raw": [17, 43], "raw_data": [17, 29], "raw_dataset": 17, "rawdatacontain": 17, "reach": 33, "read": [5, 7, 11, 18, 23, 35, 42], "read_timeout": [11, 42], "readi": 16, "readthedoc": [43, 44], "real": 43, "rearrang": 43, "recal": 42, "receiv": [3, 42], "recip": 43, "recommend": [5, 26, 36, 37, 43], "recommendation_system": 43, "recov": 8, "recv_nod": 43, "red": 0, "reduc": [7, 8, 22, 23, 24, 33, 34, 43, 45], "reduce_rang": [32, 43], "reducemean": 43, "redund": [10, 22, 32], "ref": 32, "refer": [8, 9, 11, 17, 18, 19, 22, 23, 24, 25, 26, 28, 30, 31, 33, 34, 35, 37, 38, 39, 42, 43, 44], "regardless": 43, 
"regist": [2, 20, 27, 42], "register_dataload": [1, 17], "register_dataset": 17, "register_post_process": [1, 17], "register_pre_process": 17, "registr": 27, "registri": [0, 1, 5, 6, 7, 17, 39], "registry_nam": [11, 18, 20, 39], "regular": 31, "rel": [11, 20], "relat": [22, 40, 42, 43], "relationship": 2, "relative_path": [11, 20, 42], "reli": 30, "relu": [29, 30, 43], "relu_output": 29, "relunod": 29, "remain": 43, "remot": [7, 14, 17, 35, 36, 38, 39], "remov": [22, 29, 39, 43], "renam": 29, "renamed_input1": [29, 43], "renamed_input2": [29, 43], "renamed_output1": [29, 43], "renamed_output2": [29, 43], "renameinput": 43, "renameoutput": 43, "reorder": 29, "replac": [2, 29, 42, 43, 44], "repo": [5, 6, 8], "repositori": [28, 37], "repres": [3, 22, 29, 30, 33, 35, 41, 43], "represent": [8, 26], "request": [11, 42], "requir": [2, 3, 8, 10, 13, 16, 17, 18, 20, 22, 24, 27, 28, 32, 33, 34, 35, 39, 42, 43], "requirements_fil": [27, 35], "reserv": 18, "reshap": 22, "reshape_1": 26, "resid": [5, 6], "resiz": 22, "resize_to": 22, "resnet": [0, 42], "resolut": 0, "resolv": 43, "resourc": [7, 11, 13, 18, 20, 27, 35, 39, 42], "resource_group": [7, 11, 13, 18, 20, 35, 39, 42], "resource_group_nam": 7, "resourcegroup": 42, "resourcepath": 41, "resourcepathconfig": 41, "respect": [2, 33, 45], "respons": [2, 39, 41], "result": [19, 24, 29, 30, 35, 42, 43], "result_kei": 42, "resume_from_checkpoint": 43, "retain": 30, "retri": [11, 42], "retriev": [18, 42], "return": [1, 2, 3, 19, 39, 42, 43], "reus": 16, "rewrit": 17, "rich": 28, "rmax": 43, "rmin": 43, "rng": 44, "rocmexecutionprovid": [5, 6, 39], "root": [3, 11, 20, 39], "rotat": [34, 42, 43], "rotate_mod": [34, 43], "rotatebas": 43, "rotatemod": 43, "round": 43, "round_interv": 43, "row": [24, 39, 43], "rtn": [6, 39, 43], "rtx": 33, "rule": 10, "run": [2, 5, 6, 7, 8, 10, 11, 12, 14, 16, 17, 18, 19, 22, 23, 24, 25, 26, 27, 28, 30, 33, 35, 36, 38, 42, 43, 45], "run_config": 39, "runtim": [0, 2, 8, 9, 22, 26, 27, 28, 30, 33, 35, 
39, 42, 43, 45], "safe": 15, "safetensor": 39, "sai": [1, 2, 3], "same": [2, 3, 5, 6, 10, 15, 16, 18, 22, 27, 30, 33, 35, 41, 42, 43], "sampl": [2, 39, 42, 43, 44], "samplecod": 27, "sampler": 44, "save": [20, 22, 39, 42, 43], "save_as_external_data": [9, 33, 43], "save_config_fil": 39, "save_format": 43, "save_metadata_for_token_gener": [9, 43], "save_quant_config": 43, "scalabl": 28, "scale": [8, 13, 29, 33, 39, 43], "scale_0": 29, "scaledown": 39, "scenario": [0, 22, 27, 30, 36, 42, 43], "schedul": 43, "schema": [42, 43], "scheme": [24, 43], "scope": 43, "score": 27, "scoring_script": 27, "script": [3, 4, 12, 14, 17, 27, 42, 43], "script_dir": [17, 39, 41, 42, 43], "sdk": [11, 42, 43], "seamless": 33, "search": [3, 10, 32, 39, 40, 42, 43, 44, 45], "search_algorithm": [2, 10, 27, 42], "search_algorithm_config": [10, 27, 42], "search_default": [3, 43], "search_opt": [5, 6], "search_strategi": [10, 27, 42], "searchabl": [2, 3, 32, 33], "searchable_valu": [32, 33], "searcher": 44, "searchparamet": 3, "second": [2, 11, 13, 39, 42], "secret": [18, 42], "section": [2, 11, 18, 20, 28, 42], "see": [12, 22, 30, 39, 42, 43], "seed": [10, 27, 39, 42, 43, 44], "seen": 28, "select": [2, 12, 33, 39, 43], "self": [1, 3], "semi": [24, 43], "sensibl": [5, 6], "sensit": 39, "sentence1": [10, 17], "sentence2": [10, 17], "separ": [1, 11, 22, 37, 39, 42, 43], "seq_len": 17, "seqlen": [39, 43], "sequenc": [9, 22, 36, 39, 42, 43, 45], "sequence_length": [18, 42], "sequence_length_idx": 42, "sequenceconstruct": 22, "sequenti": [29, 43], "serial": 43, "serv": [7, 17, 45], "server": 27, "servic": 18, "session": [15, 22, 28, 43], "session_opt": [27, 42], "session_params_tun": [9, 18], "session_params_tuning_data_config": 22, "sessionopt": 43, "set": [2, 3, 5, 6, 7, 9, 10, 11, 12, 16, 18, 19, 22, 25, 26, 27, 30, 32, 33, 39, 42, 43, 45], "set_active_adapt": 7, "set_search_opt": [5, 6, 7], "setup": [33, 39], "sever": [12, 17, 28, 42], "shape": [22, 29, 39, 42, 43], "share": [14, 42], 
"shared_kv": 42, "ship": 45, "shot": [8, 24], "should": [1, 3, 12, 13, 17, 29, 30, 39, 41, 42, 43], "show": [7, 17, 28], "show_unconsumed_nod": 43, "shut": 15, "sigmoid": 43, "sign": [5, 6, 43], "signatur": 42, "signific": 24, "similar": 30, "simlar": 34, "simpl": [3, 5, 6, 10, 22, 27], "simple_dataset": 17, "simplest": 30, "simpli": [17, 39], "simplif": 22, "simplifi": [18, 22, 35], "simplifiedlayernorm": [30, 43], "simultan": 24, "sinc": [3, 5, 6, 16, 22, 27, 32, 35], "singl": [0, 7, 8, 17, 27, 28, 29, 33, 34, 43], "site": 43, "situat": 43, "size": [13, 16, 19, 23, 24, 27, 28, 30, 39, 42, 43], "size_threshold": 43, "skip": [16, 30, 42], "skip_dataset": 1, "skiplayernorm": 30, "skipsimplifiedlayernorm": [30, 43], "sku": [27, 35], "slice": [22, 24, 43], "slicegpt": [0, 42], "slm": 28, "slower": 16, "small": [11, 22, 28, 42], "smaller": [24, 42, 43], "smollm": [5, 6], "smooth": 43, "smooth_quant": 43, "smooth_quant_arg": 43, "snapdragon": 26, "snippet": 7, "snpe": [0, 10, 21, 30, 39, 42], "snpe_root": 26, "snpeconvers": [26, 42], "snpedevic": 43, "snpemodelhandl": [41, 42, 43], "snpequant": [26, 42], "snpetoonnxconvers": 42, "so": [3, 5, 6, 8, 25, 27, 28, 30, 43], "soc": 43, "softwar": [25, 26], "solut": 33, "some": [2, 3, 8, 17, 22, 30, 32, 35, 42, 43], "someth": [5, 6], "sometim": [11, 17, 42], "soon": 43, "sort": 27, "sourc": [5, 41, 42, 43], "source_dtyp": [30, 43], "space": [2, 3, 32, 42, 43, 44], "spars": [24, 31, 43], "sparsegpt": [0, 42], "sparsif": 24, "sparsiti": [0, 8, 24, 31, 43], "spec": [2, 28, 35], "special": [39, 43], "specialparamvalu": 43, "specif": [1, 2, 4, 18, 22, 25, 27, 29, 30, 32, 33, 36, 38, 39, 42, 43], "specifi": [1, 3, 10, 11, 12, 13, 18, 19, 20, 24, 27, 28, 29, 30, 33, 35, 39, 42, 43], "spectrogram": 0, "speed": [8, 16, 33, 34, 43], "speedup": 24, "split": [10, 17, 18, 22, 36, 42, 43], "splitmodel": 42, "sport": 7, "squeez": 22, "squeezenet": 0, "ss": 15, "stabl": [0, 37, 43, 44], "stack": 10, "stage": 30, "stand": [23, 30], "standalon": 
43, "standard": [33, 42, 43], "standard_nd12": 35, "standard_nd24": 35, "standard_nd24r": 35, "standard_nd40rs_v2": 35, "standard_nd6": 35, "standard_nd96amsr_a100_v4": 35, "standard_nd96asr_v4": 35, "start": [17, 27, 36, 38, 43, 45], "start_header_id": [5, 6, 7, 8], "state": 8, "static": [0, 3, 33, 42, 43], "static_group": 43, "step": [3, 8, 9, 11, 12, 15, 16, 18, 22, 33, 42, 43, 45], "still": [10, 20, 28], "stop": 42, "stop_when_goals_met": 42, "storag": 39, "store": [5, 6, 8, 15, 16, 20, 39, 42, 43], "str": [1, 3, 10, 11, 16, 17, 27, 41, 42, 43], "strategi": [18, 36, 40, 42, 43, 45], "strategy_kwarg": 43, "stream": [5, 6, 11, 42], "strftime": 43, "strict": 43, "strictli": 43, "string": [3, 18, 22, 39, 42, 43], "string_nam": 20, "string_to_int_dim_param": 42, "strive": 33, "structur": [2, 22, 24, 28, 31, 42, 43, 44], "sub": [2, 22, 42, 43], "sub_mul_0": 29, "sub_tanh_0": 29, "sub_typ": [1, 10, 18, 19, 42], "subexpress": 22, "subgraph": [28, 43], "submit": 15, "subscript": [11, 13, 35, 39, 42], "subscription_id": [11, 13, 18, 20, 35, 39, 42], "subset": [10, 17, 39], "subtyp": 42, "successfulli": [7, 8], "suggest": 44, "suit": 30, "suitabl": [30, 43], "super": 0, "superced": 43, "superresolut": 22, "suppli": 39, "support": [1, 2, 3, 5, 7, 10, 11, 16, 18, 20, 22, 23, 24, 30, 31, 32, 33, 34, 35, 36, 39, 41, 42, 43], "suppurt": 42, "sure": [12, 16, 18], "surgeon": [36, 43], "surgeri": [42, 43], "surround": 43, "swift": 33, "sy": 1, "sym": 43, "symbol": [30, 39, 43], "symmetr": [39, 43], "system": [3, 5, 6, 9, 10, 11, 33, 36], "t": [3, 11, 20, 22, 27, 28, 30, 35, 39, 42, 43], "t1": 43, "tabl": [17, 42], "tag": 35, "tail": 43, "tailor": 33, "take": [3, 5, 6, 8, 12, 15, 17, 19, 28, 31, 36, 39, 42, 43], "tanh": 29, "target": [0, 2, 6, 9, 10, 18, 19, 25, 27, 28, 30, 33, 35, 36, 39, 42, 43, 45], "target_devic": 43, "target_dtyp": [30, 43], "target_environ": 27, "target_environment_vers": 27, "target_modul": [39, 43], "target_opset": [9, 22, 30, 39, 42, 43], "task": [1, 4, 
5, 10, 17, 18, 22, 39, 41, 42, 45], "team": 45, "techniqu": [2, 3, 4, 22, 24, 28, 33, 34, 36, 42, 43, 45], "technologi": 25, "tempdir": 39, "temperatur": 43, "tempfil": 39, "templat": [5, 6, 10, 39, 42], "tensor": [8, 22, 24, 29, 31, 34, 39, 43], "tensor_nam": 43, "tensorfloat": 43, "tensorflow": [25, 26, 33, 39, 42, 43], "tensorflowmodelhandl": [23, 43], "tensorquantoverrid": 43, "tensorrt": [3, 5, 6, 8, 24, 31, 39, 42, 43], "tensorrtexecutionprovid": [5, 6, 10, 35, 39], "term": 28, "termin": [15, 18], "test": [8, 26, 29, 37, 39, 42, 43], "text": [5, 6, 10, 17, 18, 39, 41, 42], "text_classification_post_process": 17, "text_col": 18, "text_field": 39, "text_generation_huggingface_pre_process": 17, "text_generation_post_process": 17, "text_templ": [7, 8, 39], "tf": 43, "tf2onnx": 43, "tf32": 43, "than": [6, 8, 19, 28, 33, 39, 43], "thei": [11, 17, 27, 35, 39, 42], "them": [1, 13, 18, 19, 22, 25, 26, 28, 30, 33, 34, 43], "therefor": [5, 6, 28, 35], "thi": [1, 2, 3, 5, 6, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 39, 42, 43, 44], "thing": 32, "thread": [22, 39, 43], "three": [2, 32], "threshold": [22, 42, 43], "through": [5, 6, 10, 22, 24, 33, 42], "throughput": [2, 36, 42], "throughput_data_config": 19, "thu": [2, 22, 33, 42], "timdettm": 18, "time": [5, 6, 10, 11, 13, 15, 16, 22, 23, 27, 30, 33, 35, 42, 43, 45], "timeout": [11, 42, 43], "tip": 43, "togeth": [2, 22, 42, 45], "token": [2, 5, 6, 7, 17, 18, 24, 39, 42, 43], "token_type_id": 17, "tokenizer_dir": 33, "tokenizer_stream": [5, 6, 7], "toler": 43, "tone": [7, 8], "too": [11, 28, 42], "tool": [8, 10, 13, 22, 25, 26, 28, 33, 36, 42, 43], "tool_command": [22, 43], "tool_command_arg": [22, 43], "toolkit": [0, 23, 45], "tools_snp": 43, "top": [15, 20, 27, 42], "topic": 43, "torch": [30, 31, 34, 39, 42, 43], "torch_dtyp": [39, 43], "torch_metr": 42, "torch_tensorrt": 31, "torchmetr": 42, "torchtrt": 0, "torchtrtconvers": 42, "total_sequence_length": 18, 
"tp": 2, "tpe": [2, 10, 27, 39, 42, 44], "tpesampl": 44, "trade": 34, "train": [0, 8, 18, 22, 30, 34, 39, 42, 43], "train_data_config": [24, 43], "train_split": 39, "train_subset": 39, "trainabl": [22, 24, 43], "trainer": 43, "training_arg": [24, 43], "training_loop_func": [24, 43], "trainingargu": 43, "transform": [0, 5, 8, 17, 18, 23, 24, 28, 29, 30, 31, 34, 37, 42, 43, 45], "transformer_token_dummy_data": 9, "transformers_dummy_data_config": 17, "transformers_optim": 42, "transformersdummydatacontain": 17, "transformerspromptdummydatacontain": 17, "transformerstokendummydatacontain": [9, 17], "transit": 23, "transpos": [22, 43], "tree": [2, 42, 44], "trial": [2, 45], "trt": [0, 10], "trt_fp16": 10, "trt_fp16_enabl": [39, 43], "trt_perf_tun": 10, "trt_transformers_optim": 10, "trtmodul": 31, "true": [3, 10, 16, 18, 19, 22, 26, 27, 30, 32, 33, 35, 39, 42, 43, 44], "true_sequenti": 43, "trust": [17, 39], "trust_remote_cod": [5, 6, 7, 8, 17, 39], "try": [5, 6, 10, 42], "tune": [0, 2, 3, 7, 8, 24, 33, 34, 42, 43, 45], "tuning_criterion": 43, "tuningcriterion": 43, "turn": [2, 15, 36, 43], "tutori": [12, 22], "two": [2, 22, 32, 33, 35, 43], "txt": [17, 25, 27, 35], "type": [1, 2, 3, 9, 10, 11, 15, 17, 18, 20, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 35, 39, 41, 42, 43, 44], "type_": 3, "typic": [16, 35, 45], "u": 22, "ubuntu20": 42, "ubuntu22": [11, 35], "uint16": 39, "uint32": 39, "uint4": [39, 43], "uint8": [22, 39, 43], "uncas": [10, 17], "under": [10, 17, 42], "undergo": 23, "underneath": [22, 44], "underscor": 17, "understand": 28, "unet": 43, "uniform": 23, "union": [3, 42], "uniqu": [17, 42], "unit": 22, "unless": 43, "unset": 43, "unsign": 43, "unsqueez": 22, "unstructur": 24, "unsupport": 33, "until": 16, "unus": 22, "up": [5, 6, 7, 8, 10, 16, 18, 19, 24, 34, 38, 43], "updat": [7, 8, 30, 43], "update_shared_cach": 16, "upload": [3, 16, 27], "upon": 34, "url": [16, 42], "us": [1, 2, 3, 8, 9, 12, 13, 16, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 41, 42, 43, 44], "usabl": 33, "usag": [18, 24, 39], "use_audio_decod": 22, "use_chat_templ": 39, "use_dynamo_export": [39, 43], "use_enhanced_quant": 43, "use_external_data_format": 43, "use_forced_decoder_id": 43, "use_gpu": 43, "use_gqa": 43, "use_int4": 43, "use_logits_processor": 43, "use_model_build": 39, "use_ort_extens": [27, 41], "use_ort_genai": [5, 6, 7, 8, 39], "use_prefix_vocab_mask": 43, "use_qdq_encod": 39, "use_symbolic_shape_inf": 43, "use_temperatur": 43, "use_transpose_op": 43, "use_vocab_mask": 43, "user": [2, 3, 5, 6, 7, 8, 10, 11, 17, 22, 27, 28, 30, 32, 33, 34, 35, 36, 39, 41, 42, 43, 45], "user_config": [19, 42], "user_dir": 17, "user_input": 7, "user_script": [17, 19, 22, 24, 26, 33, 42, 43], "usernam": [5, 6, 8], "usual": 33, "util": [28, 42, 43], "v2": 26, "vae": 43, "vai_q_onnx": 43, "val_data_config": 43, "valid": [3, 10, 17, 22, 23, 41, 42, 43], "validation_func": 23, "valu": [1, 2, 3, 8, 10, 11, 13, 17, 18, 19, 22, 24, 27, 29, 30, 32, 33, 39, 42, 43], "value1": 39, "value2": 39, "value_info": 29, "var": 27, "variabl": [12, 25, 26, 35, 42], "varieti": 33, "variou": [22, 39], "vault": 18, "vector": 8, "vendor": 45, "verbos": 42, "verif": 12, "version": [11, 18, 20, 23, 27, 29, 35, 37, 39, 42, 43], "versu": 7, "vgg": 0, "via": [7, 11, 42, 43], "video": 33, "virtual": [5, 35, 37], "virtualenv": 35, "vision": 0, "visual": 23, "viti": [0, 42, 43], "vitis_ai_quant": 33, "vitisaiexecutionprovid": 39, "vitisaiquant": [33, 42], "vm": [13, 39], "vm_size": [13, 39], "vnni": 43, "vocab_mask": 43, "vpu": 43, "vscode": 42, "w": 43, "w_bit": [34, 43], "wa": 34, "wai": [16, 17, 19, 22, 24, 28, 33, 39, 43], "want": [7, 8, 11, 15, 16, 17, 18, 27, 30, 37, 42, 43], "warmup": 43, "warmup_ratio": 43, "warn": [39, 42], "we": [2, 5, 10, 12, 17, 22, 26, 28, 32, 36, 37, 42, 43], "web": 22, "webgpu": 30, "weight": [0, 2, 5, 6, 7, 8, 22, 23, 24, 26, 31, 33, 34, 39, 42, 43], "weight_correct": 43, "weight_onli": [33, 43], 
"weight_only_config": [33, 43], "weight_only_quant_config": 43, "weight_precis": 39, "weight_typ": [32, 33, 42, 43], "weightsfileformat": 43, "weightsymmetr": 43, "well": [17, 23, 43], "were": [27, 42, 43], "what": 43, "when": [1, 2, 3, 16, 18, 22, 23, 26, 27, 32, 35, 39, 42, 43, 44], "where": [1, 11, 13, 17, 22, 24, 27, 28, 30, 31, 35, 36, 39, 42, 43], "whether": [3, 17, 22, 27, 28, 32, 39, 42, 43], "which": [2, 3, 5, 6, 7, 10, 11, 17, 18, 19, 22, 23, 24, 25, 27, 30, 33, 34, 35, 39, 41, 42, 43, 45], "while": [3, 5, 6, 7, 22, 28, 30, 33, 43], "whisper": [0, 22, 41, 42, 43], "whisperbeamsearch": [42, 43], "whose": [29, 43], "width": [22, 24], "wikitest": 43, "wikitext2": 24, "wikitext2_train": 34, "window": [5, 6, 16, 43], "wise": 43, "wish": 8, "with_replac": 44, "within": [11, 18], "without": [2, 15, 16, 34, 36, 39, 42, 44], "wonder": 7, "word_length": 42, "work": [34, 42], "workflow": [1, 2, 4, 11, 14, 16, 20, 27, 38, 39, 43, 45], "workflow_host": [15, 42], "workflow_id": [15, 42], "workspac": [2, 7, 11, 12, 13, 18, 20, 27, 35, 39, 42, 43], "workspace_nam": [7, 11, 13, 18, 20, 35, 39, 42], "workspaceblobstor": [15, 35], "world_siz": 42, "would": [7, 10, 17, 43], "write": [1, 10, 22, 23], "x": 43, "x86_64": [25, 43], "xilinx": 42, "xl": 0, "xxyyzzz": [7, 8], "y": [39, 43], "y1_uint8": 22, "yaml": [11, 35, 42, 45], "ycbcrtopixel": 22, "ye": 39, "yet": 22, "yml": 42, "you": [1, 5, 6, 7, 8, 9, 11, 12, 13, 15, 16, 17, 18, 19, 20, 22, 25, 26, 27, 29, 30, 32, 34, 35, 36, 37, 39, 42, 43, 45], "your": [0, 1, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 25, 26, 27, 34, 36, 37, 38, 39, 42, 43, 45], "yourself": 19, "yyyi": 15, "zero": [8, 29, 33, 43], "zero_point": [29, 43], "zip": [9, 26, 27], "zipfil": 9}, "titles": ["Examples", "Custom Scripts", "Design", "How to add new optimization Pass", "Extending Olive", "Getting started", "Auto Optimization", "Finetune", "Quantize", "Run Olive workflows", "How to use Automatic Optimizer", "Azure AI Integration", 
"Self-hosted Kubernetes cluster", "Azure ML scripts", "Azure AI", "Remote Workflow", "Shared Cache", "How To Configure Data", "Huggingface Integration", "How To Configure Metrics", "How To Set Model Path", "Model Configuration", "ONNX", "OpenVINO", "PyTorch", "QNN", "SNPE", "Packaging Olive artifacts", "Model Splitting", "ONNX Surgeon Classes Documentation", "ONNX", "PyTorch", "How to configure a Workflow Pass", "ONNX Quantization", "PyTorch Quantization", "How To Configure Systems", "How-to", "Installation", "Olive: The AI Model Optimization Toolkit for the ONNX Runtime", "Command Line Tools", "Reference", "OliveModels", "Olive Options", "Passes", "SearchAlgorithms", "Overview"], "titleterms": {"0": [5, 6, 7, 8, 9], "001c": [6, 7, 8, 9], "002": [6, 7, 8, 9], "002l": 9, "003": [6, 7, 8, 9], "003a1": 9, "004": [6, 7, 8, 9], "004l": [6, 7, 8, 9], "004l9": [6, 7, 8, 9], "004z": [6, 7, 8, 9], "005": 9, "006": [6, 7, 8, 9], "006a": [6, 7, 8, 9], "008": [6, 7, 8, 9], "009": [6, 7, 8, 9], "009l": [6, 7, 8, 9], "018": 9, "02": 9, "02l": 9, "034l": [6, 7, 8, 9], "036": [6, 7, 8, 9], "042": 9, "042z": 9, "045": 9, "047": [6, 7, 8, 9], "058": 9, "06": 9, "064": 9, "06l": 9, "06l2": [6, 7, 8, 9], "06zm6": 9, "074l3": [6, 7, 8, 9], "088": 9, "09": 9, "0em": [5, 6, 7, 8, 9], "0h": 9, "0h12": 9, "0h2a": [5, 7], "0l2": 9, "0v": [5, 7], "0v1": [5, 7], "0z": 9, "0zm8": 9, "1": [3, 5, 6, 7, 8, 9, 23], "10": [6, 7, 8, 9], "102a1": 9, "103": 9, "11": 9, "112": 9, "116": 9, "117": 9, "118": 9, "119": 9, "119l": 9, "11l1": 9, "12": [6, 7, 8, 9], "123": 9, "123c1": [6, 7, 8, 9], "123zm3": 9, "124": 9, "125": 9, "125l3": 9, "127v1": 9, "12zm6": 9, "138": 9, "14": 9, "142a1": [6, 7, 8, 9], "143": 9, "145": 9, "145l": 9, "148a": 9, "15": 9, "157": [6, 7, 8, 9], "16": [5, 6, 7, 8, 9], "16h1": 9, "172": 9, "175l": 9, "176": 9, "177": 9, "178": 9, "186a7": 9, "186a8": 9, "186c15": 9, "186l": 9, "188": [6, 7, 8, 9], "2": [3, 5, 6, 7, 8, 9, 23], "201": 9, "201c": 9, "204c": 9, "207c0": 9, "213": 
9, "213zm3": 9, "214": 9, "215": 9, "215l": 9, "216": 9, "218": 9, "22": [6, 7, 8, 9], "234": 9, "249": [6, 7, 8, 9], "25": [5, 6, 7, 8, 9], "25a": [5, 6, 7, 8, 9], "25a2": [5, 7], "25h": [5, 7, 9], "25h12": 9, "25v1": 9, "25v12": 9, "25v8h": [5, 7], "25v9": [5, 7], "25zm1": 9, "25zm7": 9, "26": 9, "266": 9, "266h3": 9, "273": 9, "275": 9, "286": [6, 7, 8, 9], "288": 9, "2a": 9, "2h6": [5, 7], "3": [3, 5, 6, 7, 8, 9], "302a1": [6, 7, 8, 9], "309a": 9, "31": 9, "314": 9, "315": 9, "317": 9, "326": 9, "334": 9, "336": [5, 7], "338": 9, "34": [6, 7, 8, 9], "346": [6, 7, 8, 9], "35": 9, "354h": [6, 7, 8, 9], "4": [6, 7, 8, 9], "414": [5, 7], "415": [6, 7, 8, 9], "416": 9, "418": [6, 7, 8, 9], "418l": 9, "429zm1": [6, 7, 8, 9], "437": [6, 7, 8, 9], "43a1": [6, 7, 8, 9], "458": 9, "458a7": 9, "458a8": 9, "458c": 9, "459": 9, "459c": 9, "462": [6, 7, 8, 9], "462c": [6, 7, 8, 9], "47": 9, "47a": 9, "499": 9, "49l": 9, "4h3": 9, "5": [5, 6, 7, 8, 9], "502": 9, "503": [6, 7, 8, 9], "504": [6, 7, 8, 9], "516": [6, 7, 8, 9], "521": 9, "528": 9, "53": 9, "538": [6, 7, 8, 9], "53a": 9, "548": 9, "56": 9, "564": 9, "56c": 9, "57": [6, 7, 8, 9], "571a": [6, 7, 8, 9], "583": 9, "59": 9, "598": 9, "5a": [5, 7, 9], "5a1": 9, "5a2": [5, 7], "5c": 9, "5c0": [5, 7, 9], "5c15": 9, "5h": [5, 7], "5h2": [6, 7, 8, 9], "5h3": [5, 6, 7, 8, 9], "5h7": [5, 7], "5l": 9, "5v": [5, 7], "5z": [5, 7], "5zm3": [5, 7], "6": [5, 6, 7, 8, 9], "608": 9, "616": 9, "623h": 9, "633": [6, 7, 8, 9], "641": 9, "665": 9, "671": 9, "678": 9, "683": [6, 7, 8, 9], "69": 9, "694": [6, 7, 8, 9], "7": [5, 6, 7, 8, 9], "707a": 9, "71": 9, "713": [6, 7, 8, 9], "713l10": [6, 7, 8, 9], "717a": 9, "723l5": [6, 7, 8, 9], "732": 9, "734": 9, "734l10": 9, "74": 9, "748": 9, "749": 9, "75": [5, 6, 7, 8, 9], "751": 9, "752": 9, "754": 9, "75a": [5, 6, 7, 8, 9], "75a1": 9, "75a2": [5, 7], "75c0": 9, "75h": [5, 7], "75h8": [5, 7], "75v": [5, 7, 9], "75v1": [5, 7], "75v12": 9, "75v2": [5, 7], "75v3": [5, 7], "75v5": [5, 7], 
"75v6": [5, 7], "75z": [5, 7], "75zm5": [5, 7], "76": 9, "766c": 9, "774": 9, "774a1": 9, "78": 9, "783": 9, "784": 9, "793a": [6, 7, 8, 9], "7h9": [6, 7, 8, 9], "8": [5, 6, 7, 8, 9], "816a1": 9, "833": 9, "871": [6, 7, 8, 9], "8l1": 9, "9": [5, 7, 9], "902": 9, "904": 9, "904v3": 9, "909": [6, 7, 8, 9], "935c": 9, "938": 9, "94": 9, "949": 9, "954": 9, "95zm12": 9, "979": 9, "979l": 9, "97a": 9, "982": [6, 7, 8, 9], "9h6": [6, 7, 8, 9], "The": 38, "To": [17, 19, 20, 35], "acceler": 35, "accuraci": 19, "adapt": [7, 22, 39], "add": [3, 12], "advanc": 36, "ai": [11, 14, 33, 38], "algorithm": 2, "altern": 16, "amd": 33, "aml": 12, "aml_config": 42, "aml_config_path": 42, "an": [11, 15], "append": 22, "appendprepostprocessingop": 43, "approach": 28, "arc": 12, "argument": [5, 6, 39], "aria": [5, 6, 7, 8, 9], "artifact": 27, "auto": [6, 7, 10, 28, 39], "autoawq": 34, "autoawqquant": 43, "autogptq": 34, "automat": [5, 10], "avail": 5, "awar": 24, "azur": [11, 12, 13, 14, 15, 16, 18, 42], "azureml": [11, 15, 18, 20, 35, 39, 42], "azureml_cli": 42, "azuremldata": 27, "azuremldeploy": 27, "azuremlmodel": 27, "beam": 22, "benefit": 45, "binari": 25, "blob": 16, "builder": 39, "built": 17, "cach": [16, 39], "candidatemodel": 27, "captur": 39, "capturesplitinfo": 43, "class": [3, 5, 6, 7, 8, 9, 29], "cli": [28, 36], "client": [11, 42], "cluster": 12, "code": 9, "command": 39, "compon": 17, "composit": 41, "compressor": 33, "comput": [11, 15], "conclus": 28, "config": [17, 18, 27, 42], "configur": [3, 10, 15, 16, 17, 19, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36, 41], "context": 25, "convers": [23, 25, 26, 30], "convert": [30, 39], "cost": [28, 39], "creat": 12, "curat": 11, "custom": [1, 17, 19], "d": [5, 6, 7, 8, 9], "data": [11, 17], "dataset": 18, "datastor": [11, 20], "datatyp": 30, "defin": [3, 9], "depend": [15, 16, 37], "dependabot": [5, 7], "descript": 29, "design": 2, "detail": [5, 6], "develop": 23, "distribut": 41, "docker": [18, 35], "dockerfil": 
27, "document": 29, "download": [25, 26], "dynam": 30, "dynamictofixedshap": 43, "edit": 37, "engin": [2, 42], "environ": [18, 35], "evalu": [2, 42], "exampl": [0, 1, 3, 22, 23, 24, 25, 26, 29, 30, 31, 33, 34, 42], "execut": 2, "exhaustivesearchalgorithm": 44, "export": 39, "exposeoutput": 29, "exposequantizedoutput": 29, "extend": 4, "extens": 12, "extra": [15, 23], "extract": 22, "extractadapt": 43, "face": 5, "field": 42, "file": [9, 20, 27, 39], "finetun": [7, 8, 39], "fix": 30, "float16": 30, "flow": 42, "folder": 20, "from": [11, 23, 37], "function": 3, "gener": [17, 25, 28, 39], "get": 5, "gptqquantiz": 43, "graph": 39, "graphsurgeri": 43, "handler": 41, "height": [5, 6, 7, 8, 9], "hf": 41, "hftrainingargu": 43, "hidden": [5, 6, 7, 8, 9], "host": [11, 12, 42], "how": [3, 10, 17, 19, 20, 27, 32, 35, 36], "hub": 18, "hug": 5, "huggingfac": 18, "i": [27, 45], "id": 42, "implement": 3, "import": [16, 29], "incdynamicquant": 43, "incquant": 43, "incstaticquant": 43, "infer": [5, 6, 7, 27], "infershap": 29, "inform": [39, 42], "input": [18, 30, 39, 42], "insert": 22, "insertbeamsearch": 43, "instal": [5, 15, 16, 23, 37], "integr": [11, 18, 36], "intel": 33, "introduct": 18, "isol": 35, "job": [11, 20], "json": [27, 42], "kubernet": 12, "latenc": 19, "learn": [11, 12, 15], "librari": 25, "line": 39, "link": 12, "list": 13, "load": 18, "local": [12, 18, 20, 35], "loftq": [24, 43], "log": 5, "login": 18, "lora": [24, 39, 43], "m0": 9, "m14": 9, "m5": [5, 7], "m6": [5, 7], "m9": [6, 7, 8, 9], "machin": [11, 12, 15], "manag": 35, "manage_compute_inst": 13, "map": 30, "matmulnbitstoqdq": 43, "mergeadapterweight": [24, 43], "metric": [18, 19, 27], "mix": 30, "mixedprecisionoverrid": 43, "ml": [11, 13, 18, 42], "model": [5, 6, 7, 11, 18, 20, 21, 23, 25, 26, 27, 28, 30, 33, 38, 39, 41, 42], "modelbuild": 43, "more": [5, 6], "multipl": 19, "name": [20, 39], "nativ": 35, "neural": 33, "new": 3, "note": [16, 29], "notebook": 5, "nvidia": 33, "octicon": [5, 6, 7, 8, 9], 
"oliv": [4, 5, 9, 15, 23, 25, 26, 27, 38, 42, 45], "olivemodel": 41, "onnx": [5, 6, 7, 8, 22, 29, 30, 33, 38, 39, 41, 43], "onnxconvers": 43, "onnxdynamicquant": 43, "onnxfloattofloat16": 43, "onnxiodatatypeconvert": 43, "onnxmatmul4quant": 43, "onnxopversionconvers": 43, "onnxpeepholeoptim": 43, "onnxquant": 43, "onnxruntim": [33, 39], "onnxstaticquant": 43, "op": 22, "openvino": [23, 41, 43], "openvinoconvers": 43, "openvinoquant": 43, "opt": 28, "optim": [3, 5, 6, 7, 8, 10, 21, 22, 33, 38, 39], "optimumconvers": 43, "optimummerg": 43, "option": [23, 37, 39, 42], "order": 2, "ort": [22, 35], "ortmixedprecis": 43, "ortsessionparamstun": 43, "orttransformersoptim": 43, "output": [11, 15, 20, 30], "overview": [16, 45], "packag": 27, "param": 39, "pass": [2, 3, 32, 42, 43], "path": [5, 6, 7, 8, 9, 20], "peeophol": 22, "perform": 22, "pip": [5, 37], "post": [22, 23, 26], "pre": [8, 22], "precis": 30, "prerequisit": [23, 25, 26, 35], "process": [8, 22], "provid": 39, "ptq": [23, 26], "pypi": 23, "python": [18, 35], "pytorch": [24, 31, 34, 39, 41, 43], "qlora": [24, 43], "qnn": [25, 43], "qnncontextbinarygener": 43, "qnnconvers": 43, "qnnmodellibgener": 43, "qnnpreprocess": 43, "qualcomm": 39, "quantiz": [8, 23, 24, 25, 26, 33, 34, 39], "quantizationawaretrain": 43, "quarot": [34, 43], "quickstart": [6, 7, 8, 9], "randomsearchalgorithm": 44, "rank": 27, "readymad": 35, "refer": 40, "regist": 11, "registri": 20, "remot": [15, 20], "removeinitializerfrominput": 29, "removeinput": 29, "removeshap": 29, "renameinput": 29, "renameoutput": 29, "reorderinput": 29, "replaceerfwithtanh": 29, "rocket": 9, "run": [3, 9, 15, 39], "runtim": [5, 6, 7, 23, 38], "script": [1, 13, 39], "script_dir": 1, "sd": [5, 6, 7, 8, 9], "sdk": [25, 26, 39], "search": [2, 22], "searchalgorithm": 44, "self": 12, "session": 39, "set": [20, 36], "setup": 16, "shape": 30, "share": [16, 39], "slicegpt": [24, 43], "snpe": [26, 41, 43], "snpeconvers": 43, "snpequant": 43, "snpetoonnxconvers": 43, "sourc": 
37, "sparsegpt": [24, 43], "spinquant": [34, 43], "split": [28, 39], "splitmodel": 43, "squar": 9, "start": 5, "storag": 16, "store": 11, "strategi": 2, "string": 20, "support": [8, 17], "surgeon": 29, "surgeri": 29, "svg": [5, 6, 7, 8, 9], "system": [2, 15, 18, 35, 42], "target": 11, "techniqu": 8, "templat": 17, "tensorrt": 33, "throughput": 19, "tool": [23, 39], "toolkit": 38, "torchtrtconvers": [31, 43], "tpesearchalgorithm": 44, "train": [23, 24, 26], "transform": 22, "true": [5, 6, 7, 8, 9], "try": 45, "tune": [22, 39], "type": 19, "unzip": [25, 26], "up": 36, "us": [5, 6, 7, 10, 11, 45], "usag": [13, 16], "user_script": 1, "version": [5, 6, 7, 8, 9], "viewbox": [5, 6, 7, 8, 9], "viti": 33, "vitisaiquant": 43, "what": [27, 45], "width": [5, 6, 7, 8, 9], "window": 33, "work": 36, "workflow": [9, 15, 32, 36, 42], "workspac": 15, "yaml": 9, "zap": [6, 7, 8, 9], "zerooutinput": 29, "zipfil": 27}}) \ No newline at end of file