From 1c4959e0c42848767a7536ac9ed2aafc16eb8939 Mon Sep 17 00:00:00 2001 From: lochen <cloga0216@gmail.com> Date: Wed, 24 Nov 2021 20:45:55 +0800 Subject: [PATCH 1/2] change pipeline data to pipeline output dataset --- ...l-pipelines-with-data-dependency-steps.ipynb | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-data-dependency-steps.ipynb b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-data-dependency-steps.ipynb index 419303a42..9b89c14f7 100644 --- a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-data-dependency-steps.ipynb +++ b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-data-dependency-steps.ipynb @@ -252,8 +252,12 @@ "# is_directory=None)\n", "\n", "# Naming the intermediate data as processed_data1 and assigning it to the variable processed_data1.\n", - "processed_data1 = PipelineData(\"processed_data1\",datastore=def_blob_store)\n", - "print(\"PipelineData object created\")" + "# Promote pipelinedata to pipeline_output_dataset, which will use dataset instead of data reference \n", + "\n", + "from azureml.pipeline.core.pipeline_output_dataset import PipelineOutputFileDataset\n", + "\n", + "processed_data1 = PipelineOutputFileDataset(PipelineData(\"processed_data1\",datastore=def_blob_store))\n", + "print(\"PipelineOutputFileDataset object created\")" ] }, { @@ -544,10 +548,13 @@ "Azure ML" ], "friendly_name": "Azure Machine Learning Pipelines with Data Dependency", + "interpreter": { + "hash": "3e9e0e270b75c5e6da2e22113ba4f77b864d68f95da6601809c29e46c73ae6bb" + }, "kernelspec": { "display_name": "Python 3.6", "language": "python", - "name": "python36" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -559,7 +566,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.7" + "version": "3.7.8" }, "order_index": 2, "star_tag": [ @@ -572,4 +579,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} From 96a8e069178c707bfa39cd77f04ba4cde18a1f9b Mon Sep 17 00:00:00 2001 From: lochen <cloga0216@gmail.com> Date: Thu, 25 Nov 2021 09:24:42 +0800 Subject: [PATCH 2/2] make change to use pipeline_output_dataset --- .../aml-pipelines-with-data-dependency-steps.ipynb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-data-dependency-steps.ipynb b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-data-dependency-steps.ipynb index 9b89c14f7..587875733 100644 --- a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-data-dependency-steps.ipynb +++ b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-data-dependency-steps.ipynb @@ -230,7 +230,9 @@ "- **output_name:** Name of the output\n", "- **output_mode:** Specifies \"upload\" or \"mount\" modes for producing output (default: mount)\n", "- **output_path_on_compute:** For \"upload\" mode, the path to which the module writes this output during execution\n", - "- **output_overwrite:** Flag to overwrite pre-existing data" + "- **output_overwrite:** Flag to overwrite pre-existing data\n", + "\n", + "As PipelineData is using [DataReference](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.data_reference.datareference?view=azure-ml-py) to represent the data, which is not the recommanded approch, we will recommand you to use [pipeline_output_dataset](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.pipeline_output_dataset?view=azure-ml-py) instead, once promoted to an Azure Machine Learning dataset, it will also be consumed as a Dataset instead of a DataReference in subsequent steps." ] }, {