This repository was archived by the owner on Nov 16, 2023. It is now read-only.

Commit 080cf46

maxkazmsft, yalaudah, fazamani, Sharat Chikkerur, and kirasoderstrom authored
0.2 release (#395)
* cleaning up files which are no longer needed * fixes after removing forking workflow (#322) * PR to resolve merge issues * updated main build as well * added ability to read in git branch name directly * manually updated the other files * fixed number of classes for main build tests (#327) * fixed number of classes for main build tests * corrected DATASET.ROOT in builds * added dev build script * Fixes for development inside the docker container (#335) * Fix the mound command for the HRNet pretrained model in the docker readme * Properly catch InvalidGitRepository exception * make repo paths consistent with non-docker runs -- this way configs paths do not need to be changed * Properly catch InvalidGitRepository exception in train.py * Readme update (#337) * README updates * Removing user specific path from config Authored-by: Fatemeh Zamanian <[email protected]> * Fixing #324 and #325 (#338) * update colormap to a non-discrete one -- fixes #324 * fix mask_to_disk to normalize by n_classes * changes to test.py * Updating data.py * bug fix * increased timeout time for main_build * retrigger build * retrigger the build * increase timeout * fixes 318 (#339) * finished 318 * increased checkerboard test timeout * fix 333 (#340) * added label correction to train gradient * changing the gradient data generator to take inline/crossline argument conssistent with the patchloader * changing variable name to be more descriptive Co-authored-by: maxkazmsft <[email protected]> * bug fix to model predictions (#345) * replace hrnet with seresnet in experiments - provides stable default model (#343) * PR to fix #342 (#347) * intermediate work for normalization * 1) normalize function runs based on global MIN and MAX 2) has a error handling for division by zero, np.finfo 3) decode_segmap normalizes the label/mask based on the n_calsses * global normalization added to test.py * increasing the threshold on timeout * trigger * revert * idk what happened * increase timeout * picking up global min and max * passing config to TrainPatchLoader to facilitate access to global min and max and other attr in low level functions, WIP * removed print statement * changed section loaders * updated test for min and max from config too * adde MIN and MAX to config * notebook modified for loaders * another dataloader in notebook * readme update * changed the default values for min max, updated the docstring for loaders, removed suppressed lines * debug * merging work from CSE team into main staging branch (#357) * Adding content to interpretation README (#171) * added sharat, weehyong to authors * adding a download script for Dutch F3 dataset * Adding script instructions for dutch f3 * Update README.md prepare scripts expect root level directory for dutch f3 dataset. 
(it is downloaded into $dir/data by the script) * Adding readme text for the notebooks and checking if config is correctly setup * fixing prepare script example * Adding more content to interpretation README * Update README.md * Update HRNet_Penobscot_demo_notebook.ipynb Co-authored-by: maxkazmsft <[email protected]> * Updates to prepare dutchf3 (#185) * updating patch to patch_size when we are using it as an integer * modifying the range function in the prepare_dutchf3 script to get all of our data * updating path to logging.config so the script can locate it * manually reverting back log path to troubleshoot build tests * updating patch to patch_size for testing on preprocessing scripts * updating patch to patch_size where applicable in ablation.sh * reverting back changes on ablation.sh to validate build pass * update patch to patch_size in ablation.sh (#191) Co-authored-by: Sharat Chikkerur <[email protected]> * TestLoader's support for custom paths (#196) * Add testloader support for custom paths. * Add test * added file name workaround for Train*Loader classes * adding comments and clean up * Remove legacy code. * Remove parameters that dont exist in init() from documentation. * Add unit tests for data loaders in dutchf3 * moved unit tests Co-authored-by: maxkazmsft <[email protected]> * select contiguous data splits for val and train (#200) * select contiguous data splits for test and train * changed data-dir to data_dir as arg to prepare_dutchf3.py * update script with new required parameter label_file * ignoring split_alaudah_et_al_19 as it is not updated * changed TEST to VALIDATION for clarity in the code * included job to run scripts unit test * Fix val/train split and add tests * adjust to consider the whole horz_lines * update environment - gitpython version * Segy Converter Utility (#199) * Add convert_segy utility script and related notebooks * add segy files to .gitignore * readability update * Create methods for normalizing and clipping separately. * Add comment * update file paths * cleanup tests and terminology for the normalization/clipping code * update notes to provide more context for using the script * Add tests for clipping. * Update comments * added Microsoft copyright * Update root README * Add a flag to turn on clipping in dataprep script. * Remove hard coded values and fix _filder_data method. * Fix some minor issues pointed out on comments. * Remove unused lib. * Rename notebooks to impose order; set env; move all def funtions into utils; improve comments in notebooks; and include code example to run prepare_dutchf3.py * Label missing data with 255. * Remove cell with --help command. * Add notebooks to test pipeline. 
* grammer edits * update notebook output and utils naming * fix output dir error and cleanup notebook * fix yaml indent error in notebooks_build.yml * fix merge issues and job name errors * debugging the build pipeline * combine notebook tests for segy converter since they are dependent on each other Co-authored-by: Geisa Faustino <[email protected]> * Azureml train pipeline (#195) * initial add of azure ml pipeline * update references and dependencies * fix integration tests * remove incomplete tests * add azureml requirements.txt for dutchf3 local patch and update pipeline config * add empty __init__.py to cv_lib dutchf3 * Get train,py to run in pipeline * allow output dir in train.py * Clean up README and __init__ * only pass output if available and use input dir for output in train.py * update comment in train.py * updating azureml_requirements to only pull from /master * removing windows guidance in azureml_pipelines/README.md * adding .env.example * adding azureml config example * updating documentation in azureml_pipelines README.md * updating main README.md to refer to AML guidance documentation * updating AML README.md to include additional guidance to cancel runs * adding documentation on AzureML pipelines in the AML README.me * adding files needed section for AML training run * including hyperlink in format poiniting to additional detail on Azure Machine Learning pipeslines in AML README.md * removing the mention of VSCode in the AML README.md * fixing typo * modifying config to pipeline configuration in README.md * fixing typo in README.md * adding documentation on how to create a blob container and copy data onto it * adding documentation on blob storage guidance * adding guidance on how to get the subscription id * adding guidance to activate environment and then run the kick off train pipeline from ROOT * adding ability to pass in experiement name and different pipeline configuration to kickoff_train_pipeline.py * adding Microsoft Corporation Copyright to kickoff_train_pipeline.py * fixing format in README.md * adding trouble shooting section in README.md for connection to subscription * updating troubleshooting title * adding guidance on how to download the config.json from the Azure Portal in the README.md * adding additional guidance and information on AzureML compute targets and naming conventions * changing the configuation file example to only include the train step that is currently supported * updating config to pipeline configuration when applicable * adding link to Microsoft docs for additional information on pipeline steps * updated AML test build definitions * updated AML test build definitions * adding job to aml_build.yml * updating example config for testing * modifying the test_train_pipeline.py to have appropriate number of pipeline steps and other required modifications * updating AML_pipeline_tests in aml_build.yml to consume environment variables * updating scriptType, sciptLocation, and inlineScript in aml_build.yml * trivial commit to re-trigger broken build pipelines * fix to aml yml build to use env vars for secrets and everything else * another yml fix * another yml fix * reverting structure format of jobs for aml_build pipeline tests * updating path to test_train_pipeline.py * aml_pipeline_tests timed out, extending timeoutInMinutes from 10 to 40 * adding additional pytest * adding az login * updating variables in aml pipeline tests Co-authored-by: Anna Zietlow <[email protected]> Co-authored-by: maxkazmsft <[email protected]> * moved contrib 
contributions around from CSE * fixed dataloader tests - updated them to work with new code from staging branch * segyconverter notebooks and tests run and pass; updated documentation * added test job for segy converter notebooks * removed AML training pipeline from this release * fixed training model tolerance precision in the tests - wasn't working * fixed train.py build issues after the merge * addressed PR comments * fixed bug in check_performance Co-authored-by: Sharat Chikkerur <[email protected]> Co-authored-by: kirasoderstrom <[email protected]> Co-authored-by: Sharat Chikkerur <[email protected]> Co-authored-by: Geisa Faustino <[email protected]> Co-authored-by: Ricardo Squassina Lee <[email protected]> Co-authored-by: Michael Zawacki <[email protected]> Co-authored-by: Anna Zietlow <[email protected]> * make tests simpler (#368) * removed Dutch F3 job from main_build * fixed a bug in data subset in debug mode * modified epoch numbers to pass the performance checks, checkedout check_performance from Max's branch * modified get_data_for_builds.sh to set up checkerboard data for smaller size, minor improvements on gen_checkerboard * send all the batches, disabled the performance checks for patch_deconvnet * added comment to enable tests for patch_deconvnet after debugging, renamed gen_checkerboard, added options to new arg per Max's suggestion * Replace HRNet with SEResNet model in the notebook (#362) * replaced HRNet with SEResNet model in the notebook * removed debugging cell info * fixed bug where resnet_unet model wasn't loading the pre-trained version in the notebook * fixed build VM problems * Multi-GPU training support (#359) * Data flow tests (#375) * renamed checkerboard job name * restructured default outputs from test.py to be dumped under output dir and not debug dir * test.py output re-org * removed outdated variable from check_performance.py * intermediate work * intermediate work * bunch of intermediate works * changing args for different trainings * final to run dev_build" * remove print statements * removed print statement * removed suppressed lines * added assertion error msg * added assertion error msg, one intential bug to test * testing a stupid bug * debug * omg * final * trigger build * fixed multi-GPU termination in train.py (#379) * PR to fix #371 and #372 (#380) * added learning rate to logs * changed epoch for patch_deconvnet, and enabled the tests * removed TODOs * changed tensorflow pinned version (#387) * changed tensorflow pinned version * trigger build * closes 385 (#389) * Fixing #259 by adding symmetric padding along depth direction (#386) * BYOD Penobscot (#390) * minor updates to files * added penobscot conversion code * docker build test (#388) * added a new job to test bulding the docker, for now it is daisy-chained to the end * this is just a TEST * test * test * remove old image * debug * debug * test * debug * enabled all the jobs * quick fix * removing non-tagged iamges Co-authored-by: maxkazmsft <[email protected]> * added missing license headers and fixed formatting (#391) * added missing license headers and fixed formatting * some more license headers * updated documentation to close 354 and 381 (#392) * fix test.py and notebook issues (#394) * resolved conflicts for 0.2 release (#396) * V00.01.00003 release (#356) * cleaning up files which are no longer needed * fixes after removing forking workflow (#322) * PR to resolve merge issues * updated main build as well * added ability to read in git branch name directly * manually updated the 
other files * fixed number of classes for main build tests (#327) * fixed number of classes for main build tests * corrected DATASET.ROOT in builds * added dev build script * Fixes for development inside the docker container (#335) * Fix the mound command for the HRNet pretrained model in the docker readme * Properly catch InvalidGitRepository exception * make repo paths consistent with non-docker runs -- this way configs paths do not need to be changed * Properly catch InvalidGitRepository exception in train.py * Readme update (#337) * README updates * Removing user specific path from config Authored-by: Fatemeh Zamanian <[email protected]> * Fixing #324 and #325 (#338) * update colormap to a non-discrete one -- fixes #324 * fix mask_to_disk to normalize by n_classes * changes to test.py * Updating data.py * bug fix * increased timeout time for main_build * retrigger build * retrigger the build * increase timeout * fixes 318 (#339) * finished 318 * increased checkerboard test timeout * fix 333 (#340) * added label correction to train gradient * changing the gradient data generator to take inline/crossline argument conssistent with the patchloader * changing variable name to be more descriptive Co-authored-by: maxkazmsft <[email protected]> * bug fix to model predictions (#345) * replace hrnet with seresnet in experiments - provides stable default model (#343) Co-authored-by: yalaudah <[email protected]> Co-authored-by: Fatemeh <[email protected]> * typos Co-authored-by: yalaudah <[email protected]> Co-authored-by: Fatemeh <[email protected]> Co-authored-by: yalaudah <[email protected]> Co-authored-by: Fatemeh <[email protected]> Co-authored-by: Sharat Chikkerur <[email protected]> Co-authored-by: kirasoderstrom <[email protected]> Co-authored-by: Sharat Chikkerur <[email protected]> Co-authored-by: Geisa Faustino <[email protected]> Co-authored-by: Ricardo Squassina Lee <[email protected]> Co-authored-by: Michael Zawacki <[email protected]> Co-authored-by: Anna Zietlow <[email protected]>
1 parent 15d45fb commit 080cf46


91 files changed: +4,088 −881 lines

Diff for: .azureml.example/config.json

+5
@@ -0,0 +1,5 @@
+{
+    "subscription_id": "input_sub_id",
+    "resource_group": "input_resource_group",
+    "workspace_name": "input_workspace_name"
+}
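This is the standard Azure ML workspace configuration layout. As a hedged illustration (not part of the commit), a script targeting the azureml-core SDK would typically pick this file up with `Workspace.from_config()`:

```python
# Minimal sketch, assuming the azureml-core SDK is installed and that
# .azureml/config.json has been populated with real values.
from azureml.core import Workspace

# from_config() searches for .azureml/config.json (or config.json) starting
# in the current directory and walking up the tree.
ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.subscription_id)
```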

Diff for: .env.example

+8
@@ -0,0 +1,8 @@
+BLOB_ACCOUNT_NAME=
+BLOB_CONTAINER_NAME=
+BLOB_ACCOUNT_KEY=
+BLOB_SUB_ID=
+AML_COMPUTE_CLUSTER_NAME=
+AML_COMPUTE_CLUSTER_MIN_NODES=
+AML_COMPUTE_CLUSTER_MAX_NODES=
+AML_COMPUTE_CLUSTER_SKU=
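A hedged sketch of how settings like these are commonly consumed at runtime (assuming the python-dotenv package; the pipeline scripts may load them differently):

```python
# Sketch only: load .env into the process environment and read the AML compute settings.
import os

from dotenv import load_dotenv  # pip install python-dotenv

load_dotenv()  # reads .env from the current working directory
cluster_name = os.getenv("AML_COMPUTE_CLUSTER_NAME")
min_nodes = int(os.getenv("AML_COMPUTE_CLUSTER_MIN_NODES", "0"))
max_nodes = int(os.getenv("AML_COMPUTE_CLUSTER_MAX_NODES", "1"))
print(cluster_name, min_nodes, max_nodes)
```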

Diff for: .gitignore

+5 −1
@@ -115,4 +115,8 @@ interpretation/environment/anaconda/local/src/cv-lib
 # Rope project settings
 .ropeproject

-*.pth
+*.pth
+
+# Seismic data files
+*.sgy
+*.segy

Diff for: README.md

+70 −52
Large diffs are not rendered by default.

Diff for: conftest.py

+2
@@ -0,0 +1,2 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.

Diff for: contrib/README.md

+12
@@ -6,3 +6,15 @@ We encourage submissions to the contrib folder, and once they are well-tested, d

 Thank you.

+#### Azure Machine Learning
+If you would like to leverage Azure Machine Learning to create a Training Pipeline with this dataset we have guidance on how do so [here](interpretation/deepseismic_interpretation/azureml_pipelines/README.md)
+
+### HRNet model guidance (experimental for now)
+
+#### HRNet ImageNet weights model
+
+To enable training from scratch on seismic data and to achieve the same results as the benchmarks quoted below you will need to download the HRNet model [pretrained](https://github.com/HRNet/HRNet-Image-Classification) on ImageNet. We are specifically using the [HRNet-W48-C](https://1drv.ms/u/s!Aus8VCZ_C_33dKvqI6pBZlifgJk) pre-trained model; other HRNet variants are also available [here](https://github.com/HRNet/HRNet-Image-Classification) - you can navigate to those from the [main HRNet landing page](https://github.com/HRNet/HRNet-Object-Detection) for object detection.
+
+Unfortunately, the OneDrive location which is used to host the model is using a temporary authentication token, so there is no way for us to script up model download. There are two ways to upload and use the pre-trained HRNet model on DS VM:
+- download the model to your local drive using a web browser of your choice and then upload the model to the DS VM using something like `scp`; navigate to Portal and copy DS VM's public IP from the Overview panel of your DS VM (you can search your DS VM by name in the search bar of the Portal) then use `scp local_model_location username@DS_VM_public_IP:./model/save/path` to upload
+- alternatively, you can use the same public IP to open remote desktop over SSH to your Linux VM using [X2Go](https://wiki.x2go.org/doku.php/download:start): you can basically open the web browser on your VM this way and download the model to VM's disk
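After uploading the weights, a quick sanity check is to confirm the file loads as a PyTorch state dict. This is a hedged sketch (not part of the commit); the path below matches the one referenced in contrib/scripts/run_distributed.sh and is otherwise an assumption:

```python
# Sketch: verify the downloaded HRNet-W48-C checkpoint before pointing a config at it.
import torch

weights_path = "/home/alfred/models/hrnetv2_w48_imagenet_pretrained.pth"  # assumed upload location
state_dict = torch.load(weights_path, map_location="cpu")
print(f"checkpoint contains {len(state_dict)} tensors")  # ImageNet backbone weights
```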

Diff for: contrib/experiments/interpretation/dutchf3_section/README.md

+1 −1
@@ -19,7 +19,7 @@ Now you're all set to run training and testing experiments on the F3 Netherlands
 ### Monitoring progress with TensorBoard
 - from the this directory, run `tensorboard --logdir='output'` (all runtime logging information is
 written to the `output` folder
-- open a web-browser and go to either vmpublicip:6006 if running remotely or localhost:6006 if running locally
+- open a web-browser and go to either `<vm_public_ip>:6006` if running remotely or localhost:6006 if running locally
 > **NOTE**:If running remotely remember that the port must be open and accessible

 More information on Tensorboard can be found [here](https://www.tensorflow.org/get_started/summaries_and_tensorboard#launching_tensorboard).

Diff for: contrib/experiments/interpretation/penobscot/README.md

+1 −1
@@ -20,7 +20,7 @@ Also follow instructions for [downloading and preparing](../../../README.md#peno
 ### Monitoring progress with TensorBoard
 - from the this directory, run `tensorboard --logdir='output'` (all runtime logging information is
 written to the `output` folder
-- open a web-browser and go to either vmpublicip:6006 if running remotely or localhost:6006 if running locally
+- open a web-browser and go to either `<vm_public_ip>:6006` if running remotely or `localhost:6006` if running locally
 > **NOTE**:If running remotely remember that the port must be open and accessible

 More information on Tensorboard can be found [here](https://www.tensorflow.org/get_started/summaries_and_tensorboard#launching_tensorboard).

Diff for: scripts/run_all.sh renamed to contrib/scripts/run_all.sh

+1 −1
@@ -39,7 +39,7 @@ nohup time python train.py \
 # wait for python to pick up the runtime env before switching it
 sleep 1

-cd ../../dutchf3_patch/local
+cd ../../dutchf3_patch

 # patch based without skip connections
 export CUDA_VISIBLE_DEVICES=2

Diff for: scripts/run_distributed.sh renamed to contrib/scripts/run_distributed.sh

+6 −3
@@ -1,7 +1,11 @@
 #!/bin/bash

 # number of GPUs to train on
-NGPU=8
+NGPUS=$(nvidia-smi -L | wc -l)
+if [ "$NGPUS" -lt "2" ]; then
+    echo "ERROR: cannot run distributed training without 2 or more GPUs."
+    exit 1
+fi
 # specify pretrained HRNet backbone
 PRETRAINED_HRNET='/home/alfred/models/hrnetv2_w48_imagenet_pretrained.pth'
 # DATA_F3='/home/alfred/data/dutch/data'
@@ -15,9 +19,8 @@ unset CUDA_VISIBLE_DEVICES
 # bug to fix conda not launching from a bash shell
 source /data/anaconda/etc/profile.d/conda.sh
 conda activate seismic-interpretation
-export PYTHONPATH=/storage/repos/forks/seismic-deeplearning-1/interpretation:$PYTHONPATH

-cd experiments/interpretation/dutchf3_patch/distributed/
+cd experiments/interpretation/dutchf3_patch/

 # patch based without skip connections
 nohup time python -m torch.distributed.launch --nproc_per_node=${NGPU} train.py \
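For reference, the GPU-count guard added above can also be expressed in Python before launching distributed training; a hedged sketch (illustration only, not part of the commit) using torch.cuda.device_count():

```python
# Sketch: refuse to start distributed training with fewer than 2 visible GPUs.
import sys

import torch

n_gpus = torch.cuda.device_count()
if n_gpus < 2:
    sys.exit(f"ERROR: cannot run distributed training with {n_gpus} GPU(s); 2 or more are required.")
print(f"launching with --nproc_per_node={n_gpus}")
```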

Diff for: scripts/test_all.sh renamed to contrib/scripts/test_all.sh

+2 −2
@@ -59,7 +59,7 @@ nohup time python test.py \
 --cfg "configs/${CONFIG_NAME}.yaml" > ${CONFIG_NAME}_test.log 2>&1 &
 sleep 1

-cd ../../dutchf3_patch/local
+cd ../../dutchf3_patch

 # patch based without skip connections
 export CUDA_VISIBLE_DEVICES=2
@@ -140,7 +140,7 @@ wait

 # scoring scripts are in the local folder
 # models are in the distributed folder
-cd ../../dutchf3_patch/local
+cd ../../dutchf3_patch

 # patch based without skip connections
 export CUDA_VISIBLE_DEVICES=2

Diff for: contrib/tests/cicd/aml_build.yml

+110
@@ -0,0 +1,110 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+# Pull request against these branches will trigger this build
+pr:
+- master
+- staging
+- contrib
+
+# Any commit to this branch will trigger the build.
+trigger:
+- master
+- staging
+- contrib
+
+jobs:
+
+# partially disable setup for now - done manually on build VM
+- job: setup
+  timeoutInMinutes: 10
+  displayName: Setup
+  pool:
+    name: deepseismicagentpool
+  steps:
+  - bash: |
+      # terminate as soon as any internal script fails
+      set -e
+
+      echo "Running setup..."
+      pwd
+      ls
+      git branch
+      uname -ra
+
+      # TODO: uncomment in the next release to bring back AML
+      # # setup run environment
+      # ./scripts/env_reinstall.sh
+      #
+      # # use hardcoded root for now because not sure how env changes under ADO policy
+      # DATA_ROOT="/home/alfred/data_dynamic"
+      # ./tests/cicd/src/scripts/get_data_for_builds.sh ${DATA_ROOT}
+      #
+      # # upload pre-processed data to AML build WASB storage - overwrites by default and auto-creates container name
+      # azcopy --quiet --recursive \
+      #   --source ${DATA_ROOT}/dutch_f3/data --destination https://${BLOB_ACCOUNT_NAME}.blob.core.windows.net/${BLOB_CONTAINER_NAME}/data \
+      #   --dest-key ${BLOB_ACCOUNT_KEY}
+    # env:
+    #   BLOB_ACCOUNT_NAME: $(amlbuildstore)
+    #   BLOB_CONTAINER_NAME: "amlbuild"
+    #   BLOB_ACCOUNT_KEY: $(amlbuildstorekey)
+#
+#
+#- job: AML_pipeline_tests
+#  dependsOn: setup
+#  timeoutInMinutes: 20
+#  displayName: AML pipeline tests
+#  pool:
+#    name: deepseismicagentpool
+#  steps:
+#  - bash: |
+#      source activate seismic-interpretation
+#      # TODO: add code which launches your pytest files ("pytest sometest" OR "python test.py")
+#      # data is in $(amlbuildstore).blob.core.windows.net/amlbuild/data (container amlbuild, virtual folder data)
+#      # storage key is $(amlbuildstorekey)
+#      az --version
+#      az account show
+#      az login --service-principal -u $SPIDENTITY -p $SPECRET --tenant $SPTENANT
+#      az account set --subscription $SUB_ID
+#      mkdir .azureml
+#      cat <<EOF > .azureml/config.json
+#      {
+#          "subscription_id": "$SUB_ID",
+#          "resource_group": "$RESOURCE_GROUP",
+#          "workspace_name": "$WORKSPACE_NAME"
+#      }
+#      EOF
+#      pytest interpretation/tests/test_train_pipeline.py || EXITCODE=123
+#      exit $EXITCODE
+#      pytest
+#    env:
+#      SUB_ID: $(subscription_id)
+#      RESOURCE_GROUP: $(resource_group)
+#      WORKSPACE_NAME: $(workspace_name)
+#      BLOB_ACCOUNT_NAME: $(amlbuildstore)
+#      BLOB_CONTAINER_NAME: "amlbuild"
+#      BLOB_ACCOUNT_KEY: $(amlbuildstorekey)
+#      BLOB_SUB_ID: $(subscription_id)
+#      AML_COMPUTE_CLUSTER_NAME: "testcluster"
+#      AML_COMPUTE_CLUSTER_MIN_NODES: "1"
+#      AML_COMPUTE_CLUSTER_MAX_NODES: "8"
+#      AML_COMPUTE_CLUSTER_SKU: "STANDARD_NC6"
+#      SPIDENTITY: $(spidentity)
+#      SPECRET: $(spsecret)
+#      SPTENANT: $(sptenant)
+#    displayName: 'integration tests'
+
+# - job: AML_short_pipeline_test
+#   dependsOn: setup
+#   timeoutInMinutes: 5
+#   displayName: AML short pipeline test
+#   pool:
+#     name: deepseismicagentpool
+#   steps:
+#   - bash: |
+#       source activate seismic-interpretation
+#       # TODO: OPTIONAL! Add a job which launches entire training pipeline for 1 epoch of training (train model for single epoch)
+#       # if you don't want this then delete the entire job from this file
+#       python interpretation/deepseismic_interpretation/azureml_pipelines/dev/kickoff_train_pipeline.py --experiment=DEV-train-pipeline-name --orchestrator_config=orchestrator_config="interpretation/deepseismic_interpretation/azureml_pipelines/pipeline_config.json"

Diff for: cv_lib/cv_lib/__init__.py

+2
@@ -0,0 +1,2 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.

Diff for: cv_lib/cv_lib/event_handlers/__init__.py

+1 −1
@@ -31,7 +31,7 @@ def _create_checkpoint_handler(self):
     def __call__(self, engine, to_save):
         self._checkpoint_handler(engine, to_save)
         if self._snapshot_function():
-            files = glob.glob(os.path.join(self._model_save_location, self._running_model_prefix + "*"))
+            files = glob.glob(os.path.join(self._model_save_location, self._running_model_prefix + "*"))
             name_postfix = os.path.basename(files[0]).lstrip(self._running_model_prefix)
             copyfile(
                 files[0],

Diff for: cv_lib/cv_lib/event_handlers/azureml_handlers.py

+2
@@ -0,0 +1,2 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.

Diff for: cv_lib/cv_lib/event_handlers/tensorboard_handlers.py

+5 −4
@@ -10,6 +10,7 @@
 from cv_lib.segmentation.dutchf3.utils import np_to_tb
 from cv_lib.utils import decode_segmap

+
 def create_summary_writer(log_dir):
     writer = SummaryWriter(logdir=log_dir)
     return writer
@@ -20,9 +21,9 @@ def _transform_image(output_tensor):
     return torchvision.utils.make_grid(output_tensor, normalize=True, scale_each=True)


-def _transform_pred(output_tensor):
+def _transform_pred(output_tensor, n_classes):
     output_tensor = output_tensor.squeeze().cpu().numpy()
-    decoded = decode_segmap(output_tensor)
+    decoded = decode_segmap(output_tensor, n_classes)
     return torchvision.utils.make_grid(np_to_tb(decoded), normalize=False, scale_each=False)


@@ -111,5 +112,5 @@ def log_results(engine, evaluator, summary_writer, n_classes, stage):
     y_pred[mask == 255] = 255

     summary_writer.add_image(f"{stage}/Image", _transform_image(image), epoch)
-    summary_writer.add_image(f"{stage}/Mask", _transform_pred(mask), epoch)
-    summary_writer.add_image(f"{stage}/Pred", _transform_pred(y_pred), epoch)
+    summary_writer.add_image(f"{stage}/Mask", _transform_pred(mask, n_classes), epoch)
+    summary_writer.add_image(f"{stage}/Pred", _transform_pred(y_pred, n_classes), epoch)
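The change threads n_classes through to decode_segmap so the TensorBoard mask and prediction images are normalized by the actual class count (part of the #324/#325 fix). A hedged, self-contained sketch of the idea; decode_to_rgb below is a hypothetical stand-in for cv_lib.utils.decode_segmap:

```python
# Sketch: map an integer label mask to RGB, scaling the colormap by n_classes.
import matplotlib.pyplot as plt
import numpy as np


def decode_to_rgb(label_mask: np.ndarray, n_classes: int) -> np.ndarray:
    """Hypothetical stand-in for decode_segmap: normalize labels by the class count, then colormap."""
    normalized = label_mask.astype(float) / max(n_classes - 1, 1)  # guard against division by zero
    return plt.get_cmap("viridis")(normalized)[..., :3]  # drop the alpha channel


mask = np.random.randint(0, 6, size=(64, 64))
print(decode_to_rgb(mask, n_classes=6).shape)  # (64, 64, 3)
```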

Diff for: cv_lib/cv_lib/segmentation/dutchf3/__init__.py

+2
@@ -0,0 +1,2 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.

Diff for: cv_lib/cv_lib/segmentation/dutchf3/utils.py

-1
@@ -37,4 +37,3 @@ def git_branch():
 def git_hash():
     repo = Repo(search_parent_directories=True)
     return repo.active_branch.commit.hexsha
-

Diff for: cv_lib/cv_lib/segmentation/models/patch_deconvnet_skip.py

+1
@@ -304,4 +304,5 @@ def get_seg_model(cfg, **kwargs):
         cfg.MODEL.IN_CHANNELS == 1
     ), f"Patch deconvnet is not implemented to accept {cfg.MODEL.IN_CHANNELS} channels. Please only pass 1 for cfg.MODEL.IN_CHANNELS"
     model = patch_deconvnet_skip(n_classes=cfg.DATASET.NUM_CLASSES)
+
     return model

Diff for: cv_lib/cv_lib/segmentation/models/resnet_unet.py

+5
@@ -1,11 +1,16 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.

+import logging
+import os
+
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 import torchvision

+logger = logging.getLogger(__name__)
+

 class FPAv2(nn.Module):
     def __init__(self, input_dim, output_dim):

Diff for: cv_lib/cv_lib/segmentation/models/section_deconvnet.py

+1
@@ -304,4 +304,5 @@ def get_seg_model(cfg, **kwargs):
         cfg.MODEL.IN_CHANNELS == 1
     ), f"Section deconvnet is not implemented to accept {cfg.MODEL.IN_CHANNELS} channels. Please only pass 1 for cfg.MODEL.IN_CHANNELS"
     model = section_deconvnet(n_classes=cfg.DATASET.NUM_CLASSES)
+
     return model

Diff for: cv_lib/cv_lib/segmentation/models/section_deconvnet_skip.py

+1
@@ -304,4 +304,5 @@ def get_seg_model(cfg, **kwargs):
         cfg.MODEL.IN_CHANNELS == 1
     ), f"Section deconvnet is not implemented to accept {cfg.MODEL.IN_CHANNELS} channels. Please only pass 1 for cfg.MODEL.IN_CHANNELS"
     model = section_deconvnet_skip(n_classes=cfg.DATASET.NUM_CLASSES)
+
     return model

Diff for: cv_lib/cv_lib/segmentation/models/seg_hrnet.py

+4 −5
@@ -430,21 +430,20 @@ def init_weights(

         if pretrained and not os.path.isfile(pretrained):
             raise FileNotFoundError(f"The file {pretrained} was not found. Please supply correct path or leave empty")
-
+
         if os.path.isfile(pretrained):
             pretrained_dict = torch.load(pretrained)
             logger.info("=> loading pretrained model {}".format(pretrained))
             model_dict = self.state_dict()
             pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict.keys()}
             for k, _ in pretrained_dict.items():
-                logger.info(
-                    '=> loading {} pretrained model {}'.format(k, pretrained))
+                logger.info("=> loading {} pretrained model {}".format(k, pretrained))
             model_dict.update(pretrained_dict)
             self.load_state_dict(model_dict)


 def get_seg_model(cfg, **kwargs):
     model = HighResolutionNet(cfg, **kwargs)
-    model.init_weights(cfg.MODEL.PRETRAINED)
-
+    if "PRETRAINED" in cfg.MODEL.keys():
+        model.init_weights(cfg.MODEL.PRETRAINED)
     return model
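With this guard, the ImageNet weights become optional: init_weights runs only when the experiment config defines MODEL.PRETRAINED. A hedged sketch of the toggle (assuming yacs-style configs, as used by the experiments; the path is illustrative):

```python
# Sketch: MODEL.PRETRAINED is only consulted when the key exists in the config.
from yacs.config import CfgNode as CN

cfg = CN()
cfg.MODEL = CN()
# Uncomment to load the ImageNet-pretrained HRNet backbone:
# cfg.MODEL.PRETRAINED = "/models/hrnetv2_w48_imagenet_pretrained.pth"  # illustrative path

if "PRETRAINED" in cfg.MODEL.keys():
    print("init_weights would load", cfg.MODEL.PRETRAINED)
else:
    print("no PRETRAINED key: the model keeps its random initialization")
```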

Diff for: cv_lib/cv_lib/segmentation/models/unet.py

+1
@@ -113,4 +113,5 @@ def forward(self, x):

 def get_seg_model(cfg, **kwargs):
     model = UNet(cfg.MODEL.IN_CHANNELS, cfg.DATASET.NUM_CLASSES)
+
     return model

Diff for: cv_lib/cv_lib/segmentation/utils.py

+1 −2
@@ -3,7 +3,6 @@

 import numpy as np

+
 def _chw_to_hwc(image_array_numpy):
     return np.moveaxis(image_array_numpy, 0, -1)
-
-

0 commit comments
