vm-repair: Fix Encrypted Ubuntu Bug, add LVM support (Azure#2339)

* Adding code for unlock and added encryptformatall * removing temp files * Changes to logging and logic * Fixing code for data disk detection on RedHat * Create function for _invoke_run_command * add NF for finding the data disk with awk * Adding error traps to allow stderr to be printed * Added function for validating tag if secret is changed * Tidy code Co-authored-by: Ubuntu <ceschi@ubuntudevrepair.mygpwaslj2re3c1zv23vyhla2c.bx.internal.cloudapp.net> Co-authored-by: Francisco Franceschi <[email protected]> Co-authored-by: root <root@ubuntu18dev1.vdtdwq5rkvyuhaj2iq0ntqetub.syx.internal.cloudapp.net>
wangzelin007 · Sep 14, 2020 · c906263 · c906263
1 parent db1a4b3
commit c906263
Show file tree

Hide file tree

Showing 9 changed files with 296 additions and 123 deletions.
diff --git a/azure-cli-extensions.pyproj b/azure-cli-extensions.pyproj
@@ -4756,7 +4756,7 @@
     <Content Include="src\virtual-wan\azext_vwan\azext_metadata.json" />
     <Content Include="src\virtual-wan\readme.md" />
     <Content Include="src\vm-repair\azext_vm_repair\scripts\linux-run-driver.sh" />
-    <Content Include="src\vm-repair\azext_vm_repair\scripts\mount_encrypted_disk.sh" />
+    <Content Include="src\vm-repair\azext_vm_repair\scripts\mount-encrypted-disk.sh" />
     <Content Include="src\vm-repair\azext_vm_repair\scripts\win-run-driver.ps1" />
     <Content Include="src\vm-repair\README.md" />
     <Content Include="src\vm-repair\setup.cfg" />

diff --git a/src/vm-repair/azext_vm_repair/_validators.py b/src/vm-repair/azext_vm_repair/_validators.py
@@ -59,7 +59,7 @@ def validate_create(cmd, namespace):
         namespace.repair_group_name = 'repair-' + namespace.vm_name + '-' + timestamp
 
     # Check encrypted disk
-    encryption_type, _, _ = _fetch_encryption_settings(source_vm)
+    encryption_type, _, _, _ = _fetch_encryption_settings(source_vm)
     # Currently only supporting single pass
     if encryption_type in (Encryption.SINGLE_WITH_KEK, Encryption.SINGLE_WITHOUT_KEK):
         if not namespace.unlock_encrypted_vm:

diff --git a/src/vm-repair/azext_vm_repair/custom.py b/src/vm-repair/azext_vm_repair/custom.py
@@ -4,10 +4,7 @@
 # --------------------------------------------------------------------------------------------
 
 # pylint: disable=line-too-long, too-many-locals, too-many-statements, broad-except, too-many-branches
-import json
 import timeit
-import os
-import pkgutil
 import traceback
 import requests
 from knack.log import get_logger
@@ -33,6 +30,7 @@
     _check_script_succeeded,
     _fetch_disk_info,
     _unlock_singlepass_encrypted_disk,
+    _invoke_run_command
 )
 from .exceptions import AzCommandError, SkuNotAvailableError, UnmanagedDiskCopyError, WindowsOsNotAvailableError, RunScriptNotFoundForIdError
 logger = get_logger(__name__)
@@ -106,7 +104,7 @@ def create(cmd, vm_name, resource_group_name, repair_password=None, repair_usern
 
             # Handle encrypted VM cases
             if unlock_encrypted_vm:
-                _unlock_singlepass_encrypted_disk(source_vm, is_linux, repair_group_name, repair_vm_name)
+                _unlock_singlepass_encrypted_disk(source_vm, resource_group_name, repair_vm_name, repair_group_name, copy_disk_name, is_linux)
 
         # UNMANAGED DISK
         else:
@@ -301,57 +299,44 @@ def run(cmd, vm_name, resource_group_name, run_id=None, repair_vm_id=None, custo
     # Init command helper object
     command = command_helper(logger, cmd, 'vm repair run')
 
-    REPAIR_DIR_NAME = 'azext_vm_repair'
-    SCRIPTS_DIR_NAME = 'scripts'
     LINUX_RUN_SCRIPT_NAME = 'linux-run-driver.sh'
     WINDOWS_RUN_SCRIPT_NAME = 'win-run-driver.ps1'
-    RUN_COMMAND_RUN_SHELL_ID = 'RunShellScript'
-    RUN_COMMAND_RUN_PS_ID = 'RunPowerShellScript'
 
     try:
         # Fetch VM data
         source_vm = get_vm(cmd, resource_group_name, vm_name)
-
-        # Build absoulte path of driver script
-        loader = pkgutil.get_loader(REPAIR_DIR_NAME)
-        mod = loader.load_module(REPAIR_DIR_NAME)
-        rootpath = os.path.dirname(mod.__file__)
         is_linux = _is_linux_os(source_vm)
+
         if is_linux:
-            run_script = os.path.join(rootpath, SCRIPTS_DIR_NAME, LINUX_RUN_SCRIPT_NAME)
-            command_id = RUN_COMMAND_RUN_SHELL_ID
+            script_name = LINUX_RUN_SCRIPT_NAME
         else:
-            run_script = os.path.join(rootpath, SCRIPTS_DIR_NAME, WINDOWS_RUN_SCRIPT_NAME)
-            command_id = RUN_COMMAND_RUN_PS_ID
+            script_name = WINDOWS_RUN_SCRIPT_NAME
 
         # If run_on_repair is False, then repair_vm is the source_vm (scripts run directly on source vm)
         repair_vm_id = parse_resource_id(repair_vm_id)
         repair_vm_name = repair_vm_id['name']
         repair_resource_group = repair_vm_id['resource_group']
 
-        repair_run_command = 'az vm run-command invoke -g {rg} -n {vm} --command-id {command_id} ' \
-                             '--scripts "@{run_script}" -o json' \
-                             .format(rg=repair_resource_group, vm=repair_vm_name, command_id=command_id, run_script=run_script)
+        run_command_params = []
+        additional_scripts = []
 
         # Normal scenario with run id
         if not custom_script_file:
             # Fetch run path from GitHub
             repair_script_path = _fetch_run_script_path(run_id)
-            repair_run_command += ' --parameters script_path="./{repair_script}"'.format(repair_script=repair_script_path)
+            run_command_params.append('script_path="./{}"'.format(repair_script_path))
         # Custom script scenario for script testers
         else:
-            # no-op run id
-            repair_run_command += ' "@{custom_file}" --parameters script_path=no-op'.format(custom_file=custom_script_file)
+            run_command_params.append('script_path=no-op')
+            additional_scripts.append(custom_script_file)
 
         # Append Parameters
         if parameters:
             if is_linux:
                 param_string = _process_bash_parameters(parameters)
             else:
                 param_string = _process_ps_parameters(parameters)
-            # Work around for run-command bug, unexpected behavior with space characters
-            param_string = param_string.replace(' ', '%20')
-            repair_run_command += ' params="{}"'.format(param_string)
+            run_command_params.append('params="{}"'.format(param_string))
         if run_on_repair:
             vm_string = 'repair VM'
         else:
@@ -360,18 +345,8 @@ def run(cmd, vm_name, resource_group_name, run_id=None, repair_vm_id=None, custo
 
         # Run script and measure script run-time
         script_start_time = timeit.default_timer()
-        return_str = _call_az_command(repair_run_command)
+        stdout, stderr = _invoke_run_command(script_name, repair_vm_name, repair_resource_group, is_linux, run_command_params, additional_scripts)
         command.script.run_time = timeit.default_timer() - script_start_time
-
-        # Extract stdout and stderr, if stderr exists then possible error
-        run_command_return = json.loads(return_str)
-        if is_linux:
-            run_command_message = run_command_return['value'][0]['message'].split('[stdout]')[1].split('[stderr]')
-            stdout = run_command_message[0].strip('\n')
-            stderr = run_command_message[1].strip('\n')
-        else:
-            stdout = run_command_return['value'][0]['message']
-            stderr = run_command_return['value'][1]['message']
         logger.debug("stderr: %s", stderr)
 
         # Parse through stdout to populate log properties: 'level', 'message'

diff --git a/src/vm-repair/azext_vm_repair/repair_utils.py b/src/vm-repair/azext_vm_repair/repair_utils.py
@@ -67,6 +67,54 @@ def _call_az_command(command_string, run_async=False, secure_params=None):
     return None
 
 
+def _invoke_run_command(script_name, vm_name, rg_name, is_linux, parameters=None, additional_custom_scripts=None):
+    """
+    Run a script from the extension's scripts directory on a VM through 'az vm run-command invoke' and return (stdout, stderr).
+    """
+
+    REPAIR_DIR_NAME = 'azext_vm_repair'
+    SCRIPTS_DIR_NAME = 'scripts'
+    RUN_COMMAND_RUN_SHELL_ID = 'RunShellScript'
+    RUN_COMMAND_RUN_PS_ID = 'RunPowerShellScript'
+
+    # Build absolute path of the script inside the azext_vm_repair package directory
+    loader = pkgutil.get_loader(REPAIR_DIR_NAME)
+    mod = loader.load_module(REPAIR_DIR_NAME)
+    rootpath = os.path.dirname(mod.__file__)
+    run_script = os.path.join(rootpath, SCRIPTS_DIR_NAME, script_name)
+
+    if is_linux:
+        command_id = RUN_COMMAND_RUN_SHELL_ID
+    else:
+        command_id = RUN_COMMAND_RUN_PS_ID
+
+    # Turn the list of additional scripts into extra quoted "@<path>" arguments for --scripts
+    additional_scripts_string = ''
+    if additional_custom_scripts:
+        for script in additional_custom_scripts:
+            additional_scripts_string += ' "@{script_name}"'.format(script_name=script)
+
+    run_command = 'az vm run-command invoke -g {rg} -n {vm} --command-id {command_id} ' \
+                  '--scripts @"{run_script}"{additional_scripts} -o json' \
+                  .format(rg=rg_name, vm=vm_name, command_id=command_id, run_script=run_script, additional_scripts=additional_scripts_string)
+    if parameters:
+        run_command += " --parameters {params}".format(params=' '.join(parameters))
+    return_str = _call_az_command(run_command)
+
+    # Extract stdout and stderr from the JSON output; a non-empty stderr signals a possible error
+    run_command_return = loads(return_str)
+
+    if is_linux:  # single message with '[stdout]' and '[stderr]' section markers
+        run_command_message = run_command_return['value'][0]['message'].split('[stdout]')[1].split('[stderr]')
+        stdout = run_command_message[0].strip('\n')
+        stderr = run_command_message[1].strip('\n')
+    else:  # stdout and stderr are read from two separate 'value' entries
+        stdout = run_command_return['value'][0]['message']
+        stderr = run_command_return['value'][1]['message']
+
+    return stdout, stderr
+
+
 def _get_current_vmrepair_version():
     from azure.cli.core.extension.operations import list_extensions
     version = [ext['version'] for ext in list_extensions() if ext['name'] == 'vm-repair']
@@ -186,27 +234,46 @@ def _list_resource_ids_in_rg(resource_group_name):
 def _fetch_encryption_settings(source_vm):
     key_vault = None
     kekurl = None
+    secreturl = None
     if source_vm.storage_profile.os_disk.encryption_settings is not None:
-        return Encryption.DUAL, key_vault, kekurl
+        return Encryption.DUAL, key_vault, kekurl, secreturl
     # Unmanaged disk only support dual
     if not _uses_managed_disk(source_vm):
-        return Encryption.NONE, key_vault, kekurl
+        return Encryption.NONE, key_vault, kekurl, secreturl
 
     disk_id = source_vm.storage_profile.os_disk.managed_disk.id
-    show_disk_command = 'az disk show --id {i} --query [encryptionSettingsCollection,encryptionSettingsCollection.encryptionSettings[].diskEncryptionKey.sourceVault.id,encryptionSettingsCollection.encryptionSettings[].keyEncryptionKey.keyUrl] -o json'.format(i=disk_id)
-    encryption_type, key_vault, kekurl = loads(_call_az_command(show_disk_command))
+    show_disk_command = 'az disk show --id {i} --query [encryptionSettingsCollection,encryptionSettingsCollection.encryptionSettings[].diskEncryptionKey.sourceVault.id,encryptionSettingsCollection.encryptionSettings[].keyEncryptionKey.keyUrl,encryptionSettingsCollection.encryptionSettings[].diskEncryptionKey.secretUrl] -o json' \
+                        .format(i=disk_id)
+    encryption_type, key_vault, kekurl, secreturl = loads(_call_az_command(show_disk_command))
     if [encryption_type, key_vault, kekurl] == [None, None, None]:
-        return Encryption.NONE, key_vault, kekurl
+        return Encryption.NONE, key_vault, kekurl, secreturl
     if kekurl == []:
-        key_vault = key_vault[0]
-        return Encryption.SINGLE_WITHOUT_KEK, key_vault, kekurl
-    key_vault, kekurl = key_vault[0], kekurl[0]
-    return Encryption.SINGLE_WITH_KEK, key_vault, kekurl
+        key_vault, secreturl = key_vault[0], secreturl[0]
+        return Encryption.SINGLE_WITHOUT_KEK, key_vault, kekurl, secreturl
+    key_vault, kekurl, secreturl = key_vault[0], kekurl[0], secreturl[0]
+    return Encryption.SINGLE_WITH_KEK, key_vault, kekurl, secreturl
+
+
+def _secret_tag_check(resource_group_name, copy_disk_name, secreturl):  # Re-tag the copied disk's key vault secret when its URL differs from secreturl
+    DEFAULT_LINUXPASSPHRASE_FILENAME = 'LinuxPassPhraseFileName'  # value written to the DiskEncryptionKeyFileName tag
+    show_disk_command = 'az disk show -g {g} -n {n} --query encryptionSettingsCollection.encryptionSettings[].diskEncryptionKey.secretUrl -o json' \
+                        .format(n=copy_disk_name, g=resource_group_name)
+    secreturl_new = loads(_call_az_command(show_disk_command))[0]  # first secret URL recorded on the copied disk
+    if secreturl == secreturl_new:
+        logger.debug('Secret urls are same. Skipping the tag check...')
+    else:
+        logger.debug('Secret urls are not same. Changing the tag...')
+        show_tag_command = 'az keyvault secret show --id {securl} --query [tags.DiskEncryptionKeyEncryptionAlgorithm,tags.DiskEncryptionKeyEncryptionKeyURL] -o json' \
+                           .format(securl=secreturl_new)
+        algorithm, keyurl = loads(_call_az_command(show_tag_command))  # current tag values, written back unchanged below
+        set_tag_command = 'az keyvault secret set-attributes --tags DiskEncryptionKeyFileName={keyfile} DiskEncryptionKeyEncryptionAlgorithm={alg} DiskEncryptionKeyEncryptionKeyURL={kekurl} --id {securl}' \
+                          .format(keyfile=DEFAULT_LINUXPASSPHRASE_FILENAME, alg=algorithm, kekurl=keyurl, securl=secreturl_new)
+        _call_az_command(set_tag_command)
 
 
-def _unlock_singlepass_encrypted_disk(source_vm, is_linux, repair_group_name, repair_vm_name):
+def _unlock_singlepass_encrypted_disk(source_vm, resource_group_name, repair_vm_name, repair_group_name, copy_disk_name, is_linux):
     # Installs the extension on repair VM and mounts the disk after unlocking.
-    encryption_type, key_vault, kekurl = _fetch_encryption_settings(source_vm)
+    encryption_type, key_vault, kekurl, secreturl = _fetch_encryption_settings(source_vm)
     if is_linux:
         volume_type = 'DATA'
     else:
@@ -219,36 +286,33 @@ def _unlock_singlepass_encrypted_disk(source_vm, is_linux, repair_group_name, re
         elif encryption_type is Encryption.SINGLE_WITHOUT_KEK:
             install_ade_extension_command = 'az vm encryption enable --disk-encryption-keyvault {vault} --name {repair} --resource-group {g} --volume-type {volume}' \
                                             .format(g=repair_group_name, repair=repair_vm_name, vault=key_vault, volume=volume_type)
+        # Add format-all flag for linux vms
+        if is_linux:
+            install_ade_extension_command += " --encrypt-format-all"
         logger.info('Unlocking attached copied disk...')
         _call_az_command(install_ade_extension_command)
         # Linux VM encryption extension has a bug and we need to manually unlock and mount its disk
         if is_linux:
+            # Validating secret tag and setting original tag if it got changed
+            _secret_tag_check(resource_group_name, copy_disk_name, secreturl)
             logger.debug("Manually unlocking and mounting disk for Linux VMs.")
-            _manually_unlock_mount_encrypted_disk(repair_group_name, repair_vm_name)
+            _manually_unlock_mount_encrypted_disk(repair_vm_name, repair_group_name)
     except AzCommandError as azCommandError:
         error_message = str(azCommandError)
         # Linux VM encryption extension bug where it fails and then continue to mount disk manually
         if is_linux and "Failed to encrypt data volumes with error" in error_message:
             logger.debug("Expected bug for linux VMs. Ignoring error.")
-            _manually_unlock_mount_encrypted_disk(repair_group_name, repair_vm_name)
+            # Validating secret tag and setting original tag if it got changed
+            _secret_tag_check(resource_group_name, copy_disk_name, secreturl)
+            _manually_unlock_mount_encrypted_disk(repair_vm_name, repair_group_name)
         else:
             raise
 
 
-def _manually_unlock_mount_encrypted_disk(repair_group_name, repair_vm_name):
+def _manually_unlock_mount_encrypted_disk(repair_vm_name, repair_group_name):
     # Unlocks the disk using the phasephrase and mounts it on the repair VM.
-    REPAIR_DIR_NAME = 'azext_vm_repair'
-    SCRIPTS_DIR_NAME = 'scripts'
     LINUX_RUN_SCRIPT_NAME = 'mount-encrypted-disk.sh'
-    command_id = 'RunShellScript'
-    loader = pkgutil.get_loader(REPAIR_DIR_NAME)
-    mod = loader.load_module(REPAIR_DIR_NAME)
-    rootpath = os.path.dirname(mod.__file__)
-    run_script = os.path.join(rootpath, SCRIPTS_DIR_NAME, LINUX_RUN_SCRIPT_NAME)
-    mount_disk_command = 'az vm run-command invoke -g {rg} -n {vm} --command-id {command_id} ' \
-                         '--scripts "@{run_script}" -o json' \
-                         .format(rg=repair_group_name, vm=repair_vm_name, command_id=command_id, run_script=run_script)
-    _call_az_command(mount_disk_command)
+    return _invoke_run_command(LINUX_RUN_SCRIPT_NAME, repair_vm_name, repair_group_name, True)
 
 
 def _fetch_compatible_windows_os_urn(source_vm):

diff --git a/src/vm-repair/azext_vm_repair/scripts/linux-run-driver.sh b/src/vm-repair/azext_vm_repair/scripts/linux-run-driver.sh
@@ -19,9 +19,7 @@ if [ $(ls | wc -l) -eq 3 ]; then
 		# Normal GitHub script scenario
 		if [ "$1" != "no-op" ]; then
 			chmod u+x $1 &&
-			# Work around for passing space characters through run-command
-			params=$(echo "$2" | sed "s/%20/ /") &&
-			command_string="$1 $params" &&
+			command_string="$*" &&
 			bash -e  $command_string >> $logFile
 		else # Custom script scenario
 			# Call the same script but it will only run the appended custom scripts