Skip to content

Commit

Permalink
vm-repair: Fix Encrypted Ubuntu Bug, add LVM support (Azure#2339)
Browse files Browse the repository at this point in the history
* Adding code for unlock and added encryptformatall

* removing temp files

* Changes to logging and logic

* Fixing code for data disk detection on RedHat

* Create function for _invoke_run_command

* add NF for finding the data disk with awk

* Adding error traps to allow stderr to be printed

* Added function for validating tag if secret is changed

* Tidy code

Co-authored-by: Ubuntu <ceschi@ubuntudevrepair.mygpwaslj2re3c1zv23vyhla2c.bx.internal.cloudapp.net>
Co-authored-by: Francisco Franceschi <[email protected]>
Co-authored-by: root <root@ubuntu18dev1.vdtdwq5rkvyuhaj2iq0ntqetub.syx.internal.cloudapp.net>
  • Loading branch information
4 people authored Sep 14, 2020
1 parent db1a4b3 commit c906263
Show file tree
Hide file tree
Showing 9 changed files with 296 additions and 123 deletions.
2 changes: 1 addition & 1 deletion azure-cli-extensions.pyproj
Original file line number Diff line number Diff line change
Expand Up @@ -4756,7 +4756,7 @@
<Content Include="src\virtual-wan\azext_vwan\azext_metadata.json" />
<Content Include="src\virtual-wan\readme.md" />
<Content Include="src\vm-repair\azext_vm_repair\scripts\linux-run-driver.sh" />
<Content Include="src\vm-repair\azext_vm_repair\scripts\mount_encrypted_disk.sh" />
<Content Include="src\vm-repair\azext_vm_repair\scripts\mount-encrypted-disk.sh" />
<Content Include="src\vm-repair\azext_vm_repair\scripts\win-run-driver.ps1" />
<Content Include="src\vm-repair\README.md" />
<Content Include="src\vm-repair\setup.cfg" />
Expand Down
2 changes: 1 addition & 1 deletion src/vm-repair/azext_vm_repair/_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def validate_create(cmd, namespace):
namespace.repair_group_name = 'repair-' + namespace.vm_name + '-' + timestamp

# Check encrypted disk
encryption_type, _, _ = _fetch_encryption_settings(source_vm)
encryption_type, _, _, _ = _fetch_encryption_settings(source_vm)
# Currently only supporting single pass
if encryption_type in (Encryption.SINGLE_WITH_KEK, Encryption.SINGLE_WITHOUT_KEK):
if not namespace.unlock_encrypted_vm:
Expand Down
49 changes: 12 additions & 37 deletions src/vm-repair/azext_vm_repair/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,7 @@
# --------------------------------------------------------------------------------------------

# pylint: disable=line-too-long, too-many-locals, too-many-statements, broad-except, too-many-branches
import json
import timeit
import os
import pkgutil
import traceback
import requests
from knack.log import get_logger
Expand All @@ -33,6 +30,7 @@
_check_script_succeeded,
_fetch_disk_info,
_unlock_singlepass_encrypted_disk,
_invoke_run_command
)
from .exceptions import AzCommandError, SkuNotAvailableError, UnmanagedDiskCopyError, WindowsOsNotAvailableError, RunScriptNotFoundForIdError
logger = get_logger(__name__)
Expand Down Expand Up @@ -106,7 +104,7 @@ def create(cmd, vm_name, resource_group_name, repair_password=None, repair_usern

# Handle encrypted VM cases
if unlock_encrypted_vm:
_unlock_singlepass_encrypted_disk(source_vm, is_linux, repair_group_name, repair_vm_name)
_unlock_singlepass_encrypted_disk(source_vm, resource_group_name, repair_vm_name, repair_group_name, copy_disk_name, is_linux)

# UNMANAGED DISK
else:
Expand Down Expand Up @@ -301,57 +299,44 @@ def run(cmd, vm_name, resource_group_name, run_id=None, repair_vm_id=None, custo
# Init command helper object
command = command_helper(logger, cmd, 'vm repair run')

REPAIR_DIR_NAME = 'azext_vm_repair'
SCRIPTS_DIR_NAME = 'scripts'
LINUX_RUN_SCRIPT_NAME = 'linux-run-driver.sh'
WINDOWS_RUN_SCRIPT_NAME = 'win-run-driver.ps1'
RUN_COMMAND_RUN_SHELL_ID = 'RunShellScript'
RUN_COMMAND_RUN_PS_ID = 'RunPowerShellScript'

try:
# Fetch VM data
source_vm = get_vm(cmd, resource_group_name, vm_name)

# Build absolute path of driver script
loader = pkgutil.get_loader(REPAIR_DIR_NAME)
mod = loader.load_module(REPAIR_DIR_NAME)
rootpath = os.path.dirname(mod.__file__)
is_linux = _is_linux_os(source_vm)

if is_linux:
run_script = os.path.join(rootpath, SCRIPTS_DIR_NAME, LINUX_RUN_SCRIPT_NAME)
command_id = RUN_COMMAND_RUN_SHELL_ID
script_name = LINUX_RUN_SCRIPT_NAME
else:
run_script = os.path.join(rootpath, SCRIPTS_DIR_NAME, WINDOWS_RUN_SCRIPT_NAME)
command_id = RUN_COMMAND_RUN_PS_ID
script_name = WINDOWS_RUN_SCRIPT_NAME

# If run_on_repair is False, then repair_vm is the source_vm (scripts run directly on source vm)
repair_vm_id = parse_resource_id(repair_vm_id)
repair_vm_name = repair_vm_id['name']
repair_resource_group = repair_vm_id['resource_group']

repair_run_command = 'az vm run-command invoke -g {rg} -n {vm} --command-id {command_id} ' \
'--scripts "@{run_script}" -o json' \
.format(rg=repair_resource_group, vm=repair_vm_name, command_id=command_id, run_script=run_script)
run_command_params = []
additional_scripts = []

# Normal scenario with run id
if not custom_script_file:
# Fetch run path from GitHub
repair_script_path = _fetch_run_script_path(run_id)
repair_run_command += ' --parameters script_path="./{repair_script}"'.format(repair_script=repair_script_path)
run_command_params.append('script_path="./{}"'.format(repair_script_path))
# Custom script scenario for script testers
else:
# no-op run id
repair_run_command += ' "@{custom_file}" --parameters script_path=no-op'.format(custom_file=custom_script_file)
run_command_params.append('script_path=no-op')
additional_scripts.append(custom_script_file)

# Append Parameters
if parameters:
if is_linux:
param_string = _process_bash_parameters(parameters)
else:
param_string = _process_ps_parameters(parameters)
# Work around for run-command bug, unexpected behavior with space characters
param_string = param_string.replace(' ', '%20')
repair_run_command += ' params="{}"'.format(param_string)
run_command_params.append('params="{}"'.format(param_string))
if run_on_repair:
vm_string = 'repair VM'
else:
Expand All @@ -360,18 +345,8 @@ def run(cmd, vm_name, resource_group_name, run_id=None, repair_vm_id=None, custo

# Run script and measure script run-time
script_start_time = timeit.default_timer()
return_str = _call_az_command(repair_run_command)
stdout, stderr = _invoke_run_command(script_name, repair_vm_name, repair_resource_group, is_linux, run_command_params, additional_scripts)
command.script.run_time = timeit.default_timer() - script_start_time

# Extract stdout and stderr, if stderr exists then possible error
run_command_return = json.loads(return_str)
if is_linux:
run_command_message = run_command_return['value'][0]['message'].split('[stdout]')[1].split('[stderr]')
stdout = run_command_message[0].strip('\n')
stderr = run_command_message[1].strip('\n')
else:
stdout = run_command_return['value'][0]['message']
stderr = run_command_return['value'][1]['message']
logger.debug("stderr: %s", stderr)

# Parse through stdout to populate log properties: 'level', 'message'
Expand Down
114 changes: 89 additions & 25 deletions src/vm-repair/azext_vm_repair/repair_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,54 @@ def _call_az_command(command_string, run_async=False, secure_params=None):
return None


def _invoke_run_command(script_name, vm_name, rg_name, is_linux, parameters=None, additional_custom_scripts=None):
    """
    Use azure run command to run the scripts within the vm-repair/scripts file and return stdout, stderr.

    :param script_name: File name of the driver script inside the extension's 'scripts' directory.
    :param vm_name: Name of the VM the script is run on.
    :param rg_name: Resource group of that VM.
    :param is_linux: True selects the 'RunShellScript' command id and the '[stdout]'/'[stderr]'
        message parsing; False selects 'RunPowerShellScript' and reads the first two 'value' entries.
    :param parameters: Optional list of strings, joined with spaces and passed after '--parameters'.
    :param additional_custom_scripts: Optional iterable of script paths, each appended to '--scripts' as "@path".
    :return: Tuple (stdout, stderr) extracted from the run-command JSON output.
    :raises IndexError: On Linux, when the returned message lacks the '[stdout]' or '[stderr]' marker.
    """

    REPAIR_DIR_NAME = 'azext_vm_repair'
    SCRIPTS_DIR_NAME = 'scripts'
    RUN_COMMAND_RUN_SHELL_ID = 'RunShellScript'
    RUN_COMMAND_RUN_PS_ID = 'RunPowerShellScript'

    # Build absolute path of driver script
    # NOTE(review): pkgutil.get_loader and Loader.load_module are deprecated in recent Python
    # releases; left untouched here because load_module re-executes the package - confirm before replacing.
    loader = pkgutil.get_loader(REPAIR_DIR_NAME)
    mod = loader.load_module(REPAIR_DIR_NAME)
    rootpath = os.path.dirname(mod.__file__)
    run_script = os.path.join(rootpath, SCRIPTS_DIR_NAME, script_name)

    command_id = RUN_COMMAND_RUN_SHELL_ID if is_linux else RUN_COMMAND_RUN_PS_ID

    # Process script list to scripts string (each entry becomes ' "@<path>"')
    additional_scripts_string = ''.join(
        ' "@{script_name}"'.format(script_name=script) for script in additional_custom_scripts or []
    )

    run_command = 'az vm run-command invoke -g {rg} -n {vm} --command-id {command_id} ' \
                  '--scripts @"{run_script}"{additional_scripts} -o json' \
                  .format(rg=rg_name, vm=vm_name, command_id=command_id, run_script=run_script, additional_scripts=additional_scripts_string)
    if parameters:
        run_command += " --parameters {params}".format(params=' '.join(parameters))
    return_str = _call_az_command(run_command)

    # Extract stdout and stderr, if stderr exists then possible error
    run_command_return = loads(return_str)

    if is_linux:
        message = run_command_return['value'][0]['message']
        # Split only on the FIRST occurrence of each marker so that script output which
        # itself contains the text '[stdout]' or '[stderr]' is not silently truncated.
        run_command_message = message.split('[stdout]', 1)[1].split('[stderr]', 1)
        stdout = run_command_message[0].strip('\n')
        stderr = run_command_message[1].strip('\n')
    else:
        stdout = run_command_return['value'][0]['message']
        stderr = run_command_return['value'][1]['message']

    return stdout, stderr


def _get_current_vmrepair_version():
from azure.cli.core.extension.operations import list_extensions
version = [ext['version'] for ext in list_extensions() if ext['name'] == 'vm-repair']
Expand Down Expand Up @@ -186,27 +234,46 @@ def _list_resource_ids_in_rg(resource_group_name):
def _fetch_encryption_settings(source_vm):
key_vault = None
kekurl = None
secreturl = None
if source_vm.storage_profile.os_disk.encryption_settings is not None:
return Encryption.DUAL, key_vault, kekurl
return Encryption.DUAL, key_vault, kekurl, secreturl
# Unmanaged disk only support dual
if not _uses_managed_disk(source_vm):
return Encryption.NONE, key_vault, kekurl
return Encryption.NONE, key_vault, kekurl, secreturl

disk_id = source_vm.storage_profile.os_disk.managed_disk.id
show_disk_command = 'az disk show --id {i} --query [encryptionSettingsCollection,encryptionSettingsCollection.encryptionSettings[].diskEncryptionKey.sourceVault.id,encryptionSettingsCollection.encryptionSettings[].keyEncryptionKey.keyUrl] -o json'.format(i=disk_id)
encryption_type, key_vault, kekurl = loads(_call_az_command(show_disk_command))
show_disk_command = 'az disk show --id {i} --query [encryptionSettingsCollection,encryptionSettingsCollection.encryptionSettings[].diskEncryptionKey.sourceVault.id,encryptionSettingsCollection.encryptionSettings[].keyEncryptionKey.keyUrl,encryptionSettingsCollection.encryptionSettings[].diskEncryptionKey.secretUrl] -o json' \
.format(i=disk_id)
encryption_type, key_vault, kekurl, secreturl = loads(_call_az_command(show_disk_command))
if [encryption_type, key_vault, kekurl] == [None, None, None]:
return Encryption.NONE, key_vault, kekurl
return Encryption.NONE, key_vault, kekurl, secreturl
if kekurl == []:
key_vault = key_vault[0]
return Encryption.SINGLE_WITHOUT_KEK, key_vault, kekurl
key_vault, kekurl = key_vault[0], kekurl[0]
return Encryption.SINGLE_WITH_KEK, key_vault, kekurl
key_vault, secreturl = key_vault[0], secreturl[0]
return Encryption.SINGLE_WITHOUT_KEK, key_vault, kekurl, secreturl
key_vault, kekurl, secreturl = key_vault[0], kekurl[0], secreturl[0]
return Encryption.SINGLE_WITH_KEK, key_vault, kekurl, secreturl


def _secret_tag_check(resource_group_name, copy_disk_name, secreturl):
    """
    Compare the copied disk's secret url with the given one and reset the secret's tags if they differ.

    The secret url of the disk named copy_disk_name is read with 'az disk show'. When it equals
    secreturl nothing is changed. Otherwise the algorithm and key-url tags of the new secret are
    read and written back together with DiskEncryptionKeyFileName set to 'LinuxPassPhraseFileName'.

    :param resource_group_name: Resource group passed to 'az disk show' for the copied disk.
    :param copy_disk_name: Name of the copied disk whose secret url is inspected.
    :param secreturl: Secret url to compare against (callers pass the one fetched from the source VM).
    """
    DEFAULT_LINUXPASSPHRASE_FILENAME = 'LinuxPassPhraseFileName'
    show_disk_command = 'az disk show -g {g} -n {n} --query encryptionSettingsCollection.encryptionSettings[].diskEncryptionKey.secretUrl -o json' \
                        .format(n=copy_disk_name, g=resource_group_name)
    secreturl_new = loads(_call_az_command(show_disk_command))[0]
    if secreturl == secreturl_new:
        logger.debug('Secret urls are same. Skipping the tag check...')
        return

    logger.debug('Secret urls are not same. Changing the tag...')
    show_tag_command = 'az keyvault secret show --id {securl} --query [tags.DiskEncryptionKeyEncryptionAlgorithm,tags.DiskEncryptionKeyEncryptionKeyURL] -o json' \
                       .format(securl=secreturl_new)
    algorithm, keyurl = loads(_call_az_command(show_tag_command))

    # A tag missing on the secret comes back as JSON null (None); formatting it directly
    # would store the literal text 'None' as the tag value, so absent tags are left out.
    tags = ['DiskEncryptionKeyFileName={keyfile}'.format(keyfile=DEFAULT_LINUXPASSPHRASE_FILENAME)]
    if algorithm is not None:
        tags.append('DiskEncryptionKeyEncryptionAlgorithm={alg}'.format(alg=algorithm))
    if keyurl is not None:
        tags.append('DiskEncryptionKeyEncryptionKeyURL={kekurl}'.format(kekurl=keyurl))

    set_tag_command = 'az keyvault secret set-attributes --tags {tags} --id {securl}' \
                      .format(tags=' '.join(tags), securl=secreturl_new)
    _call_az_command(set_tag_command)


def _unlock_singlepass_encrypted_disk(source_vm, is_linux, repair_group_name, repair_vm_name):
def _unlock_singlepass_encrypted_disk(source_vm, resource_group_name, repair_vm_name, repair_group_name, copy_disk_name, is_linux):
# Installs the extension on repair VM and mounts the disk after unlocking.
encryption_type, key_vault, kekurl = _fetch_encryption_settings(source_vm)
encryption_type, key_vault, kekurl, secreturl = _fetch_encryption_settings(source_vm)
if is_linux:
volume_type = 'DATA'
else:
Expand All @@ -219,36 +286,33 @@ def _unlock_singlepass_encrypted_disk(source_vm, is_linux, repair_group_name, re
elif encryption_type is Encryption.SINGLE_WITHOUT_KEK:
install_ade_extension_command = 'az vm encryption enable --disk-encryption-keyvault {vault} --name {repair} --resource-group {g} --volume-type {volume}' \
.format(g=repair_group_name, repair=repair_vm_name, vault=key_vault, volume=volume_type)
# Add format-all flag for linux vms
if is_linux:
install_ade_extension_command += " --encrypt-format-all"
logger.info('Unlocking attached copied disk...')
_call_az_command(install_ade_extension_command)
# Linux VM encryption extension has a bug and we need to manually unlock and mount its disk
if is_linux:
# Validating secret tag and setting original tag if it got changed
_secret_tag_check(resource_group_name, copy_disk_name, secreturl)
logger.debug("Manually unlocking and mounting disk for Linux VMs.")
_manually_unlock_mount_encrypted_disk(repair_group_name, repair_vm_name)
_manually_unlock_mount_encrypted_disk(repair_vm_name, repair_group_name)
except AzCommandError as azCommandError:
error_message = str(azCommandError)
# The Linux VM encryption extension has a bug where it fails; ignore that failure and continue by mounting the disk manually
if is_linux and "Failed to encrypt data volumes with error" in error_message:
logger.debug("Expected bug for linux VMs. Ignoring error.")
_manually_unlock_mount_encrypted_disk(repair_group_name, repair_vm_name)
# Validating secret tag and setting original tag if it got changed
_secret_tag_check(resource_group_name, copy_disk_name, secreturl)
_manually_unlock_mount_encrypted_disk(repair_vm_name, repair_group_name)
else:
raise


def _manually_unlock_mount_encrypted_disk(repair_group_name, repair_vm_name):
def _manually_unlock_mount_encrypted_disk(repair_vm_name, repair_group_name):
# Unlocks the disk using the passphrase and mounts it on the repair VM.
REPAIR_DIR_NAME = 'azext_vm_repair'
SCRIPTS_DIR_NAME = 'scripts'
LINUX_RUN_SCRIPT_NAME = 'mount-encrypted-disk.sh'
command_id = 'RunShellScript'
loader = pkgutil.get_loader(REPAIR_DIR_NAME)
mod = loader.load_module(REPAIR_DIR_NAME)
rootpath = os.path.dirname(mod.__file__)
run_script = os.path.join(rootpath, SCRIPTS_DIR_NAME, LINUX_RUN_SCRIPT_NAME)
mount_disk_command = 'az vm run-command invoke -g {rg} -n {vm} --command-id {command_id} ' \
'--scripts "@{run_script}" -o json' \
.format(rg=repair_group_name, vm=repair_vm_name, command_id=command_id, run_script=run_script)
_call_az_command(mount_disk_command)
return _invoke_run_command(LINUX_RUN_SCRIPT_NAME, repair_vm_name, repair_group_name, True)


def _fetch_compatible_windows_os_urn(source_vm):
Expand Down
4 changes: 1 addition & 3 deletions src/vm-repair/azext_vm_repair/scripts/linux-run-driver.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@ if [ $(ls | wc -l) -eq 3 ]; then
# Normal GitHub script scenario
if [ "$1" != "no-op" ]; then
chmod u+x $1 &&
# Work around for passing space characters through run-command
params=$(echo "$2" | sed "s/%20/ /") &&
command_string="$1 $params" &&
command_string="$*" &&
bash -e $command_string >> $logFile
else # Custom script scenario
# Call the same script but it will only run the appended custom scripts
Expand Down
Loading

0 comments on commit c906263

Please sign in to comment.