Skip to content

Commit 76f769c

Browse files
authored
Merge pull request #2530 from Azure/release-2.7.0.0
Merge release 2.7 to master
2 parents 73e1ce6 + 2b2e828 commit 76f769c

File tree

96 files changed

+5677
-2470
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

96 files changed

+5677
-2470
lines changed

README.md

+4-2
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ ResourceDisk.EnableSwap=n
195195
ResourceDisk.EnableSwapEncryption=n
196196
ResourceDisk.SwapSizeMB=0
197197
Logs.Verbose=n
198-
Logs.Collect=n
198+
Logs.Collect=y
199199
Logs.CollectPeriod=3600
200200
OS.AllowHTTP=n
201201
OS.RootDeviceScsiTimeout=300
@@ -461,10 +461,12 @@ leverages the system logrotate functionality to rotate logs.
461461
#### __Logs.Collect__
462462

463463
_Type: Boolean_
464-
_Default: n_
464+
_Default: y_
465465

466466
If set, agent logs will be periodically collected and uploaded to a secure location for improved supportability.
467467

468+
NOTE: This feature is only supported on Ubuntu 16.04+; this flag will not take effect on any other distro.
469+
468470
#### __Logs.CollectPeriod__
469471

470472
_Type: Integer_

azurelinuxagent/common/AgentGlobals.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,13 @@ class AgentGlobals(object):
2222
This class is used for setting AgentGlobals which can be used all throughout the Agent.
2323
"""
2424

25+
GUID_ZERO = "00000000-0000-0000-0000-000000000000"
26+
2527
#
2628
# Some modules (e.g. telemetry) require an up-to-date container ID. We update this variable each time we
2729
# fetch the goal state.
2830
#
29-
_container_id = "00000000-0000-0000-0000-000000000000"
31+
_container_id = GUID_ZERO
3032

3133
@staticmethod
3234
def get_container_id():

azurelinuxagent/common/cgroupapi.py

+15-6
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@
2626
from azurelinuxagent.common.cgroup import CpuCgroup
2727
from azurelinuxagent.common.cgroupstelemetry import CGroupsTelemetry
2828
from azurelinuxagent.common.conf import get_agent_pid_file_path
29-
from azurelinuxagent.common.exception import CGroupsException, ExtensionErrorCodes, ExtensionError, ExtensionOperationError
29+
from azurelinuxagent.common.exception import CGroupsException, ExtensionErrorCodes, ExtensionError, \
30+
ExtensionOperationError
3031
from azurelinuxagent.common.future import ustr
3132
from azurelinuxagent.common.osutil import systemd
3233
from azurelinuxagent.common.utils import fileutil, shellutil
@@ -39,10 +40,12 @@
3940
CGROUP_CONTROLLERS = ["cpu", "memory"]
4041
EXTENSION_SLICE_PREFIX = "azure-vmextensions"
4142

43+
4244
class SystemdRunError(CGroupsException):
4345
"""
4446
Raised when systemd-run fails
4547
"""
48+
4649
def __init__(self, msg=None):
4750
super(SystemdRunError, self).__init__(msg)
4851

@@ -56,7 +59,9 @@ def cgroups_supported():
5659
distro_version = FlexibleVersion(distro_info[1])
5760
except ValueError:
5861
return False
59-
return distro_name.lower() == 'ubuntu' and distro_version.major >= 16
62+
return ((distro_name.lower() == 'ubuntu' and distro_version.major >= 16) or
63+
(distro_name.lower() in ("centos", "redhat") and
64+
((distro_version.major == 7 and distro_version.minor >= 8) or distro_version.major >= 8)))
6065

6166
@staticmethod
6267
def track_cgroups(extension_cgroups):
@@ -118,6 +123,7 @@ class SystemdCgroupsApi(CGroupsApi):
118123
"""
119124
Cgroups interface via systemd
120125
"""
126+
121127
def __init__(self):
122128
self._cgroup_mountpoints = None
123129
self._agent_unit_name = None
@@ -156,7 +162,7 @@ def get_cgroup_mount_points(self):
156162
memory = path
157163
self._cgroup_mountpoints = {'cpu': cpu, 'memory': memory}
158164

159-
return self._cgroup_mountpoints['cpu'], self._cgroup_mountpoints['memory']
165+
return self._cgroup_mountpoints['cpu'], self._cgroup_mountpoints['memory']
160166

161167
@staticmethod
162168
def get_process_cgroup_relative_paths(process_id):
@@ -253,7 +259,8 @@ def get_extension_slice_name(extension_name):
253259
# Since '-' is used as a separator in systemd unit names, we replace it with '_' to prevent side-effects.
254260
return EXTENSION_SLICE_PREFIX + "-" + extension_name.replace('-', '_') + ".slice"
255261

256-
def start_extension_command(self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr, error_code=ExtensionErrorCodes.PluginUnknownFailure):
262+
def start_extension_command(self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr,
263+
error_code=ExtensionErrorCodes.PluginUnknownFailure):
257264
scope = "{0}_{1}".format(cmd_name, uuid.uuid4())
258265
extension_slice_name = self.get_extension_slice_name(extension_name)
259266
with self._systemd_run_commands_lock:
@@ -293,7 +300,8 @@ def start_extension_command(self, extension_name, command, cmd_name, timeout, sh
293300

294301
# Wait for process completion or timeout
295302
try:
296-
return handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout, stderr=stderr, error_code=error_code)
303+
return handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout,
304+
stderr=stderr, error_code=error_code)
297305
except ExtensionError as e:
298306
# The extension didn't terminate successfully. Determine whether it was due to systemd errors or
299307
# extension errors.
@@ -309,7 +317,8 @@ def start_extension_command(self, extension_name, command, cmd_name, timeout, sh
309317

310318
if isinstance(e, ExtensionOperationError):
311319
# no-member: Instance of 'ExtensionError' has no 'exit_code' member (no-member) - Disabled: e is actually an ExtensionOperationError
312-
err_msg = 'Systemd process exited with code %s and output %s' % (e.exit_code, process_output) # pylint: disable=no-member
320+
err_msg = 'Systemd process exited with code %s and output %s' % (
321+
e.exit_code, process_output) # pylint: disable=no-member
313322
else:
314323
err_msg = "Systemd timed-out, output: %s" % process_output
315324
raise SystemdRunError(err_msg)

azurelinuxagent/common/cgroupconfigurator.py

+33-25
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,13 @@
9696
"""
9797
_AGENT_THROTTLED_TIME_THRESHOLD = 120 # 2 minutes
9898

99+
99100
class DisableCgroups(object):
100101
ALL = "all"
101102
AGENT = "agent"
102103
EXTENSIONS = "extensions"
103104

105+
104106
def _log_cgroup_info(format_string, *args):
105107
message = format_string.format(*args)
106108
logger.info("[CGI] " + message)
@@ -120,6 +122,7 @@ class CGroupConfigurator(object):
120122
NOTE: with the exception of start_extension_command, none of the methods in this class
121123
raise exceptions (cgroup operations should not block extensions)
122124
"""
125+
123126
class _Impl(object):
124127
def __init__(self):
125128
self._initialized = False
@@ -165,7 +168,9 @@ def initialize(self):
165168
self.__setup_azure_slice()
166169

167170
cpu_controller_root, memory_controller_root = self.__get_cgroup_controllers()
168-
self._agent_cpu_cgroup_path, self._agent_memory_cgroup_path = self.__get_agent_cgroups(agent_slice, cpu_controller_root, memory_controller_root)
171+
self._agent_cpu_cgroup_path, self._agent_memory_cgroup_path = self.__get_agent_cgroups(agent_slice,
172+
cpu_controller_root,
173+
memory_controller_root)
169174

170175
if self._agent_cpu_cgroup_path is not None:
171176
_log_cgroup_info("Agent CPU cgroup: {0}", self._agent_cpu_cgroup_path)
@@ -218,7 +223,7 @@ def __collect_agent_unit_files_telemetry():
218223
agent_service_name = get_osutil().get_service_name()
219224
try:
220225
fragment_path = systemd.get_unit_property(agent_service_name, "FragmentPath")
221-
if fragment_path != "/lib/systemd/system/{0}.service".format(agent_service_name):
226+
if fragment_path != systemd.get_agent_unit_file():
222227
agent_unit_files.append(fragment_path)
223228
except Exception as exception:
224229
_log_cgroup_warning("Failed to query the agent's FragmentPath: {0}", ustr(exception))
@@ -233,7 +238,8 @@ def __collect_agent_unit_files_telemetry():
233238
for unit_file in agent_unit_files:
234239
try:
235240
with open(unit_file, "r") as file_object:
236-
_log_cgroup_info("Found a custom unit file for the agent: {0}\n{1}", unit_file, file_object.read())
241+
_log_cgroup_info("Found a custom unit file for the agent: {0}\n{1}", unit_file,
242+
file_object.read())
237243
except Exception as exception:
238244
_log_cgroup_warning("Can't read {0}: {1}", unit_file, ustr(exception))
239245

@@ -269,7 +275,8 @@ def __get_cgroup_controllers(self):
269275
#
270276
cgroup2_mount_point, cgroup2_controllers = self._cgroups_api.get_cgroup2_controllers()
271277
if cgroup2_mount_point is not None:
272-
_log_cgroup_info("cgroups v2 mounted at {0}. Controllers: [{1}]", cgroup2_mount_point, cgroup2_controllers)
278+
_log_cgroup_info("cgroups v2 mounted at {0}. Controllers: [{1}]", cgroup2_mount_point,
279+
cgroup2_controllers)
273280

274281
return cpu_controller_root, memory_controller_root
275282

@@ -322,7 +329,7 @@ def __setup_azure_slice():
322329

323330
if not os.path.exists(logcollector_slice):
324331
slice_contents = _LOGCOLLECTOR_SLICE_CONTENTS_FMT.format(cpu_quota=_LOGCOLLECTOR_CPU_QUOTA,
325-
memory_limit=_LOGCOLLECTOR_MEMORY_LIMIT)
332+
memory_limit=_LOGCOLLECTOR_MEMORY_LIMIT)
326333

327334
files_to_create.append((logcollector_slice, slice_contents))
328335

@@ -408,7 +415,8 @@ def __get_agent_cgroups(self, agent_slice, cpu_controller_root, memory_controlle
408415
agent_unit_name = systemd.get_agent_unit_name()
409416

410417
expected_relative_path = os.path.join(agent_slice, agent_unit_name)
411-
cpu_cgroup_relative_path, memory_cgroup_relative_path = self._cgroups_api.get_process_cgroup_relative_paths("self")
418+
cpu_cgroup_relative_path, memory_cgroup_relative_path = self._cgroups_api.get_process_cgroup_relative_paths(
419+
"self")
412420

413421
if cpu_cgroup_relative_path is None:
414422
_log_cgroup_warning("The agent's process is not within a CPU cgroup")
@@ -417,11 +425,11 @@ def __get_agent_cgroups(self, agent_slice, cpu_controller_root, memory_controlle
417425
_log_cgroup_info('CPUAccounting: {0}', systemd.get_unit_property(agent_unit_name, "CPUAccounting"))
418426
_log_cgroup_info('CPUQuota: {0}', systemd.get_unit_property(agent_unit_name, "CPUQuotaPerSecUSec"))
419427
else:
420-
cpu_cgroup_relative_path = None # Set the path to None to prevent monitoring
421428
_log_cgroup_warning(
422429
"The Agent is not in the expected CPU cgroup; will not enable monitoring. Cgroup:[{0}] Expected:[{1}]",
423430
cpu_cgroup_relative_path,
424431
expected_relative_path)
432+
cpu_cgroup_relative_path = None # Set the path to None to prevent monitoring
425433

426434
if memory_cgroup_relative_path is None:
427435
_log_cgroup_warning("The agent's process is not within a memory cgroup")
@@ -430,11 +438,11 @@ def __get_agent_cgroups(self, agent_slice, cpu_controller_root, memory_controlle
430438
memory_accounting = systemd.get_unit_property(agent_unit_name, "MemoryAccounting")
431439
_log_cgroup_info('MemoryAccounting: {0}', memory_accounting)
432440
else:
433-
memory_cgroup_relative_path = None # Set the path to None to prevent monitoring
434441
_log_cgroup_info(
435442
"The Agent is not in the expected memory cgroup; will not enable monitoring. CGroup:[{0}] Expected:[{1}]",
436443
memory_cgroup_relative_path,
437444
expected_relative_path)
445+
memory_cgroup_relative_path = None # Set the path to None to prevent monitoring
438446

439447
if cpu_controller_root is not None and cpu_cgroup_relative_path is not None:
440448
agent_cpu_cgroup_path = os.path.join(cpu_controller_root, cpu_cgroup_relative_path)
@@ -462,30 +470,30 @@ def extensions_enabled(self):
462470

463471
def enable(self):
464472
if not self.supported():
465-
raise CGroupsException("Attempted to enable cgroups, but they are not supported on the current platform")
473+
raise CGroupsException(
474+
"Attempted to enable cgroups, but they are not supported on the current platform")
466475
self._agent_cgroups_enabled = True
467476
self._extensions_cgroups_enabled = True
468477
self.__set_cpu_quota(conf.get_agent_cpu_quota())
469478

470-
def disable(self, reason, disableCgroups):
479+
def disable(self, reason, disable_cgroups):
471480
# Todo: disable/reset extension when ext quotas introduced
472-
if disableCgroups == DisableCgroups.ALL: # disable all
481+
if disable_cgroups == DisableCgroups.ALL: # disable all
473482
self._agent_cgroups_enabled = False
474483
self._extensions_cgroups_enabled = False
475484
self.__reset_agent_cpu_quota()
476485
CGroupsTelemetry.reset()
477-
elif disableCgroups == DisableCgroups.AGENT: # disable agent
486+
elif disable_cgroups == DisableCgroups.AGENT: # disable agent
478487
self._agent_cgroups_enabled = False
479488
self.__reset_agent_cpu_quota()
480489
CGroupsTelemetry.stop_tracking(CpuCgroup(AGENT_NAME_TELEMETRY, self._agent_cpu_cgroup_path))
481-
elif disableCgroups == DisableCgroups.EXTENSIONS: # disable extensions
490+
elif disable_cgroups == DisableCgroups.EXTENSIONS: # disable extensions
482491
self._extensions_cgroups_enabled = False
483492

484493
message = "[CGW] Disabling resource usage monitoring. Reason: {0}".format(reason)
485494
logger.info(message) # log as INFO for now, in the future it should be logged as WARNING
486495
add_event(op=WALAEventOperation.CGroupsDisabled, message=message, is_success=False, log_event=False)
487496

488-
489497
@staticmethod
490498
def __set_cpu_quota(quota):
491499
"""
@@ -510,6 +518,7 @@ def __reset_agent_cpu_quota():
510518
logger.info("Resetting agent's CPUQuota")
511519
if CGroupConfigurator._Impl.__try_set_cpu_quota(''): # setting an empty value resets to the default (infinity)
512520
CGroupsTelemetry.set_track_throttled_time(False)
521+
_log_cgroup_info('CPUQuota: {0}', systemd.get_unit_property(systemd.get_agent_unit_name(), "CPUQuotaPerSecUSec"))
513522

514523
@staticmethod
515524
def __try_set_cpu_quota(quota):
@@ -589,7 +598,8 @@ def _check_processes_in_agent_cgroup(self):
589598
if process in (daemon, extension_handler) or process in systemd_run_commands:
590599
continue
591600
# systemd_run_commands contains the shell that started systemd-run, so we also need to check for the parent
592-
if self._get_parent(process) in systemd_run_commands and self._get_command(process) == 'systemd-run':
601+
if self._get_parent(process) in systemd_run_commands and self._get_command(
602+
process) == 'systemd-run':
593603
continue
594604
# check if the process is a command started by the agent or a descendant of one of those commands
595605
current = process
@@ -697,7 +707,8 @@ def stop_tracking_extension_cgroups(self, extension_name):
697707
except Exception as exception:
698708
logger.info("Failed to stop tracking resource usage for the extension service: {0}", ustr(exception))
699709

700-
def start_extension_command(self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr, error_code=ExtensionErrorCodes.PluginUnknownFailure):
710+
def start_extension_command(self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr,
711+
error_code=ExtensionErrorCodes.PluginUnknownFailure):
701712
"""
702713
Starts a command (install/enable/etc) for an extension and adds the command's PID to the extension's cgroup
703714
:param extension_name: The extension executing the command
@@ -713,9 +724,12 @@ def start_extension_command(self, extension_name, command, cmd_name, timeout, sh
713724
"""
714725
if self.enabled():
715726
try:
716-
return self._cgroups_api.start_extension_command(extension_name, command, cmd_name, timeout, shell=shell, cwd=cwd, env=env, stdout=stdout, stderr=stderr, error_code=error_code)
727+
return self._cgroups_api.start_extension_command(extension_name, command, cmd_name, timeout,
728+
shell=shell, cwd=cwd, env=env, stdout=stdout,
729+
stderr=stderr, error_code=error_code)
717730
except SystemdRunError as exception:
718-
reason = 'Failed to start {0} using systemd-run, will try invoking the extension directly. Error: {1}'.format(extension_name, ustr(exception))
731+
reason = 'Failed to start {0} using systemd-run, will try invoking the extension directly. Error: {1}'.format(
732+
extension_name, ustr(exception))
719733
self.disable(reason, DisableCgroups.ALL)
720734
# fall-through and re-invoke the extension
721735

@@ -735,7 +749,7 @@ def setup_extension_slice(self, extension_name):
735749
if self.enabled():
736750
unit_file_install_path = systemd.get_unit_file_install_path()
737751
extension_slice_path = os.path.join(unit_file_install_path,
738-
SystemdCgroupsApi.get_extension_slice_name(extension_name))
752+
SystemdCgroupsApi.get_extension_slice_name(extension_name))
739753
try:
740754
slice_contents = _EXTENSION_SLICE_CONTENTS.format(extension_name=extension_name)
741755
CGroupConfigurator._Impl.__create_unit_file(extension_slice_path, slice_contents)
@@ -755,12 +769,6 @@ def remove_extension_slice(self, extension_name):
755769
if os.path.exists(extension_slice_path):
756770
self.stop_tracking_extension_cgroups(extension_name)
757771
CGroupConfigurator._Impl.__cleanup_unit_file(extension_slice_path)
758-
# stop the unit gracefully; the extensions slices will be removed from /sys/fs/cgroup path
759-
try:
760-
logger.info("Executing systemctl stop {0}".format(extension_slice_name))
761-
shellutil.run_command(["systemctl", "stop", extension_slice_name])
762-
except Exception as exception:
763-
_log_cgroup_warning("systemctl stop failed (remove slice): {0}", ustr(exception))
764772

765773
def set_extension_services_cpu_memory_quota(self, services_list):
766774
"""

0 commit comments

Comments
 (0)