From 1322eab5d4e2afce32822d30d37980f8e1dc321f Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Fri, 1 Nov 2019 17:07:52 -0700 Subject: [PATCH] Merge ci_feature into ci_feature_prod (#291) * Bug Fixes for exceptions in telemetry, remove limit set check (#289) * Bug Fixes 10222019 * Initialize container_cpu_memory_records in fhmb * Added telemetry to investigate health exceptions * Set frozen_string_literal to true * Send event once per container when lookup is empty, or limit is an array * Unit Tests, Use RS and POD to determine workload * Fixed Node Condition Bug, added exception handling to return get_rs_owner_ref --- .../plugin/filter_health_model_builder.rb | 31 +- .../plugin/health/agg_monitor_id_labels.rb | 1 + .../aggregate_monitor_state_finalizer.rb | 2 + .../plugin/health/cluster_health_state.rb | 2 + .../health_container_cpu_memory_aggregator.rb | 71 +- ...h_container_cpu_memory_record_formatter.rb | 2 + .../plugin/health/health_hierarchy_builder.rb | 2 + .../health/health_kube_api_down_handler.rb | 2 + .../health/health_kubernetes_resources.rb | 90 +- .../health/health_missing_signal_generator.rb | 2 + .../code/plugin/health/health_model_buffer.rb | 2 + .../plugin/health/health_model_builder.rb | 1 + .../plugin/health/health_model_constants.rb | 1 + .../health/health_model_definition_parser.rb | 1 + .../plugin/health/health_monitor_helpers.rb | 1 + .../plugin/health/health_monitor_optimizer.rb | 1 + .../plugin/health/health_monitor_provider.rb | 1 + .../plugin/health/health_monitor_record.rb | 1 + .../plugin/health/health_monitor_state.rb | 1 + .../plugin/health/health_monitor_telemetry.rb | 1 + .../plugin/health/health_monitor_utils.rb | 73 +- .../plugin/health/health_signal_reducer.rb | 1 + source/code/plugin/health/monitor_factory.rb | 1 + .../plugin/health/parent_monitor_provider.rb | 1 + source/code/plugin/health/unit_monitor.rb | 1 + source/code/plugin/in_kube_health.rb | 22 +- source/code/plugin/out_health_forward.rb | 1 + ...th_container_cpu_memory_aggregator_spec.rb | 8 +- .../health/health_kubernetes_resource_spec.rb | 26 +- .../health/health_model_builder_test.rb | 977 +++++++++--------- 30 files changed, 680 insertions(+), 647 deletions(-) diff --git a/source/code/plugin/filter_health_model_builder.rb b/source/code/plugin/filter_health_model_builder.rb index afb514a73..47ce7a631 100644 --- a/source/code/plugin/filter_health_model_builder.rb +++ b/source/code/plugin/filter_health_model_builder.rb @@ -39,17 +39,16 @@ def initialize @kube_api_down_handler = HealthKubeApiDownHandler.new @resources = HealthKubernetesResources.instance @reducer = HealthSignalReducer.new - @state = HealthMonitorState.new @generator = HealthMissingSignalGenerator.new - #TODO: cluster_labels needs to be initialized @provider = HealthMonitorProvider.new(@@cluster_id, HealthMonitorUtils.get_cluster_labels, @resources, @health_monitor_config_path) - deserialized_state_info = @cluster_health_state.get_state - @state = HealthMonitorState.new - @state.initialize_state(deserialized_state_info) @cluster_old_state = 'none' @cluster_new_state = 'none' @container_cpu_memory_records = [] @telemetry = HealthMonitorTelemetry.new + @state = HealthMonitorState.new + # move network calls to the end. 
This will ensure all the instance variables get initialized + deserialized_state_info = @cluster_health_state.get_state + @state.initialize_state(deserialized_state_info) rescue => e ApplicationInsightsUtility.sendExceptionTelemetry(e, {"FeatureArea" => "Health"}) end @@ -99,6 +98,10 @@ def filter_stream(tag, es) end container_records_aggregator = HealthContainerCpuMemoryAggregator.new(@resources, @provider) deduped_records = container_records_aggregator.dedupe_records(container_records) + if @container_cpu_memory_records.nil? + @log.info "@container_cpu_memory_records was not initialized" + @container_cpu_memory_records = [] #in some clusters, this is null, so initialize it again. + end @container_cpu_memory_records.push(*deduped_records) # push the records for aggregation later return MultiEventStream.new elsif tag.start_with?("kubehealth.ReplicaSet") @@ -106,14 +109,16 @@ def filter_stream(tag, es) es.each{|time, record| records.push(record) } - @buffer.add_to_buffer(records) - - container_records_aggregator = HealthContainerCpuMemoryAggregator.new(@resources, @provider) - container_records_aggregator.aggregate(@container_cpu_memory_records) - container_records_aggregator.compute_state - aggregated_container_records = container_records_aggregator.get_records - @buffer.add_to_buffer(aggregated_container_records) - + @buffer.add_to_buffer(records) # in_kube_health records + + aggregated_container_records = [] + if !@container_cpu_memory_records.nil? && !@container_cpu_memory_records.empty? + container_records_aggregator = HealthContainerCpuMemoryAggregator.new(@resources, @provider) + container_records_aggregator.aggregate(@container_cpu_memory_records) + container_records_aggregator.compute_state + aggregated_container_records = container_records_aggregator.get_records + end + @buffer.add_to_buffer(aggregated_container_records) #container cpu/memory records records_to_process = @buffer.get_buffer @buffer.reset_buffer @container_cpu_memory_records = [] diff --git a/source/code/plugin/health/agg_monitor_id_labels.rb b/source/code/plugin/health/agg_monitor_id_labels.rb index d5c724a86..03680d054 100644 --- a/source/code/plugin/health/agg_monitor_id_labels.rb +++ b/source/code/plugin/health/agg_monitor_id_labels.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require_relative 'health_model_constants' module HealthModel diff --git a/source/code/plugin/health/aggregate_monitor_state_finalizer.rb b/source/code/plugin/health/aggregate_monitor_state_finalizer.rb index 74e780924..dd69c9c4d 100644 --- a/source/code/plugin/health/aggregate_monitor_state_finalizer.rb +++ b/source/code/plugin/health/aggregate_monitor_state_finalizer.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + module HealthModel class AggregateMonitorStateFinalizer diff --git a/source/code/plugin/health/cluster_health_state.rb b/source/code/plugin/health/cluster_health_state.rb index 3b56dd243..fa9cb42b2 100644 --- a/source/code/plugin/health/cluster_health_state.rb +++ b/source/code/plugin/health/cluster_health_state.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require "net/http" require "net/https" require "uri" diff --git a/source/code/plugin/health/health_container_cpu_memory_aggregator.rb b/source/code/plugin/health/health_container_cpu_memory_aggregator.rb index ef1016158..f6b57e0ae 100644 --- a/source/code/plugin/health/health_container_cpu_memory_aggregator.rb +++ b/source/code/plugin/health/health_container_cpu_memory_aggregator.rb @@ -1,4 +1,12 @@ +# frozen_string_literal: true + require_relative 
'health_model_constants' + +# Require only when running inside container. +# otherwise unit tests will fail due to ApplicationInsightsUtility dependency on base omsagent ruby files. If you have your dev machine starting with omsagent-rs, then GOOD LUCK! +if Socket.gethostname.start_with?('omsagent-rs') + require_relative '../ApplicationInsightsUtility' +end =begin @cpu_records/@memory_records [ @@ -37,6 +45,10 @@ class HealthContainerCpuMemoryAggregator @@memory_counter_name = 'memoryRssBytes' @@cpu_counter_name = 'cpuUsageNanoCores' + @@workload_container_count_empty_event_sent = {} + @@limit_is_array_event_sent = {} + @@WORKLOAD_CONTAINER_COUNT_EMPTY_EVENT = "WorkloadContainerCountEmptyEvent" + @@LIMIT_IS_ARRAY_EVENT = "ResourceLimitIsAnArrayEvent" def initialize(resources, provider) @pod_uid_lookup = resources.get_pod_uid_lookup @workload_container_count = resources.get_workload_container_count @@ -163,11 +175,30 @@ def get_records container_cpu_memory_records = [] @cpu_records.each{|resource_key, record| + + cpu_limit_mc = 1.0 + if record["limit"].is_a?(Numeric) + cpu_limit_mc = record["limit"]/1000000.to_f + else + @log.info "CPU Limit is not a number #{record['limit']}" + if !@@limit_is_array_event_sent.key?(resource_key) + custom_properties = {} + custom_properties['limit'] = record['limit'] + if record['limit'].is_a?(Array) + record['limit'].each_index{|i| + custom_properties[i] = record['limit'][i] + } + end + @@limit_is_array_event_sent[resource_key] = true + #send once per resource key + ApplicationInsightsUtility.sendCustomEvent(@@LIMIT_IS_ARRAY_EVENT, custom_properties) + end + end health_monitor_record = { "timestamp" => time_now, "state" => record["state"], "details" => { - "cpu_limit_millicores" => record["limit"]/1000000.to_f, + "cpu_limit_millicores" => cpu_limit_mc, "cpu_usage_instances" => record["records"].map{|r| r.each {|k,v| k == "counter_value" ? r[k] = r[k] / 1000000.to_f : r[k] }}, @@ -219,12 +250,10 @@ def get_records private def calculate_monitor_state(v, config) - if !v['limit_set'] && v['namespace'] != 'kube-system' - v["state"] = HealthMonitorStates::WARNING - else - # sort records by descending order of metric - v["records"] = v["records"].sort_by{|record| record["counter_value"]}.reverse - size = v["records"].size + # sort records by descending order of metric + v["records"] = v["records"].sort_by{|record| record["counter_value"]}.reverse + size = v["records"].size + if !v["record_count"].nil? 
if size < v["record_count"] unknown_count = v["record_count"] - size for i in unknown_count.downto(1) @@ -232,16 +261,30 @@ def calculate_monitor_state(v, config) v["records"].insert(0, {"counter_value" => -1, "container" => v["container"], "pod_name" => "???", "state" => HealthMonitorStates::UNKNOWN }) #insert -1 for unknown records end end + else + v["state"] = HealthMonitorStates::UNKNOWN + container_key = "#{v['workload_name']}~~#{v['container']}" + @log.info "ContainerKey: #{container_key} Records Size: #{size} Records: #{v['records']} Record Count: #{v['record_count']} #{@workload_container_count}" - if size == 1 - state_index = 0 - else - state_threshold = config['StateThresholdPercentage'].to_f - count = ((state_threshold*size)/100).ceil - state_index = size - count + if !@@workload_container_count_empty_event_sent.key?(container_key) + custom_properties = {} + custom_properties = custom_properties.merge(v) + custom_properties = custom_properties.merge(@workload_container_count) + @log.info "Custom Properties : #{custom_properties}" + @@workload_container_count_empty_event_sent[container_key] = true + ApplicationInsightsUtility.sendCustomEvent(@@WORKLOAD_CONTAINER_COUNT_EMPTY_EVENT, custom_properties) end - v["state"] = v["records"][state_index]["state"] + return #simply return the state as unknown here + end + + if size == 1 + state_index = 0 + else + state_threshold = config['StateThresholdPercentage'].to_f + count = ((state_threshold*size)/100).ceil + state_index = size - count end + v["state"] = v["records"][state_index]["state"] end def calculate_container_instance_state(counter_value, limit, config) diff --git a/source/code/plugin/health/health_container_cpu_memory_record_formatter.rb b/source/code/plugin/health/health_container_cpu_memory_record_formatter.rb index 5c7db82d9..0c3f061f1 100644 --- a/source/code/plugin/health/health_container_cpu_memory_record_formatter.rb +++ b/source/code/plugin/health/health_container_cpu_memory_record_formatter.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + module HealthModel class HealthContainerCpuMemoryRecordFormatter diff --git a/source/code/plugin/health/health_hierarchy_builder.rb b/source/code/plugin/health/health_hierarchy_builder.rb index 2da0050db..bb48e083b 100644 --- a/source/code/plugin/health/health_hierarchy_builder.rb +++ b/source/code/plugin/health/health_hierarchy_builder.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'json' module HealthModel class HealthHierarchyBuilder diff --git a/source/code/plugin/health/health_kube_api_down_handler.rb b/source/code/plugin/health/health_kube_api_down_handler.rb index a87c43ef1..bb91f2e3b 100644 --- a/source/code/plugin/health/health_kube_api_down_handler.rb +++ b/source/code/plugin/health/health_kube_api_down_handler.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require_relative 'health_model_constants' module HealthModel class HealthKubeApiDownHandler diff --git a/source/code/plugin/health/health_kubernetes_resources.rb b/source/code/plugin/health/health_kubernetes_resources.rb index 30a9ac7ca..743dd8b94 100644 --- a/source/code/plugin/health/health_kubernetes_resources.rb +++ b/source/code/plugin/health/health_kubernetes_resources.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'singleton' require_relative 'health_model_constants' @@ -5,20 +7,20 @@ module HealthModel class HealthKubernetesResources include Singleton - attr_accessor :node_inventory, :pod_inventory, :deployment_inventory, :pod_uid_lookup, :workload_container_count + attr_accessor 
:node_inventory, :pod_inventory, :replicaset_inventory, :pod_uid_lookup, :workload_container_count attr_reader :nodes, :pods, :workloads, :deployment_lookup def initialize - @node_inventory = [] - @pod_inventory = [] - @deployment_inventory = [] + @node_inventory = {} + @pod_inventory = {} + @replicaset_inventory = {} @nodes = [] @pods = [] @workloads = [] @log = HealthMonitorHelpers.get_log_handle @pod_uid_lookup = {} - @deployment_lookup = {} @workload_container_count = {} + @workload_name_cache = {} end def get_node_inventory @@ -36,9 +38,8 @@ def get_nodes return @nodes end - def set_deployment_inventory(deployments) - @deployment_inventory = deployments - @deployment_lookup = {} + def set_replicaset_inventory(replicasets) + @replicaset_inventory = replicasets end def get_workload_names @@ -51,7 +52,12 @@ def get_workload_names end def build_pod_uid_lookup + if @pod_inventory.nil? || @pod_inventory['items'].nil? || @pod_inventory['items'].empty? || @pod_inventory['items'].size == 0 + @log.info "Not Clearing pod_uid_lookup and workload_container_count since pod inventory is nil" + return + end @workload_container_count = {} + @pod_uid_lookup = {} @pod_inventory['items'].each do |pod| begin namespace = pod['metadata']['namespace'] @@ -92,7 +98,7 @@ def build_pod_uid_lookup end end rescue => e - @log.info "Error in build_pod_uid_lookup #{pod} #{e.message}" + @log.info "Error in build_pod_uid_lookup for POD: #{pod_name} #{e.message} #{e.backtrace}" end end end @@ -105,19 +111,7 @@ def get_workload_container_count return @workload_container_count end - private def get_workload_name(pod) - - if @deployment_lookup.empty? - @deployment_inventory['items'].each do |deployment| - match_labels = deployment['spec']['selector']['matchLabels'].to_h - namespace = deployment['metadata']['namespace'] - match_labels.each{|k,v| - @deployment_lookup["#{namespace}-#{k}=#{v}"] = "#{deployment['metadata']['namespace']}~~#{deployment['metadata']['name']}" - } - end - end - begin has_owner = !pod['metadata']['ownerReferences'].nil? owner_kind = '' @@ -129,7 +123,6 @@ def get_workload_name(pod) controller_name = pod['metadata']['name'] end namespace = pod['metadata']['namespace'] - workload_name = '' if owner_kind.nil? owner_kind = 'Pod' @@ -139,41 +132,22 @@ def get_workload_name(pod) # we are excluding jobs return nil when 'replicaset' - # get the labels, and see if there is a match. If there is, it is the deployment. If not, use replica set name/controller name - labels = pod['metadata']['labels'].to_h - labels.each {|k,v| - lookup_key = "#{namespace}-#{k}=#{v}" - if @deployment_lookup.key?(lookup_key) - workload_name = @deployment_lookup[lookup_key] - break - end - } - if workload_name.empty? - workload_name = "#{namespace}~~#{controller_name}" - end + #TODO: + workload_name = get_replica_set_owner_ref(controller_name) + workload_name = "#{namespace}~~#{workload_name}" when 'daemonset' workload_name = "#{namespace}~~#{controller_name}" else - workload_name = "#{namespace}~~#{pod['metadata']['name']}" + workload_name = "#{namespace}~~#{controller_name}" end return workload_name rescue => e - @log.info "Error in get_workload_name(pod) #{e.message}" + @log.info "Error in get_workload_name(pod) #{e.message} #{e.backtrace}" return nil end end def get_workload_kind(pod) - if @deployment_lookup.empty? 
- @deployment_inventory['items'].each do |deployment| - match_labels = deployment['spec']['selector']['matchLabels'].to_h - namespace = deployment['metadata']['namespace'] - match_labels.each{|k,v| - @deployment_lookup["#{namespace}-#{k}=#{v}"] = "#{deployment['metadata']['namespace']}~~#{deployment['metadata']['name']}" - } - end - end - begin has_owner = !pod['metadata']['ownerReferences'].nil? owner_kind = '' @@ -193,6 +167,30 @@ def get_workload_kind(pod) end end + private + def get_replica_set_owner_ref(controller_name) + if @workload_name_cache.key?(controller_name) + return @workload_name_cache[controller_name] + end + begin + owner_ref = controller_name + @replicaset_inventory['items'].each{|rs| + rs_name = rs['metadata']['name'] + if controller_name.casecmp(rs_name) == 0 + if !rs['metadata']['ownerReferences'].nil? + owner_ref = rs['metadata']['ownerReferences'][0]['name'] if rs['metadata']['ownerReferences'][0]['name'] + end + break + end + } + @workload_name_cache[controller_name] = owner_ref + return owner_ref + rescue => e + @log.info "Error in get_replica_set_owner_ref(controller_name) #{e.message}" + return controller_name + end + end + def get_node_capacity(node_name, type) if node_name.nil? #unscheduled pods will not have a node name return -1 diff --git a/source/code/plugin/health/health_missing_signal_generator.rb b/source/code/plugin/health/health_missing_signal_generator.rb index 1827a0190..84af81ea7 100644 --- a/source/code/plugin/health/health_missing_signal_generator.rb +++ b/source/code/plugin/health/health_missing_signal_generator.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require_relative 'health_model_constants' require_relative 'health_monitor_record' diff --git a/source/code/plugin/health/health_model_buffer.rb b/source/code/plugin/health/health_model_buffer.rb index 1ccfe7349..1c3ec3332 100644 --- a/source/code/plugin/health/health_model_buffer.rb +++ b/source/code/plugin/health/health_model_buffer.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + module HealthModel =begin diff --git a/source/code/plugin/health/health_model_builder.rb b/source/code/plugin/health/health_model_builder.rb index 13813c8d9..43ed30d05 100644 --- a/source/code/plugin/health/health_model_builder.rb +++ b/source/code/plugin/health/health_model_builder.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require 'time' module HealthModel diff --git a/source/code/plugin/health/health_model_constants.rb b/source/code/plugin/health/health_model_constants.rb index 0922c7ff2..c74f86f4d 100644 --- a/source/code/plugin/health/health_model_constants.rb +++ b/source/code/plugin/health/health_model_constants.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true module HealthModel class MonitorState CRITICAL = "fail" diff --git a/source/code/plugin/health/health_model_definition_parser.rb b/source/code/plugin/health/health_model_definition_parser.rb index f6c7a781d..907bc1fd1 100644 --- a/source/code/plugin/health/health_model_definition_parser.rb +++ b/source/code/plugin/health/health_model_definition_parser.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true =begin Class to parse the health model definition. 
The definition expresses the relationship between monitors, how to roll up to an aggregate monitor, and what labels to "pass on" to the parent monitor diff --git a/source/code/plugin/health/health_monitor_helpers.rb b/source/code/plugin/health/health_monitor_helpers.rb index f784ae76e..74aa35af0 100644 --- a/source/code/plugin/health/health_monitor_helpers.rb +++ b/source/code/plugin/health/health_monitor_helpers.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require 'logger' require 'digest' require_relative 'health_model_constants' diff --git a/source/code/plugin/health/health_monitor_optimizer.rb b/source/code/plugin/health/health_monitor_optimizer.rb index b33c8a986..a63d59abf 100644 --- a/source/code/plugin/health/health_monitor_optimizer.rb +++ b/source/code/plugin/health/health_monitor_optimizer.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true module HealthModel class HealthMonitorOptimizer #ctor diff --git a/source/code/plugin/health/health_monitor_provider.rb b/source/code/plugin/health/health_monitor_provider.rb index e75824268..b36c46370 100644 --- a/source/code/plugin/health/health_monitor_provider.rb +++ b/source/code/plugin/health/health_monitor_provider.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require_relative 'health_model_constants' module HealthModel diff --git a/source/code/plugin/health/health_monitor_record.rb b/source/code/plugin/health/health_monitor_record.rb index 873736c3a..7df84ff53 100644 --- a/source/code/plugin/health/health_monitor_record.rb +++ b/source/code/plugin/health/health_monitor_record.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true HealthMonitorRecord = Struct.new( :monitor_id, :monitor_instance_id, diff --git a/source/code/plugin/health/health_monitor_state.rb b/source/code/plugin/health/health_monitor_state.rb index 8e2294cc9..16f8bedc4 100644 --- a/source/code/plugin/health/health_monitor_state.rb +++ b/source/code/plugin/health/health_monitor_state.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require_relative 'health_model_constants' module HealthModel diff --git a/source/code/plugin/health/health_monitor_telemetry.rb b/source/code/plugin/health/health_monitor_telemetry.rb index 4e80a5145..1227e1f83 100644 --- a/source/code/plugin/health/health_monitor_telemetry.rb +++ b/source/code/plugin/health/health_monitor_telemetry.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require_relative 'health_model_constants' require 'socket' if Socket.gethostname.start_with?('omsagent-rs') diff --git a/source/code/plugin/health/health_monitor_utils.rb b/source/code/plugin/health/health_monitor_utils.rb index e21fdc83d..0d297d215 100644 --- a/source/code/plugin/health/health_monitor_utils.rb +++ b/source/code/plugin/health/health_monitor_utils.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require 'logger' require 'digest' require_relative 'health_model_constants' @@ -73,59 +74,17 @@ def is_cluster_health_model_enabled end end - def get_pods_ready_hash(pod_inventory, deployment_inventory) + def get_pods_ready_hash(resources) pods_ready_percentage_hash = {} - deployment_lookup = {} - deployment_inventory['items'].each do |deployment| - match_labels = deployment['spec']['selector']['matchLabels'].to_h - namespace = deployment['metadata']['namespace'] - match_labels.each{|k,v| - deployment_lookup["#{namespace}-#{k}=#{v}"] = "#{deployment['metadata']['namespace']}~~#{deployment['metadata']['name']}" - } - end - pod_inventory['items'].each do |pod| + resources.pod_inventory['items'].each do |pod| begin - has_owner = 
!pod['metadata']['ownerReferences'].nil? - owner_kind = '' - if has_owner - owner_kind = pod['metadata']['ownerReferences'][0]['kind'] - controller_name = pod['metadata']['ownerReferences'][0]['name'] - else - owner_kind = pod['kind'] - controller_name = pod['metadata']['name'] - #log.info "#{JSON.pretty_generate(pod)}" - end - + workload_name = resources.get_workload_name(pod) namespace = pod['metadata']['namespace'] status = pod['status']['phase'] - - workload_name = '' - if owner_kind.nil? - owner_kind = 'Pod' - end - case owner_kind.downcase - when 'job' - # we are excluding jobs + owner_kind = resources.get_workload_kind(pod) + if owner_kind.casecmp('job') == 0 next - when 'replicaset' - # get the labels, and see if there is a match. If there is, it is the deployment. If not, use replica set name/controller name - labels = pod['metadata']['labels'].to_h - labels.each {|k,v| - lookup_key = "#{namespace}-#{k}=#{v}" - if deployment_lookup.key?(lookup_key) - workload_name = deployment_lookup[lookup_key] - break - end - } - if workload_name.empty? - workload_name = "#{namespace}~~#{controller_name}" - end - when 'daemonset' - workload_name = "#{namespace}~~#{controller_name}" - else - workload_name = "#{namespace}~~#{pod['metadata']['name']}" end - if pods_ready_percentage_hash.key?(workload_name) total_pods = pods_ready_percentage_hash[workload_name]['totalPods'] pods_ready = pods_ready_percentage_hash[workload_name]['podsReady'] @@ -141,7 +100,7 @@ def get_pods_ready_hash(pod_inventory, deployment_inventory) pods_ready_percentage_hash[workload_name] = {'totalPods' => total_pods, 'podsReady' => pods_ready, 'namespace' => namespace, 'workload_name' => workload_name, 'kind' => owner_kind} rescue => e - log.info "Error when processing pod #{pod['metadata']['name']} #{e.message}" + @log.info "Error when processing pod #{pod['metadata']['name']} #{e.message}" end end return pods_ready_percentage_hash @@ -152,30 +111,30 @@ def get_node_state_from_node_conditions(monitor_config, node_conditions) failtypes = ['outofdisk', 'networkunavailable'].to_set #default fail types if !monitor_config.nil? && !monitor_config["NodeConditionTypesForFailedState"].nil? failtypes = monitor_config["NodeConditionTypesForFailedState"] - if !failtypes.nil? - failtypes = failtypes.split(',').map{|x| x.downcase}.map{|x| x.gsub(" ","")}.to_set - end + if !failtypes.nil? + failtypes = failtypes.split(',').map{|x| x.downcase}.map{|x| x.gsub(" ","")}.to_set + end end - log = get_log_handle - #log.info "Fail Types #{failtypes.inspect}" + log = get_log_handle + #log.info "Fail Types #{failtypes.inspect}" node_conditions.each do |condition| type = condition['type'] status = condition['status'] #for each condition in the configuration, check if the type is not false. 
If yes, update state to fail if (failtypes.include?(type.downcase) && (status == 'True' || status == 'Unknown')) - return "fail" + return HealthMonitorStates::FAIL elsif ((type == "DiskPressure" || type == "MemoryPressure" || type == "PIDPressure") && (status == 'True' || status == 'Unknown')) - return "warn" + return HealthMonitorStates::WARNING elsif type == "Ready" && status == 'True' pass = true end end if pass - return "pass" + return HealthMonitorStates::PASS else - return "fail" + return HealthMonitorStates::FAIL end end diff --git a/source/code/plugin/health/health_signal_reducer.rb b/source/code/plugin/health/health_signal_reducer.rb index f92f24ac3..4708c4ee5 100644 --- a/source/code/plugin/health/health_signal_reducer.rb +++ b/source/code/plugin/health/health_signal_reducer.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require_relative 'health_model_constants' module HealthModel diff --git a/source/code/plugin/health/monitor_factory.rb b/source/code/plugin/health/monitor_factory.rb index 5f2c3945c..1e4f6f5b8 100644 --- a/source/code/plugin/health/monitor_factory.rb +++ b/source/code/plugin/health/monitor_factory.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require_relative 'aggregate_monitor' require_relative 'unit_monitor' diff --git a/source/code/plugin/health/parent_monitor_provider.rb b/source/code/plugin/health/parent_monitor_provider.rb index 4ab6e6297..e5766ea1b 100644 --- a/source/code/plugin/health/parent_monitor_provider.rb +++ b/source/code/plugin/health/parent_monitor_provider.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require_relative 'health_model_constants' module HealthModel class ParentMonitorProvider diff --git a/source/code/plugin/health/unit_monitor.rb b/source/code/plugin/health/unit_monitor.rb index 9af599321..6454007b6 100644 --- a/source/code/plugin/health/unit_monitor.rb +++ b/source/code/plugin/health/unit_monitor.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require_relative 'health_model_constants' require 'json' diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index 9a1b8f9a9..affbdd275 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -86,11 +86,11 @@ def enumerate node_inventory = JSON.parse(node_inventory_response.body) pod_inventory_response = KubernetesApiClient.getKubeResourceInfo("pods") pod_inventory = JSON.parse(pod_inventory_response.body) - deployment_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("deployments", api_version: "extensions/v1beta1").body) + replicaset_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("replicasets", api_version: "extensions/v1beta1").body) @resources.node_inventory = node_inventory @resources.pod_inventory = pod_inventory - @resources.set_deployment_inventory(deployment_inventory) + @resources.set_replicaset_inventory(replicaset_inventory) @resources.build_pod_uid_lookup if node_inventory_response.code.to_i != 200 @@ -106,7 +106,7 @@ def enumerate health_monitor_records.push(record) if record record = process_memory_oversubscribed_monitor(pod_inventory, node_inventory) health_monitor_records.push(record) if record - pods_ready_hash = HealthMonitorUtils.get_pods_ready_hash(pod_inventory, deployment_inventory) + pods_ready_hash = HealthMonitorUtils.get_pods_ready_hash(@resources) system_pods = pods_ready_hash.select{|k,v| v['namespace'] == 'kube-system'} workload_pods = pods_ready_hash.select{|k,v| v['namespace'] != 'kube-system'} @@ -121,7 +121,7 @@ def enumerate 
health_monitor_records.push(record) if record end else - hmlog.info "POD INVENTORY IS NIL" + @@hmlog.info "POD INVENTORY IS NIL" end if !node_inventory.nil? @@ -130,7 +130,7 @@ def enumerate health_monitor_records.push(record) if record end else - hmlog.info "NODE INVENTORY IS NIL" + @@hmlog.info "NODE INVENTORY IS NIL" end health_monitor_records.each do |record| @@ -260,14 +260,14 @@ def process_node_condition_monitor(node_inventory) node_inventory['items'].each do |node| node_name = node['metadata']['name'] conditions = node['status']['conditions'] - state = HealthMonitorUtils.get_node_state_from_node_conditions(monitor_config, conditions) + node_state = HealthMonitorUtils.get_node_state_from_node_conditions(monitor_config, conditions) details = {} conditions.each do |condition| - state = !(condition['status'].downcase == 'true' && condition['type'].downcase != 'ready') ? HealthMonitorStates::PASS : HealthMonitorStates::FAIL - details[condition['type']] = {"Reason" => condition['reason'], "Message" => condition['message'], "State" => state} + condition_state = !(condition['status'].downcase == 'true' && condition['type'].downcase != 'ready') ? HealthMonitorStates::PASS : HealthMonitorStates::FAIL + details[condition['type']] = {"Reason" => condition['reason'], "Message" => condition['message'], "State" => condition_state} #@@hmlog.info "Node Condition details: #{JSON.pretty_generate(details)}" end - health_monitor_record = {"timestamp" => timestamp, "state" => state, "details" => details} + health_monitor_record = {"timestamp" => timestamp, "state" => node_state, "details" => details} monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@cluster_id, node_name]) health_record = {} time_now = Time.now.utc.iso8601 @@ -291,11 +291,11 @@ def initialize_inventory node_inventory = JSON.parse(node_inventory_response.body) pod_inventory_response = KubernetesApiClient.getKubeResourceInfo("pods") pod_inventory = JSON.parse(pod_inventory_response.body) - deployment_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("deployments", api_version: "extensions/v1beta1").body) + replicaset_inventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("replicasets", api_version: "extensions/v1beta1").body) @resources.node_inventory = node_inventory @resources.pod_inventory = pod_inventory - @resources.set_deployment_inventory(deployment_inventory) + @resources.set_replicaset_inventory(replicaset_inventory) @resources.build_pod_uid_lookup end diff --git a/source/code/plugin/out_health_forward.rb b/source/code/plugin/out_health_forward.rb index 18664a22a..6fcfe368b 100644 --- a/source/code/plugin/out_health_forward.rb +++ b/source/code/plugin/out_health_forward.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true # # Fluentd # diff --git a/test/code/plugin/health/health_container_cpu_memory_aggregator_spec.rb b/test/code/plugin/health/health_container_cpu_memory_aggregator_spec.rb index 074878fe2..6972916bf 100644 --- a/test/code/plugin/health/health_container_cpu_memory_aggregator_spec.rb +++ b/test/code/plugin/health/health_container_cpu_memory_aggregator_spec.rb @@ -25,7 +25,7 @@ resources.pod_inventory = pods resources.node_inventory = nodes - resources.set_deployment_inventory(deployments) + resources.set_replicaset_inventory(deployments) resources.build_pod_uid_lookup #call this in in_kube_health every min cluster_labels = { @@ -60,7 +60,7 @@ resources.pod_inventory = pods resources.node_inventory = nodes - resources.set_deployment_inventory(deployments) + 
resources.set_replicaset_inventory(deployments) resources.build_pod_uid_lookup #call this in in_kube_health every min cluster_labels = { @@ -113,7 +113,7 @@ resources.pod_inventory = pods resources.node_inventory = nodes - resources.set_deployment_inventory(deployments) + resources.set_replicaset_inventory(deployments) resources.build_pod_uid_lookup #call this in in_kube_health every min cluster_labels = { @@ -163,7 +163,7 @@ resources.pod_inventory = pods resources.node_inventory = nodes - resources.set_deployment_inventory(deployments) + resources.set_replicaset_inventory(deployments) resources.build_pod_uid_lookup #call this in in_kube_health every min cluster_labels = { diff --git a/test/code/plugin/health/health_kubernetes_resource_spec.rb b/test/code/plugin/health/health_kubernetes_resource_spec.rb index dbeec4858..f4daedace 100644 --- a/test/code/plugin/health/health_kubernetes_resource_spec.rb +++ b/test/code/plugin/health/health_kubernetes_resource_spec.rb @@ -207,7 +207,7 @@ resources = HealthKubernetesResources.instance resources.node_inventory = nodes resources.pod_inventory = pods - resources.set_deployment_inventory(deployments) + resources.set_replicaset_inventory(deployments) #act parsed_nodes = resources.get_nodes parsed_workloads = resources.get_workload_names @@ -217,28 +217,6 @@ assert_equal parsed_workloads.size, 3 assert_equal parsed_nodes, ['aks-nodepool1-19574989-0', 'aks-nodepool1-19574989-1'] - parsed_workloads.sort.must_equal ['default~~diliprdeploymentnodeapps', 'default~~rss-site', 'kube-system~~kube-proxy'].sort + parsed_workloads.sort.must_equal ['default~~diliprdeploymentnodeapps-c4fdfb446', 'default~~rss-site', 'kube-system~~kube-proxy'].sort end - - # it 'builds the pod_uid lookup correctly' do - # #arrange - # f = File.read('C:/Users/dilipr/desktop/health/container_cpu_memory/nodes.json') - # nodes = JSON.parse(f) - # f = File.read('C:/Users/dilipr/desktop/health/container_cpu_memory/pods.json') - # pods = JSON.parse(f) - # f = File.read('C:/Users/dilipr/desktop/health/container_cpu_memory/deployments.json') - # deployments = JSON.parse(f) - - # resources = HealthKubernetesResources.instance - - # resources.node_inventory = nodes - # resources.pod_inventory = pods - # resources.set_deployment_inventory(deployments) #resets deployment_lookup -- this was causing Unit test failures - - # resources.build_pod_uid_lookup - - # resources.pod_uid_lookup - # resources.workload_container_count - - # end end \ No newline at end of file diff --git a/test/code/plugin/health/health_model_builder_test.rb b/test/code/plugin/health/health_model_builder_test.rb index a7c5e0927..3015ae55f 100644 --- a/test/code/plugin/health/health_model_builder_test.rb +++ b/test/code/plugin/health/health_model_builder_test.rb @@ -7,489 +7,510 @@ class FilterHealthModelBuilderTest < Test::Unit::TestCase include HealthModel - def test_event_stream - #setup - health_definition_path = File.join(__dir__, '../../../../installer/conf/health_model_definition.json') - health_model_definition = ParentMonitorProvider.new(HealthModelDefinitionParser.new(health_definition_path).parse_file) - monitor_factory = MonitorFactory.new - hierarchy_builder = HealthHierarchyBuilder.new(health_model_definition, monitor_factory) - # TODO: Figure out if we need to add NodeMonitorHierarchyReducer to the list of finalizers. 
For now, dont compress/optimize, since it becomes impossible to construct the model on the UX side - state_finalizers = [AggregateMonitorStateFinalizer.new] - monitor_set = MonitorSet.new - model_builder = HealthModelBuilder.new(hierarchy_builder, state_finalizers, monitor_set) - - nodes_file_map = { - #"extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_nodes.json", - "first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - #"first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - "second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - "third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - #"fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - #"missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - #"kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - } - - pods_file_map = { - #"extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_pods.json", - "first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - #"first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - "second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - "third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - #"fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - #"missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - #"kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - } - - cluster_labels = { - 'container.azm.ms/cluster-region' => 'eastus', - 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a', - 'container.azm.ms/cluster-resource-group' => 'dilipr-health-test', - 'container.azm.ms/cluster-name' => 'dilipr-health-test' - } - - cluster_id = 'fake_cluster_id' - - #test - state = HealthMonitorState.new() - generator = HealthMissingSignalGenerator.new - - for scenario in ["first", "second", "third"] - mock_data_path = File.join(__dir__, "../../../../health_records/#{scenario}_daemon_set_signals.json") - file = File.read(mock_data_path) - records = JSON.parse(file) - - node_inventory = JSON.parse(File.read(nodes_file_map[scenario])) - pod_inventory = JSON.parse(File.read(pods_file_map[scenario])) - deployment_inventory = JSON.parse(File.read(File.join(__dir__, "../../../../inventory/deployments.json"))) - resources = HealthKubernetesResources.instance - resources.node_inventory = node_inventory - resources.pod_inventory = pod_inventory - resources.set_deployment_inventory(deployment_inventory) - - workload_names = resources.get_workload_names - provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, File.join(__dir__, "../../../../installer/conf/healthmonitorconfig.json")) - - health_monitor_records = [] - records.each do |record| - monitor_instance_id = record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] - monitor_id = record[HealthMonitorRecordFields::MONITOR_ID] - health_monitor_record = HealthMonitorRecord.new( - record[HealthMonitorRecordFields::MONITOR_ID], - record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], - record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED], - record[HealthMonitorRecordFields::DETAILS]["state"], - 
provider.get_labels(record), - provider.get_config(monitor_id), - record[HealthMonitorRecordFields::DETAILS] - ) - - state.update_state(health_monitor_record, - provider.get_config(health_monitor_record.monitor_id) - ) - - # get the health state based on the monitor's operational state - # update state calls updates the state of the monitor based on configuration and history of the the monitor records - health_monitor_record.state = state.get_state(monitor_instance_id).new_state - health_monitor_records.push(health_monitor_record) - instance_state = state.get_state(monitor_instance_id) - #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" - end - - - #handle kube api down - kube_api_down_handler = HealthKubeApiDownHandler.new - health_monitor_records = kube_api_down_handler.handle_kube_api_down(health_monitor_records) - - # Dedupe daemonset signals - # Remove unit monitor signals for “gone” objects - reducer = HealthSignalReducer.new() - reduced_records = reducer.reduce_signals(health_monitor_records, resources) - - cluster_id = 'fake_cluster_id' - - #get the list of 'none' and 'unknown' signals - missing_signals = generator.get_missing_signals(cluster_id, reduced_records, resources, provider) - #update state for missing signals - missing_signals.each{|signal| - state.update_state(signal, - provider.get_config(signal.monitor_id) - ) - } - generator.update_last_received_records(reduced_records) - reduced_records.push(*missing_signals) - - # build the health model - all_records = reduced_records - model_builder.process_records(all_records) - all_monitors = model_builder.finalize_model - - # update the state for aggregate monitors (unit monitors are updated above) - all_monitors.each{|monitor_instance_id, monitor| - if monitor.is_aggregate_monitor - state.update_state(monitor, - provider.get_config(monitor.monitor_id) - ) - end - - instance_state = state.get_state(monitor_instance_id) - #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" - should_send = instance_state.should_send - - # always send cluster monitor as a heartbeat - if !should_send && monitor_instance_id != MonitorId::CLUSTER - all_monitors.delete(monitor_instance_id) - end - } - - records_to_send = [] - all_monitors.keys.each{|key| - record = provider.get_record(all_monitors[key], state) - #puts "#{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}" - } - - if scenario == "first" - assert_equal 50, all_monitors.size - elsif scenario == "second" - assert_equal 34, all_monitors.size - elsif scenario == "third" - assert_equal 5, all_monitors.size - end - # for each key in monitor.keys, - # get the state from health_monitor_state - # generate the record to send - serializer = HealthStateSerializer.new(File.join(__dir__, '../../../../health_records\health_model_state.json')) - serializer.serialize(state) - - deserializer = HealthStateDeserializer.new(File.join(__dir__, '../../../../health_records\health_model_state.json')) - deserialized_state = deserializer.deserialize - - after_state = HealthMonitorState.new - after_state.initialize_state(deserialized_state) - end - end - - def test_event_stream_aks_engine - - #setup - health_definition_path = File.join(__dir__, '../../../../installer\conf\health_model_definition.json') - health_model_definition = ParentMonitorProvider.new(HealthModelDefinitionParser.new(health_definition_path).parse_file) - monitor_factory = MonitorFactory.new - 
hierarchy_builder = HealthHierarchyBuilder.new(health_model_definition, monitor_factory) - state_finalizers = [AggregateMonitorStateFinalizer.new] - monitor_set = MonitorSet.new - model_builder = HealthModelBuilder.new(hierarchy_builder, state_finalizers, monitor_set) - - nodes_file_map = { - #"extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_nodes.json", - #"first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - #"first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - #"second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - #"third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - #"fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - #"missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - #"kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", - "aks-engine-1" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/nodes.json", - "aks-engine-2" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/nodes.json", - "aks-engine-3" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/nodes.json", - } - - pods_file_map = { - #"extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_pods.json", - #"first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - #"first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - #"second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - #"third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - #"fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - #"missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - #"kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", - "aks-engine-1" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/pods.json", - "aks-engine-2" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/pods.json", - "aks-engine-3" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/pods.json", - } - - cluster_labels = { - 'container.azm.ms/cluster-region' => 'eastus', - 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a', - 'container.azm.ms/cluster-resource-group' => 'aks-engine-health', - 'container.azm.ms/cluster-name' => 'aks-engine-health' - } - - cluster_id = 'fake_cluster_id' - - #test - state = HealthMonitorState.new() - generator = HealthMissingSignalGenerator.new - - for scenario in 1..3 - mock_data_path = File.join(__dir__, "../../../../health_records/aks-engine/aks-engine-#{scenario}.json") - file = File.read(mock_data_path) - records = JSON.parse(file) - - node_inventory = JSON.parse(File.read(nodes_file_map["aks-engine-#{scenario}"])) - pod_inventory = JSON.parse(File.read(pods_file_map["aks-engine-#{scenario}"])) - deployment_inventory = JSON.parse(File.read(File.join(__dir__, "../../../../inventory/aks-engine/deployments.json"))) - resources = HealthKubernetesResources.instance - resources.node_inventory = node_inventory - resources.pod_inventory = pod_inventory - resources.deployment_inventory = deployment_inventory - - workload_names = resources.get_workload_names - 
provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, File.join(__dir__, "../../../../installer/conf/healthmonitorconfig.json")) - - health_monitor_records = [] - records.each do |record| - monitor_instance_id = record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] - monitor_id = record[HealthMonitorRecordFields::MONITOR_ID] - health_monitor_record = HealthMonitorRecord.new( - record[HealthMonitorRecordFields::MONITOR_ID], - record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], - record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED], - record[HealthMonitorRecordFields::DETAILS]["state"], - provider.get_labels(record), - provider.get_config(monitor_id), - record[HealthMonitorRecordFields::DETAILS] - ) - - state.update_state(health_monitor_record, - provider.get_config(health_monitor_record.monitor_id) - ) - - # get the health state based on the monitor's operational state - # update state calls updates the state of the monitor based on configuration and history of the the monitor records - health_monitor_record.state = state.get_state(monitor_instance_id).new_state - health_monitor_records.push(health_monitor_record) - instance_state = state.get_state(monitor_instance_id) - #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" - end - - - #handle kube api down - kube_api_down_handler = HealthKubeApiDownHandler.new - health_monitor_records = kube_api_down_handler.handle_kube_api_down(health_monitor_records) - - # Dedupe daemonset signals - # Remove unit monitor signals for “gone” objects - reducer = HealthSignalReducer.new() - reduced_records = reducer.reduce_signals(health_monitor_records, resources) - - cluster_id = 'fake_cluster_id' - - #get the list of 'none' and 'unknown' signals - missing_signals = generator.get_missing_signals(cluster_id, reduced_records, resources, provider) - #update state for missing signals - missing_signals.each{|signal| - state.update_state(signal, - provider.get_config(signal.monitor_id) - ) - } - generator.update_last_received_records(reduced_records) - reduced_records.push(*missing_signals) - - # build the health model - all_records = reduced_records - model_builder.process_records(all_records) - all_monitors = model_builder.finalize_model - - # update the state for aggregate monitors (unit monitors are updated above) - all_monitors.each{|monitor_instance_id, monitor| - if monitor.is_aggregate_monitor - state.update_state(monitor, - provider.get_config(monitor.monitor_id) - ) - end - - instance_state = state.get_state(monitor_instance_id) - #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}" - should_send = instance_state.should_send - - # always send cluster monitor as a heartbeat - if !should_send && monitor_instance_id != MonitorId::CLUSTER - all_monitors.delete(monitor_instance_id) - end - } - - records_to_send = [] - all_monitors.keys.each{|key| - record = provider.get_record(all_monitors[key], state) - #puts "#{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}" - } - - if scenario == 1 - assert_equal 58, all_monitors.size - elsif scenario == 2 - assert_equal 37, all_monitors.size - elsif scenario == 3 - assert_equal 6, all_monitors.size - end - # for each key in monitor.keys, - # get the state from health_monitor_state - # generate the record to send - serializer = HealthStateSerializer.new(File.join(__dir__, '../../../../health_records\health_model_state_aks-engine.json')) - 
serializer.serialize(state) - - deserializer = HealthStateDeserializer.new(File.join(__dir__, '../../../../health_records\health_model_state_aks-engine.json')) - deserialized_state = deserializer.deserialize - - after_state = HealthMonitorState.new - after_state.initialize_state(deserialized_state) - end - end - - def test_container_memory_cpu_with_model - health_definition_path = File.join(__dir__, '../../../../installer/conf/health_model_definition.json') - health_model_definition = ParentMonitorProvider.new(HealthModelDefinitionParser.new(health_definition_path).parse_file) - monitor_factory = MonitorFactory.new - hierarchy_builder = HealthHierarchyBuilder.new(health_model_definition, monitor_factory) - # TODO: Figure out if we need to add NodeMonitorHierarchyReducer to the list of finalizers. For now, dont compress/optimize, since it becomes impossible to construct the model on the UX side - state_finalizers = [AggregateMonitorStateFinalizer.new] - monitor_set = MonitorSet.new - model_builder = HealthModelBuilder.new(hierarchy_builder, state_finalizers, monitor_set) - - nodes_file_map = { - "first" => "C:/Users/dilipr/desktop/health/container_cpu_memory/nodes.json", - "second" => "C:/Users/dilipr/desktop/health/container_cpu_memory/nodes.json", - "third" => "C:/Users/dilipr/desktop/health/container_cpu_memory/nodes.json", - } - - pods_file_map = { - "first" => "C:/Users/dilipr/desktop/health/container_cpu_memory/pods.json", - "second" => "C:/Users/dilipr/desktop/health/container_cpu_memory/pods.json", - "third" => "C:/Users/dilipr/desktop/health/container_cpu_memory/pods.json", - } - - cluster_labels = { - 'container.azm.ms/cluster-region' => 'eastus', - 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a', - 'container.azm.ms/cluster-resource-group' => 'dilipr-health-test', - 'container.azm.ms/cluster-name' => 'dilipr-health-test' - } - - cluster_id = 'fake_cluster_id' - - #test - state = HealthMonitorState.new() - generator = HealthMissingSignalGenerator.new - - mock_data_path = "C:/Users/dilipr/desktop/health/container_cpu_memory/daemonset.json" - file = File.read(mock_data_path) - records = JSON.parse(file) - - node_inventory = JSON.parse(File.read("C:/Users/dilipr/desktop/health/container_cpu_memory/nodes.json")) - pod_inventory = JSON.parse(File.read("C:/Users/dilipr/desktop/health/container_cpu_memory/pods.json")) - deployment_inventory = JSON.parse(File.read("C:/Users/dilipr/desktop/health/container_cpu_memory/deployments.json")) + # def test_event_stream + # #setup + # health_definition_path = File.join(__dir__, '../../../../installer/conf/health_model_definition.json') + # health_model_definition = ParentMonitorProvider.new(HealthModelDefinitionParser.new(health_definition_path).parse_file) + # monitor_factory = MonitorFactory.new + # hierarchy_builder = HealthHierarchyBuilder.new(health_model_definition, monitor_factory) + # # TODO: Figure out if we need to add NodeMonitorHierarchyReducer to the list of finalizers. 
For now, dont compress/optimize, since it becomes impossible to construct the model on the UX side + # state_finalizers = [AggregateMonitorStateFinalizer.new] + # monitor_set = MonitorSet.new + # model_builder = HealthModelBuilder.new(hierarchy_builder, state_finalizers, monitor_set) + + # nodes_file_map = { + # #"extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_nodes.json", + # "first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # #"first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # "second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # "third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # #"fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # #"missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # #"kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json", + # } + + # pods_file_map = { + # #"extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_pods.json", + # "first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # #"first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # "second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # "third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # #"fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # #"missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # #"kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json", + # } + + # cluster_labels = { + # 'container.azm.ms/cluster-region' => 'eastus', + # 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a', + # 'container.azm.ms/cluster-resource-group' => 'dilipr-health-test', + # 'container.azm.ms/cluster-name' => 'dilipr-health-test' + # } + + # cluster_id = 'fake_cluster_id' + + # #test + # state = HealthMonitorState.new() + # generator = HealthMissingSignalGenerator.new + + # for scenario in ["first", "second", "third"] + # mock_data_path = File.join(__dir__, "../../../../health_records/#{scenario}_daemon_set_signals.json") + # file = File.read(mock_data_path) + # records = JSON.parse(file) + + # node_inventory = JSON.parse(File.read(nodes_file_map[scenario])) + # pod_inventory = JSON.parse(File.read(pods_file_map[scenario])) + # deployment_inventory = JSON.parse(File.read(File.join(__dir__, "../../../../inventory/deployments.json"))) + # resources = HealthKubernetesResources.instance + # resources.node_inventory = node_inventory + # resources.pod_inventory = pod_inventory + # resources.set_replicaset_inventory(deployment_inventory) + + # workload_names = resources.get_workload_names + # provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, File.join(__dir__, "../../../../installer/conf/healthmonitorconfig.json")) + + # health_monitor_records = [] + # records.each do |record| + # monitor_instance_id = record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID] + # monitor_id = record[HealthMonitorRecordFields::MONITOR_ID] + # health_monitor_record = HealthMonitorRecord.new( + # record[HealthMonitorRecordFields::MONITOR_ID], + # record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID], + # 
record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED],
+ # record[HealthMonitorRecordFields::DETAILS]["state"],
+ # provider.get_labels(record),
+ # provider.get_config(monitor_id),
+ # record[HealthMonitorRecordFields::DETAILS]
+ # )
+
+ # state.update_state(health_monitor_record,
+ # provider.get_config(health_monitor_record.monitor_id)
+ # )
+
+ # # get the health state based on the monitor's operational state
+ # # the update_state call updates the state of the monitor based on configuration and history of the monitor records
+ # health_monitor_record.state = state.get_state(monitor_instance_id).new_state
+ # health_monitor_records.push(health_monitor_record)
+ # instance_state = state.get_state(monitor_instance_id)
+ # #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}"
+ # end
+
+
+ # #handle kube api down
+ # kube_api_down_handler = HealthKubeApiDownHandler.new
+ # health_monitor_records = kube_api_down_handler.handle_kube_api_down(health_monitor_records)
+
+ # # Dedupe daemonset signals
+ # # Remove unit monitor signals for “gone” objects
+ # reducer = HealthSignalReducer.new()
+ # reduced_records = reducer.reduce_signals(health_monitor_records, resources)
+
+ # cluster_id = 'fake_cluster_id'
+
+ # #get the list of 'none' and 'unknown' signals
+ # missing_signals = generator.get_missing_signals(cluster_id, reduced_records, resources, provider)
+ # #update state for missing signals
+ # missing_signals.each{|signal|
+ # state.update_state(signal,
+ # provider.get_config(signal.monitor_id)
+ # )
+ # }
+ # generator.update_last_received_records(reduced_records)
+ # reduced_records.push(*missing_signals)
+
+ # # build the health model
+ # all_records = reduced_records
+ # model_builder.process_records(all_records)
+ # all_monitors = model_builder.finalize_model
+
+ # # update the state for aggregate monitors (unit monitors are updated above)
+ # all_monitors.each{|monitor_instance_id, monitor|
+ # if monitor.is_aggregate_monitor
+ # state.update_state(monitor,
+ # provider.get_config(monitor.monitor_id)
+ # )
+ # end
+
+ # instance_state = state.get_state(monitor_instance_id)
+ # #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}"
+ # should_send = instance_state.should_send
+
+ # # always send cluster monitor as a heartbeat
+ # if !should_send && monitor_instance_id != MonitorId::CLUSTER
+ # all_monitors.delete(monitor_instance_id)
+ # end
+ # }
+
+ # records_to_send = []
+ # all_monitors.keys.each{|key|
+ # record = provider.get_record(all_monitors[key], state)
+ # #puts "#{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}"
+ # }
+
+ # if scenario == "first"
+ # assert_equal 50, all_monitors.size
+ # elsif scenario == "second"
+ # assert_equal 34, all_monitors.size
+ # elsif scenario == "third"
+ # assert_equal 5, all_monitors.size
+ # end
+ # # for each key in monitor.keys,
+ # # get the state from health_monitor_state
+ # # generate the record to send
+ # serializer = HealthStateSerializer.new(File.join(__dir__, '../../../../health_records/health_model_state.json'))
+ # serializer.serialize(state)
+
+ # deserializer = HealthStateDeserializer.new(File.join(__dir__, '../../../../health_records/health_model_state.json'))
+ # deserialized_state = deserializer.deserialize
+
+ # after_state = HealthMonitorState.new
+ # after_state.initialize_state(deserialized_state)
+ # end
+ # end
+
+ # def test_event_stream_aks_engine
+
+ # #setup
+ # health_definition_path = File.join(__dir__, '../../../../installer/conf/health_model_definition.json')
+ # health_model_definition = ParentMonitorProvider.new(HealthModelDefinitionParser.new(health_definition_path).parse_file)
+ # monitor_factory = MonitorFactory.new
+ # hierarchy_builder = HealthHierarchyBuilder.new(health_model_definition, monitor_factory)
+ # state_finalizers = [AggregateMonitorStateFinalizer.new]
+ # monitor_set = MonitorSet.new
+ # model_builder = HealthModelBuilder.new(hierarchy_builder, state_finalizers, monitor_set)
+
+ # nodes_file_map = {
+ # #"extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_nodes.json",
+ # #"first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json",
+ # #"first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json",
+ # #"second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json",
+ # #"third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json",
+ # #"fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json",
+ # #"missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json",
+ # #"kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/nodes.json",
+ # "aks-engine-1" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/nodes.json",
+ # "aks-engine-2" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/nodes.json",
+ # "aks-engine-3" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/nodes.json",
+ # }
+
+ # pods_file_map = {
+ # #"extra" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/extra_pods.json",
+ # #"first" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json",
+ # #"first-nosecondnode" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json",
+ # #"second" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json",
+ # #"third" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json",
+ # #"fourth" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json",
+ # #"missing" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json",
+ # #"kube_api_down" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/pods.json",
+ # "aks-engine-1" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/pods.json",
+ # "aks-engine-2" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/pods.json",
+ # "aks-engine-3" => "C:/AzureMonitor/ContainerInsights/Docker-Provider/inventory/aks-engine/pods.json",
+ # }
+
+ # cluster_labels = {
+ # 'container.azm.ms/cluster-region' => 'eastus',
+ # 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a',
+ # 'container.azm.ms/cluster-resource-group' => 'aks-engine-health',
+ # 'container.azm.ms/cluster-name' => 'aks-engine-health'
+ # }
+
+ # cluster_id = 'fake_cluster_id'
+
+ # #test
+ # state = HealthMonitorState.new()
+ # generator = HealthMissingSignalGenerator.new
+
+ # for scenario in 1..3
+ # mock_data_path = File.join(__dir__, "../../../../health_records/aks-engine/aks-engine-#{scenario}.json")
+ # file = File.read(mock_data_path)
+ # records = JSON.parse(file)
+
+ # node_inventory = JSON.parse(File.read(nodes_file_map["aks-engine-#{scenario}"]))
+ # pod_inventory = JSON.parse(File.read(pods_file_map["aks-engine-#{scenario}"]))
+ # deployment_inventory = JSON.parse(File.read(File.join(__dir__, "../../../../inventory/aks-engine/deployments.json")))
+ # resources = HealthKubernetesResources.instance
+ # resources.node_inventory = node_inventory
+ # resources.pod_inventory = pod_inventory
+ # resources.deployment_inventory = deployment_inventory
+
+ # workload_names = resources.get_workload_names
+ # provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, File.join(__dir__, "../../../../installer/conf/healthmonitorconfig.json"))
+
+ # health_monitor_records = []
+ # records.each do |record|
+ # monitor_instance_id = record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID]
+ # monitor_id = record[HealthMonitorRecordFields::MONITOR_ID]
+ # health_monitor_record = HealthMonitorRecord.new(
+ # record[HealthMonitorRecordFields::MONITOR_ID],
+ # record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID],
+ # record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED],
+ # record[HealthMonitorRecordFields::DETAILS]["state"],
+ # provider.get_labels(record),
+ # provider.get_config(monitor_id),
+ # record[HealthMonitorRecordFields::DETAILS]
+ # )
+
+ # state.update_state(health_monitor_record,
+ # provider.get_config(health_monitor_record.monitor_id)
+ # )
+
+ # # get the health state based on the monitor's operational state
+ # # the update_state call updates the state of the monitor based on configuration and history of the monitor records
+ # health_monitor_record.state = state.get_state(monitor_instance_id).new_state
+ # health_monitor_records.push(health_monitor_record)
+ # instance_state = state.get_state(monitor_instance_id)
+ # #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}"
+ # end
+
+
+ # #handle kube api down
+ # kube_api_down_handler = HealthKubeApiDownHandler.new
+ # health_monitor_records = kube_api_down_handler.handle_kube_api_down(health_monitor_records)
+
+ # # Dedupe daemonset signals
+ # # Remove unit monitor signals for “gone” objects
+ # reducer = HealthSignalReducer.new()
+ # reduced_records = reducer.reduce_signals(health_monitor_records, resources)
+
+ # cluster_id = 'fake_cluster_id'
+
+ # #get the list of 'none' and 'unknown' signals
+ # missing_signals = generator.get_missing_signals(cluster_id, reduced_records, resources, provider)
+ # #update state for missing signals
+ # missing_signals.each{|signal|
+ # state.update_state(signal,
+ # provider.get_config(signal.monitor_id)
+ # )
+ # }
+ # generator.update_last_received_records(reduced_records)
+ # reduced_records.push(*missing_signals)
+
+ # # build the health model
+ # all_records = reduced_records
+ # model_builder.process_records(all_records)
+ # all_monitors = model_builder.finalize_model
+
+ # # update the state for aggregate monitors (unit monitors are updated above)
+ # all_monitors.each{|monitor_instance_id, monitor|
+ # if monitor.is_aggregate_monitor
+ # state.update_state(monitor,
+ # provider.get_config(monitor.monitor_id)
+ # )
+ # end
+
+ # instance_state = state.get_state(monitor_instance_id)
+ # #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}"
+ # should_send = instance_state.should_send
+
+ # # always send cluster monitor as a heartbeat
+ # if !should_send && monitor_instance_id != MonitorId::CLUSTER
+ # all_monitors.delete(monitor_instance_id)
+ # end
+ # }
+
+ # records_to_send = []
+ # all_monitors.keys.each{|key|
+ # record = provider.get_record(all_monitors[key], state)
+ # #puts "#{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}"
+ # }
+
+ # if scenario == 1
+ # assert_equal 58, all_monitors.size
+ # elsif scenario == 2
+ # assert_equal 37, all_monitors.size
+ # elsif scenario == 3
+ # assert_equal 6, all_monitors.size
+ # end
+ # # for each key in monitor.keys,
+ # # get the state from health_monitor_state
+ # # generate the record to send
+ # serializer = HealthStateSerializer.new(File.join(__dir__, '../../../../health_records/health_model_state_aks-engine.json'))
+ # serializer.serialize(state)
+
+ # deserializer = HealthStateDeserializer.new(File.join(__dir__, '../../../../health_records/health_model_state_aks-engine.json'))
+ # deserialized_state = deserializer.deserialize
+
+ # after_state = HealthMonitorState.new
+ # after_state.initialize_state(deserialized_state)
+ # end
+ # end
+
+ # def test_container_memory_cpu_with_model
+ # health_definition_path = File.join(__dir__, '../../../../installer/conf/health_model_definition.json')
+ # health_model_definition = ParentMonitorProvider.new(HealthModelDefinitionParser.new(health_definition_path).parse_file)
+ # monitor_factory = MonitorFactory.new
+ # hierarchy_builder = HealthHierarchyBuilder.new(health_model_definition, monitor_factory)
+ # # TODO: Figure out if we need to add NodeMonitorHierarchyReducer to the list of finalizers. For now, don't compress/optimize, since it becomes impossible to construct the model on the UX side
+ # state_finalizers = [AggregateMonitorStateFinalizer.new]
+ # monitor_set = MonitorSet.new
+ # model_builder = HealthModelBuilder.new(hierarchy_builder, state_finalizers, monitor_set)
+
+ # nodes_file_map = {
+ # "first" => "C:/Users/dilipr/desktop/health/container_cpu_memory/nodes.json",
+ # "second" => "C:/Users/dilipr/desktop/health/container_cpu_memory/nodes.json",
+ # "third" => "C:/Users/dilipr/desktop/health/container_cpu_memory/nodes.json",
+ # }
+
+ # pods_file_map = {
+ # "first" => "C:/Users/dilipr/desktop/health/container_cpu_memory/pods.json",
+ # "second" => "C:/Users/dilipr/desktop/health/container_cpu_memory/pods.json",
+ # "third" => "C:/Users/dilipr/desktop/health/container_cpu_memory/pods.json",
+ # }
+
+ # cluster_labels = {
+ # 'container.azm.ms/cluster-region' => 'eastus',
+ # 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a',
+ # 'container.azm.ms/cluster-resource-group' => 'dilipr-health-test',
+ # 'container.azm.ms/cluster-name' => 'dilipr-health-test'
+ # }
+
+ # cluster_id = 'fake_cluster_id'
+
+ # #test
+ # state = HealthMonitorState.new()
+ # generator = HealthMissingSignalGenerator.new
+
+ # mock_data_path = "C:/Users/dilipr/desktop/health/container_cpu_memory/daemonset.json"
+ # file = File.read(mock_data_path)
+ # records = JSON.parse(file)
+
+ # node_inventory = JSON.parse(File.read("C:/Users/dilipr/desktop/health/container_cpu_memory/nodes.json"))
+ # pod_inventory = JSON.parse(File.read("C:/Users/dilipr/desktop/health/container_cpu_memory/pods.json"))
+ # deployment_inventory = JSON.parse(File.read("C:/Users/dilipr/desktop/health/container_cpu_memory/deployments.json"))
+ # resources = HealthKubernetesResources.instance
+ # resources.node_inventory = node_inventory
+ # resources.pod_inventory = pod_inventory
+ # resources.set_replicaset_inventory(deployment_inventory)
+
+ # workload_names = resources.get_workload_names
+ # provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, File.join(__dir__, "../../../../installer/conf/healthmonitorconfig.json"))
+
+
+ #container memory cpu records
+ # file = File.read('C:/Users/dilipr/desktop/health/container_cpu_memory/cadvisor_perf.json')
+ # cadvisor_records = JSON.parse(file)
+ # cadvisor_records = cadvisor_records.select{|record| record['DataItems'][0]['ObjectName'] == 'K8SContainer'}
+ # formatted_records = []
+ # formatter = HealthContainerCpuMemoryRecordFormatter.new
+ # cadvisor_records.each{|record|
+ # formatted_record = formatter.get_record_from_cadvisor_record(record)
+ # formatted_records.push(formatted_record)
+ # }
+
+ # resources.build_pod_uid_lookup #call this in in_kube_health every min
+
+ # cluster_labels = {
+ # 'container.azm.ms/cluster-region' => 'eastus',
+ # 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a',
+ # 'container.azm.ms/cluster-resource-group' => 'dilipr-health-test',
+ # 'container.azm.ms/cluster-name' => 'dilipr-health-test'
+ # }
+
+ # cluster_id = 'fake_cluster_id'
+
+ # aggregator = HealthContainerCpuMemoryAggregator.new(resources, provider)
+ # deduped_records = aggregator.dedupe_records(formatted_records)
+ # aggregator.aggregate(deduped_records)
+ # aggregator.compute_state
+ # container_cpu_memory_records = aggregator.get_records
+
+ # records.concat(container_cpu_memory_records)
+
+ # health_monitor_records = []
+ # records.each do |record|
+ # monitor_instance_id = record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID]
+ # monitor_id = record[HealthMonitorRecordFields::MONITOR_ID]
+ # health_monitor_record = HealthMonitorRecord.new(
+ # record[HealthMonitorRecordFields::MONITOR_ID],
+ # record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID],
+ # record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED],
+ # record[HealthMonitorRecordFields::DETAILS]["state"],
+ # provider.get_labels(record),
+ # provider.get_config(monitor_id),
+ # record[HealthMonitorRecordFields::DETAILS]
+ # )
+
+ # state.update_state(health_monitor_record,
+ # provider.get_config(health_monitor_record.monitor_id)
+ # )
+
+ # # get the health state based on the monitor's operational state
+ # # the update_state call updates the state of the monitor based on configuration and history of the monitor records
+ # health_monitor_record.state = state.get_state(monitor_instance_id).new_state
+ # health_monitor_records.push(health_monitor_record)
+ # #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}"
+ # end
+
+ # #handle kube api down
+ # kube_api_down_handler = HealthKubeApiDownHandler.new
+ # health_monitor_records = kube_api_down_handler.handle_kube_api_down(health_monitor_records)
+
+ # # Dedupe daemonset signals
+ # # Remove unit monitor signals for “gone” objects
+ # reducer = HealthSignalReducer.new()
+ # reduced_records = reducer.reduce_signals(health_monitor_records, resources)
+
+ # cluster_id = 'fake_cluster_id'
+
+ # #get the list of 'none' and 'unknown' signals
+ # missing_signals = generator.get_missing_signals(cluster_id, reduced_records, resources, provider)
+ # #update state for missing signals
+ # missing_signals.each{|signal|
+ # state.update_state(signal,
+ # provider.get_config(signal.monitor_id)
+ # )
+ # }
+ # generator.update_last_received_records(reduced_records)
+ # reduced_records.push(*missing_signals)
+
+ # # build the health model
+ # all_records = reduced_records
+ # model_builder.process_records(all_records)
+ # all_monitors = model_builder.finalize_model
+
+ # # update the state for aggregate monitors (unit monitors are updated above)
+ # all_monitors.each{|monitor_instance_id, monitor|
+ # if monitor.is_aggregate_monitor
+ # state.update_state(monitor,
+ # provider.get_config(monitor.monitor_id)
+ # )
+ # end
+
+ # instance_state = state.get_state(monitor_instance_id)
+ # #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}"
+ # should_send = instance_state.should_send
+
+ # # always send cluster monitor as a heartbeat
+ # if !should_send && monitor_instance_id != MonitorId::CLUSTER
+ # all_monitors.delete(monitor_instance_id)
+ # end
+ # }
+
+ # records_to_send = []
+ # all_monitors.keys.each{|key|
+ # record = provider.get_record(all_monitors[key], state)
+ # #puts "#{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}"
+ # }
+ # end
+
+ def test_get_workload_name
+ # node_inventory = JSON.parse(File.read("C:/AzureMonitor/ContainerInsights/Docker-Provider/test/code/plugin/health/dilipr-health-test-nodes.json"))
+ # pod_inventory = JSON.parse(File.read('C:/AzureMonitor/ContainerInsights/Docker-Provider/test/code/plugin/health/dilipr-health-test-pods.json'))
+ # replicaset_inventory = JSON.parse(File.read('C:/AzureMonitor/ContainerInsights/Docker-Provider/test/code/plugin/health/dilipr-health-test-rs.json'))
+ node_inventory = JSON.parse(File.read("C:/AzureMonitor/ContainerInsights/Docker-Provider/test/code/plugin/health/jobyaks2-nodes.json"))
+ pod_inventory = JSON.parse(File.read('C:/AzureMonitor/ContainerInsights/Docker-Provider/test/code/plugin/health/jobyaks2-pods.json'))
+ replicaset_inventory = JSON.parse(File.read('C:/AzureMonitor/ContainerInsights/Docker-Provider/test/code/plugin/health/jobyaks2-rs.json'))
 resources = HealthKubernetesResources.instance
 resources.node_inventory = node_inventory
 resources.pod_inventory = pod_inventory
- resources.set_deployment_inventory(deployment_inventory)
-
- workload_names = resources.get_workload_names
- provider = HealthMonitorProvider.new(cluster_id, cluster_labels, resources, File.join(__dir__, "../../../../installer/conf/healthmonitorconfig.json"))
-
-
- #container memory cpu records
- file = File.read('C:/Users/dilipr/desktop/health/container_cpu_memory/cadvisor_perf.json')
- cadvisor_records = JSON.parse(file)
- cadvisor_records = cadvisor_records.select{|record| record['DataItems'][0]['ObjectName'] == 'K8SContainer'}
- formatted_records = []
- formatter = HealthContainerCpuMemoryRecordFormatter.new
- cadvisor_records.each{|record|
- formatted_record = formatter.get_record_from_cadvisor_record(record)
- formatted_records.push(formatted_record)
+ resources.set_replicaset_inventory(replicaset_inventory)
+ pod_inventory['items'].each{|pod|
+ workload_name = resources.get_workload_name(pod)
+ puts "POD #{pod['metadata']['name']} Workload Name #{workload_name}"
 }
- resources.build_pod_uid_lookup #call this in in_kube_health every min
+ pods_ready_hash = HealthMonitorUtils.get_pods_ready_hash(resources)
- cluster_labels = {
- 'container.azm.ms/cluster-region' => 'eastus',
- 'container.azm.ms/cluster-subscription-id' => '72c8e8ca-dc16-47dc-b65c-6b5875eb600a',
- 'container.azm.ms/cluster-resource-group' => 'dilipr-health-test',
- 'container.azm.ms/cluster-name' => 'dilipr-health-test'
- }
-
- cluster_id = 'fake_cluster_id'
-
- aggregator = HealthContainerCpuMemoryAggregator.new(resources, provider)
- deduped_records = aggregator.dedupe_records(formatted_records)
- aggregator.aggregate(deduped_records)
- aggregator.compute_state
- container_cpu_memory_records = aggregator.get_records
-
- records.concat(container_cpu_memory_records)
-
- health_monitor_records = []
- records.each do |record|
- monitor_instance_id = record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID]
- monitor_id = record[HealthMonitorRecordFields::MONITOR_ID]
- health_monitor_record = HealthMonitorRecord.new(
- record[HealthMonitorRecordFields::MONITOR_ID],
- record[HealthMonitorRecordFields::MONITOR_INSTANCE_ID],
- record[HealthMonitorRecordFields::TIME_FIRST_OBSERVED],
- record[HealthMonitorRecordFields::DETAILS]["state"],
- provider.get_labels(record),
- provider.get_config(monitor_id),
- record[HealthMonitorRecordFields::DETAILS]
- )
-
- state.update_state(health_monitor_record,
- provider.get_config(health_monitor_record.monitor_id)
- )
-
- # get the health state based on the monitor's operational state
- # update state calls updates the state of the monitor based on configuration and history of the the monitor records
- health_monitor_record.state = state.get_state(monitor_instance_id).new_state
- health_monitor_records.push(health_monitor_record)
- #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}"
- end
-
- #handle kube api down
- kube_api_down_handler = HealthKubeApiDownHandler.new
- health_monitor_records = kube_api_down_handler.handle_kube_api_down(health_monitor_records)
-
- # Dedupe daemonset signals
- # Remove unit monitor signals for “gone” objects
- reducer = HealthSignalReducer.new()
- reduced_records = reducer.reduce_signals(health_monitor_records, resources)
-
- cluster_id = 'fake_cluster_id'
-
- #get the list of 'none' and 'unknown' signals
- missing_signals = generator.get_missing_signals(cluster_id, reduced_records, resources, provider)
- #update state for missing signals
- missing_signals.each{|signal|
- state.update_state(signal,
- provider.get_config(signal.monitor_id)
- )
- }
- generator.update_last_received_records(reduced_records)
- reduced_records.push(*missing_signals)
-
- # build the health model
- all_records = reduced_records
- model_builder.process_records(all_records)
- all_monitors = model_builder.finalize_model
-
- # update the state for aggregate monitors (unit monitors are updated above)
- all_monitors.each{|monitor_instance_id, monitor|
- if monitor.is_aggregate_monitor
- state.update_state(monitor,
- provider.get_config(monitor.monitor_id)
- )
- end
-
- instance_state = state.get_state(monitor_instance_id)
- #puts "#{monitor_instance_id} #{instance_state.new_state} #{instance_state.old_state} #{instance_state.should_send}"
- should_send = instance_state.should_send
-
- # always send cluster monitor as a heartbeat
- if !should_send && monitor_instance_id != MonitorId::CLUSTER
- all_monitors.delete(monitor_instance_id)
- end
- }
-
- records_to_send = []
- all_monitors.keys.each{|key|
- record = provider.get_record(all_monitors[key], state)
- #puts "#{record["MonitorInstanceId"]} #{record["OldState"]} #{record["NewState"]}"
- }
+ puts JSON.pretty_generate(pods_ready_hash)
 end
 end
\ No newline at end of file
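
Reviewer note on the lookup exercised by test_get_workload_name: the test loads node, pod, and replicaset fixtures into HealthKubernetesResources, calls set_replicaset_inventory, and then asks get_workload_name for every pod. A minimal sketch of the kind of owner-reference walk such a lookup typically performs (Pod -> ReplicaSet -> Deployment, falling back to the pod's own name) is given below. It is an illustration under those assumptions only, not the plugin's implementation; the method name workload_name_for and the rs_by_name index are hypothetical.

# Illustrative sketch only: assumed Pod -> ReplicaSet -> Deployment resolution,
# not the HealthKubernetesResources implementation.
def workload_name_for(pod, rs_by_name)
  owner = (pod.dig('metadata', 'ownerReferences') || []).first
  return pod.dig('metadata', 'name') if owner.nil?    # bare pod: fall back to its own name

  if owner['kind'] == 'ReplicaSet'
    rs = rs_by_name[owner['name']]
    rs_owner = rs && (rs.dig('metadata', 'ownerReferences') || []).first
    return rs_owner['name'] if rs_owner                # ReplicaSet created by a Deployment
  end

  owner['name']                                        # DaemonSet, StatefulSet, Job, or a standalone ReplicaSet
end

Fed the jobyaks2-pods.json and jobyaks2-rs.json fixtures from the test above, printing workload_name_for(pod, rs_by_name) for each entry in pod_inventory['items'] would mirror the puts loop in test_get_workload_name.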