...

Source file src/github.com/chaos-mesh/chaos-mesh/pkg/metrics/chaos-controller-manager.go

Documentation: github.com/chaos-mesh/chaos-mesh/pkg/metrics

     1  // Copyright 2021 Chaos Mesh Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  // http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  
    16  package metrics
    17  
    18  import (
    19  	"context"
    20  	"reflect"
    21  
    22  	"github.com/go-logr/logr"
    23  	"github.com/prometheus/client_golang/prometheus"
    24  	ctrl "sigs.k8s.io/controller-runtime"
    25  	"sigs.k8s.io/controller-runtime/pkg/cache"
    26  
    27  	"github.com/chaos-mesh/chaos-mesh/api/v1alpha1"
    28  	"github.com/chaos-mesh/chaos-mesh/pkg/status"
    29  )
    30  
    31  // ChaosControllerManagerMetricsCollector implements prometheus.Collector interface
    32  type ChaosControllerManagerMetricsCollector struct {
    33  	logger              logr.Logger
    34  	store               cache.Cache
    35  	chaosExperiments    *prometheus.GaugeVec
    36  	SidecarTemplates    prometheus.Gauge
    37  	ConfigTemplates     *prometheus.GaugeVec
    38  	InjectionConfigs    *prometheus.GaugeVec
    39  	TemplateNotExist    *prometheus.CounterVec
    40  	TemplateLoadError   prometheus.Counter
    41  	ConfigNameDuplicate *prometheus.CounterVec
    42  	InjectRequired      *prometheus.CounterVec
    43  	Injections          *prometheus.CounterVec
    44  	chaosSchedules      *prometheus.GaugeVec
    45  	chaosWorkflows      *prometheus.GaugeVec
    46  	EmittedEvents       *prometheus.CounterVec
    47  }
    48  
    49  // NewChaosControllerManagerMetricsCollector initializes metrics and collector
    50  func NewChaosControllerManagerMetricsCollector(manager ctrl.Manager, registerer *prometheus.Registry, logger logr.Logger) *ChaosControllerManagerMetricsCollector {
    51  	var store cache.Cache
    52  	if manager != nil {
    53  		store = manager.GetCache()
    54  	}
    55  
    56  	c := &ChaosControllerManagerMetricsCollector{
    57  		logger: logger,
    58  		store:  store,
    59  		chaosExperiments: prometheus.NewGaugeVec(prometheus.GaugeOpts{
    60  			Name: "chaos_controller_manager_chaos_experiments",
    61  			Help: "Total number of chaos experiments and their phases",
    62  		}, []string{"namespace", "kind", "phase"}),
    63  		SidecarTemplates: prometheus.NewGauge(prometheus.GaugeOpts{
    64  			Name: "chaos_mesh_templates",
    65  			Help: "Total number of injection templates",
    66  		}),
    67  		ConfigTemplates: prometheus.NewGaugeVec(prometheus.GaugeOpts{
    68  			Name: "chaos_mesh_config_templates",
    69  			Help: "Total number of config templates",
    70  		}, []string{"namespace", "template"}),
    71  		InjectionConfigs: prometheus.NewGaugeVec(prometheus.GaugeOpts{
    72  			Name: "chaos_mesh_injection_configs",
    73  			Help: "Total number of injection configs",
    74  		}, []string{"namespace", "template"}),
    75  		TemplateNotExist: prometheus.NewCounterVec(prometheus.CounterOpts{
    76  			Name: "chaos_mesh_template_not_exist_total",
    77  			Help: "Total number of template not exist error",
    78  		}, []string{"namespace", "template"}),
    79  		ConfigNameDuplicate: prometheus.NewCounterVec(prometheus.CounterOpts{
    80  			Name: "chaos_mesh_config_name_duplicate_total",
    81  			Help: "Total number of config name duplication error",
    82  		}, []string{"namespace", "config"}),
    83  		TemplateLoadError: prometheus.NewCounter(prometheus.CounterOpts{
    84  			Name: "chaos_mesh_template_load_failed_total",
    85  			Help: "Total number of failures when rendering config args to template",
    86  		}),
    87  		InjectRequired: prometheus.NewCounterVec(prometheus.CounterOpts{
    88  			Name: "chaos_mesh_inject_required_total",
    89  			Help: "Total number of injections required",
    90  		}, []string{"namespace", "config"}),
    91  		Injections: prometheus.NewCounterVec(prometheus.CounterOpts{
    92  			Name: "chaos_mesh_injections_total",
    93  			Help: "Total number of sidecar injections performed on the webhook",
    94  		}, []string{"namespace", "config"}),
    95  		chaosSchedules: prometheus.NewGaugeVec(prometheus.GaugeOpts{
    96  			Name: "chaos_controller_manager_chaos_schedules",
    97  			Help: "Total number of chaos schedules",
    98  		}, []string{"namespace"}),
    99  		chaosWorkflows: prometheus.NewGaugeVec(prometheus.GaugeOpts{
   100  			Name: "chaos_controller_manager_chaos_workflows",
   101  			Help: "Total number of chaos workflows",
   102  		}, []string{"namespace"}),
   103  		EmittedEvents: prometheus.NewCounterVec(prometheus.CounterOpts{
   104  			Name: "chaos_controller_manager_emitted_event_total",
   105  			Help: "Total number of the emitted event by chaos-controller-manager",
   106  		}, []string{"type", "reason", "namespace"}),
   107  	}
   108  
   109  	if registerer != nil {
   110  		registerer.MustRegister(c)
   111  	}
   112  	return c
   113  }
   114  
   115  // Describe implements the prometheus.Collector interface.
   116  func (collector *ChaosControllerManagerMetricsCollector) Describe(ch chan<- *prometheus.Desc) {
   117  	collector.chaosExperiments.Describe(ch)
   118  	collector.SidecarTemplates.Describe(ch)
   119  	collector.ConfigTemplates.Describe(ch)
   120  	collector.InjectionConfigs.Describe(ch)
   121  	collector.TemplateNotExist.Describe(ch)
   122  	collector.ConfigNameDuplicate.Describe(ch)
   123  	collector.TemplateLoadError.Describe(ch)
   124  	collector.InjectRequired.Describe(ch)
   125  	collector.Injections.Describe(ch)
   126  	collector.EmittedEvents.Describe(ch)
   127  	collector.chaosSchedules.Describe(ch)
   128  	collector.chaosWorkflows.Describe(ch)
   129  }
   130  
   131  // Collect implements the prometheus.Collector interface.
   132  func (collector *ChaosControllerManagerMetricsCollector) Collect(ch chan<- prometheus.Metric) {
   133  	collector.collectChaosExperiments()
   134  	collector.collectChaosSchedules()
   135  	collector.collectChaosWorkflows()
   136  	collector.SidecarTemplates.Collect(ch)
   137  	collector.ConfigTemplates.Collect(ch)
   138  	collector.InjectionConfigs.Collect(ch)
   139  	collector.TemplateNotExist.Collect(ch)
   140  	collector.ConfigNameDuplicate.Collect(ch)
   141  	collector.TemplateLoadError.Collect(ch)
   142  	collector.InjectRequired.Collect(ch)
   143  	collector.Injections.Collect(ch)
   144  	collector.chaosExperiments.Collect(ch)
   145  	collector.chaosSchedules.Collect(ch)
   146  	collector.chaosWorkflows.Collect(ch)
   147  	collector.EmittedEvents.Collect(ch)
   148  }
   149  
   150  func (collector *ChaosControllerManagerMetricsCollector) collectChaosExperiments() {
   151  	// TODO(yeya24) if there is an error in List
   152  	// the experiment status will be lost
   153  	collector.chaosExperiments.Reset()
   154  
   155  	for kind, obj := range v1alpha1.AllKinds() {
   156  		expCache := map[string]map[string]int{}
   157  		chaosList := obj.SpawnList()
   158  		if err := collector.store.List(context.TODO(), chaosList); err != nil {
   159  			collector.logger.Error(err, "failed to list chaos", "kind", kind)
   160  			return
   161  		}
   162  
   163  		items := chaosList.GetItems()
   164  		for _, item := range items {
   165  			if _, ok := expCache[item.GetNamespace()]; !ok {
   166  				// There is only 4 supported phases
   167  				expCache[item.GetNamespace()] = make(map[string]int, 4)
   168  			}
   169  			innerObject := reflect.ValueOf(item).Interface().(v1alpha1.InnerObject)
   170  			expCache[item.GetNamespace()][string(status.GetChaosStatus(innerObject))]++
   171  		}
   172  
   173  		for ns, v := range expCache {
   174  			for phase, count := range v {
   175  				collector.chaosExperiments.WithLabelValues(ns, kind, phase).Set(float64(count))
   176  			}
   177  		}
   178  	}
   179  }
   180  
   181  func (collector *ChaosControllerManagerMetricsCollector) collectChaosSchedules() {
   182  	collector.chaosSchedules.Reset()
   183  
   184  	schedules := &v1alpha1.ScheduleList{}
   185  	if err := collector.store.List(context.TODO(), schedules); err != nil {
   186  		collector.logger.Error(err, "failed to list schedules")
   187  		return
   188  	}
   189  
   190  	countByNamespace := make(map[string]int)
   191  	items := schedules.GetItems()
   192  	for _, item := range items {
   193  		countByNamespace[item.GetNamespace()]++
   194  	}
   195  
   196  	for namespace, count := range countByNamespace {
   197  		collector.chaosSchedules.WithLabelValues(namespace).Set(float64(count))
   198  	}
   199  }
   200  
   201  func (collector *ChaosControllerManagerMetricsCollector) collectChaosWorkflows() {
   202  	collector.chaosWorkflows.Reset()
   203  
   204  	workflows := &v1alpha1.WorkflowList{}
   205  	if err := collector.store.List(context.TODO(), workflows); err != nil {
   206  		collector.logger.Error(err, "failed to list workflows")
   207  		return
   208  	}
   209  
   210  	countByNamespace := make(map[string]int)
   211  	items := workflows.GetItems()
   212  	for _, item := range items {
   213  		countByNamespace[item.GetNamespace()]++
   214  	}
   215  
   216  	for namespace, count := range countByNamespace {
   217  		collector.chaosWorkflows.WithLabelValues(namespace).Set(float64(count))
   218  	}
   219  }
   220