1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package metrics
17
18 import (
19 "context"
20 "reflect"
21
22 "github.com/go-logr/logr"
23 "github.com/prometheus/client_golang/prometheus"
24 ctrl "sigs.k8s.io/controller-runtime"
25 "sigs.k8s.io/controller-runtime/pkg/cache"
26
27 "github.com/chaos-mesh/chaos-mesh/api/v1alpha1"
28 "github.com/chaos-mesh/chaos-mesh/pkg/status"
29 )
30
31
// ChaosControllerManagerMetricsCollector bundles the Prometheus metrics of the
// chaos-controller-manager. It provides Describe and Collect, so it can be
// registered with a Prometheus registry as a single collector.
//
// The unexported gauge vectors are recomputed from the cache on every Collect
// call. The exported metrics are only emitted here; presumably they are
// updated by callers outside this file.
type ChaosControllerManagerMetricsCollector struct {
	// logger reports failures to list objects during collection.
	logger logr.Logger
	// store is the cache that experiments, schedules and workflows are listed
	// from. It stays nil when the collector is built without a manager.
	store cache.Cache
	// chaosExperiments counts chaos experiments, labelled by namespace, kind and phase.
	chaosExperiments *prometheus.GaugeVec
	// SidecarTemplates is the number of injection templates.
	SidecarTemplates prometheus.Gauge
	// ConfigTemplates is the number of config templates, labelled by namespace and template.
	ConfigTemplates *prometheus.GaugeVec
	// InjectionConfigs is the number of injection configs, labelled by namespace and template.
	InjectionConfigs *prometheus.GaugeVec
	// TemplateNotExist counts "template not exist" errors, labelled by namespace and template.
	TemplateNotExist *prometheus.CounterVec
	// TemplateLoadError counts failures when rendering config args to a template.
	TemplateLoadError prometheus.Counter
	// ConfigNameDuplicate counts config name duplication errors, labelled by namespace and config.
	ConfigNameDuplicate *prometheus.CounterVec
	// InjectRequired counts required injections, labelled by namespace and config.
	InjectRequired *prometheus.CounterVec
	// Injections counts sidecar injections performed on the webhook, labelled by namespace and config.
	Injections *prometheus.CounterVec
	// chaosSchedules counts chaos schedules per namespace.
	chaosSchedules *prometheus.GaugeVec
	// chaosWorkflows counts chaos workflows per namespace.
	chaosWorkflows *prometheus.GaugeVec
	// EmittedEvents counts events emitted by chaos-controller-manager, labelled by type, reason and namespace.
	EmittedEvents *prometheus.CounterVec
}
48
49
50 func NewChaosControllerManagerMetricsCollector(manager ctrl.Manager, registerer *prometheus.Registry, logger logr.Logger) *ChaosControllerManagerMetricsCollector {
51 var store cache.Cache
52 if manager != nil {
53 store = manager.GetCache()
54 }
55
56 c := &ChaosControllerManagerMetricsCollector{
57 logger: logger,
58 store: store,
59 chaosExperiments: prometheus.NewGaugeVec(prometheus.GaugeOpts{
60 Name: "chaos_controller_manager_chaos_experiments",
61 Help: "Total number of chaos experiments and their phases",
62 }, []string{"namespace", "kind", "phase"}),
63 SidecarTemplates: prometheus.NewGauge(prometheus.GaugeOpts{
64 Name: "chaos_mesh_templates",
65 Help: "Total number of injection templates",
66 }),
67 ConfigTemplates: prometheus.NewGaugeVec(prometheus.GaugeOpts{
68 Name: "chaos_mesh_config_templates",
69 Help: "Total number of config templates",
70 }, []string{"namespace", "template"}),
71 InjectionConfigs: prometheus.NewGaugeVec(prometheus.GaugeOpts{
72 Name: "chaos_mesh_injection_configs",
73 Help: "Total number of injection configs",
74 }, []string{"namespace", "template"}),
75 TemplateNotExist: prometheus.NewCounterVec(prometheus.CounterOpts{
76 Name: "chaos_mesh_template_not_exist_total",
77 Help: "Total number of template not exist error",
78 }, []string{"namespace", "template"}),
79 ConfigNameDuplicate: prometheus.NewCounterVec(prometheus.CounterOpts{
80 Name: "chaos_mesh_config_name_duplicate_total",
81 Help: "Total number of config name duplication error",
82 }, []string{"namespace", "config"}),
83 TemplateLoadError: prometheus.NewCounter(prometheus.CounterOpts{
84 Name: "chaos_mesh_template_load_failed_total",
85 Help: "Total number of failures when rendering config args to template",
86 }),
87 InjectRequired: prometheus.NewCounterVec(prometheus.CounterOpts{
88 Name: "chaos_mesh_inject_required_total",
89 Help: "Total number of injections required",
90 }, []string{"namespace", "config"}),
91 Injections: prometheus.NewCounterVec(prometheus.CounterOpts{
92 Name: "chaos_mesh_injections_total",
93 Help: "Total number of sidecar injections performed on the webhook",
94 }, []string{"namespace", "config"}),
95 chaosSchedules: prometheus.NewGaugeVec(prometheus.GaugeOpts{
96 Name: "chaos_controller_manager_chaos_schedules",
97 Help: "Total number of chaos schedules",
98 }, []string{"namespace"}),
99 chaosWorkflows: prometheus.NewGaugeVec(prometheus.GaugeOpts{
100 Name: "chaos_controller_manager_chaos_workflows",
101 Help: "Total number of chaos workflows",
102 }, []string{"namespace"}),
103 EmittedEvents: prometheus.NewCounterVec(prometheus.CounterOpts{
104 Name: "chaos_controller_manager_emitted_event_total",
105 Help: "Total number of the emitted event by chaos-controller-manager",
106 }, []string{"type", "reason", "namespace"}),
107 }
108
109 if registerer != nil {
110 registerer.MustRegister(c)
111 }
112 return c
113 }
114
115
116 func (collector *ChaosControllerManagerMetricsCollector) Describe(ch chan<- *prometheus.Desc) {
117 collector.chaosExperiments.Describe(ch)
118 collector.SidecarTemplates.Describe(ch)
119 collector.ConfigTemplates.Describe(ch)
120 collector.InjectionConfigs.Describe(ch)
121 collector.TemplateNotExist.Describe(ch)
122 collector.ConfigNameDuplicate.Describe(ch)
123 collector.TemplateLoadError.Describe(ch)
124 collector.InjectRequired.Describe(ch)
125 collector.Injections.Describe(ch)
126 collector.EmittedEvents.Describe(ch)
127 collector.chaosSchedules.Describe(ch)
128 collector.chaosWorkflows.Describe(ch)
129 }
130
131
132 func (collector *ChaosControllerManagerMetricsCollector) Collect(ch chan<- prometheus.Metric) {
133 collector.collectChaosExperiments()
134 collector.collectChaosSchedules()
135 collector.collectChaosWorkflows()
136 collector.SidecarTemplates.Collect(ch)
137 collector.ConfigTemplates.Collect(ch)
138 collector.InjectionConfigs.Collect(ch)
139 collector.TemplateNotExist.Collect(ch)
140 collector.ConfigNameDuplicate.Collect(ch)
141 collector.TemplateLoadError.Collect(ch)
142 collector.InjectRequired.Collect(ch)
143 collector.Injections.Collect(ch)
144 collector.chaosExperiments.Collect(ch)
145 collector.chaosSchedules.Collect(ch)
146 collector.chaosWorkflows.Collect(ch)
147 collector.EmittedEvents.Collect(ch)
148 }
149
150 func (collector *ChaosControllerManagerMetricsCollector) collectChaosExperiments() {
151
152
153 collector.chaosExperiments.Reset()
154
155 for kind, obj := range v1alpha1.AllKinds() {
156 expCache := map[string]map[string]int{}
157 chaosList := obj.SpawnList()
158 if err := collector.store.List(context.TODO(), chaosList); err != nil {
159 collector.logger.Error(err, "failed to list chaos", "kind", kind)
160 return
161 }
162
163 items := chaosList.GetItems()
164 for _, item := range items {
165 if _, ok := expCache[item.GetNamespace()]; !ok {
166
167 expCache[item.GetNamespace()] = make(map[string]int, 4)
168 }
169 innerObject := reflect.ValueOf(item).Interface().(v1alpha1.InnerObject)
170 expCache[item.GetNamespace()][string(status.GetChaosStatus(innerObject))]++
171 }
172
173 for ns, v := range expCache {
174 for phase, count := range v {
175 collector.chaosExperiments.WithLabelValues(ns, kind, phase).Set(float64(count))
176 }
177 }
178 }
179 }
180
181 func (collector *ChaosControllerManagerMetricsCollector) collectChaosSchedules() {
182 collector.chaosSchedules.Reset()
183
184 schedules := &v1alpha1.ScheduleList{}
185 if err := collector.store.List(context.TODO(), schedules); err != nil {
186 collector.logger.Error(err, "failed to list schedules")
187 return
188 }
189
190 countByNamespace := make(map[string]int)
191 items := schedules.GetItems()
192 for _, item := range items {
193 countByNamespace[item.GetNamespace()]++
194 }
195
196 for namespace, count := range countByNamespace {
197 collector.chaosSchedules.WithLabelValues(namespace).Set(float64(count))
198 }
199 }
200
201 func (collector *ChaosControllerManagerMetricsCollector) collectChaosWorkflows() {
202 collector.chaosWorkflows.Reset()
203
204 workflows := &v1alpha1.WorkflowList{}
205 if err := collector.store.List(context.TODO(), workflows); err != nil {
206 collector.logger.Error(err, "failed to list workflows")
207 return
208 }
209
210 countByNamespace := make(map[string]int)
211 items := workflows.GetItems()
212 for _, item := range items {
213 countByNamespace[item.GetNamespace()]++
214 }
215
216 for namespace, count := range countByNamespace {
217 collector.chaosWorkflows.WithLabelValues(namespace).Set(float64(count))
218 }
219 }
220