...

Source file src/github.com/chaos-mesh/chaos-mesh/pkg/metrics/chaos-daemon.go

Documentation: github.com/chaos-mesh/chaos-mesh/pkg/metrics

     1  // Copyright 2021 Chaos Mesh Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  // http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  
    16  package metrics
    17  
    18  import (
    19  	"context"
    20  
    21  	"github.com/go-logr/logr"
    22  	grpcprometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
    23  	"github.com/prometheus/client_golang/prometheus"
    24  
    25  	"github.com/chaos-mesh/chaos-mesh/pkg/chaosdaemon/crclients"
    26  	"github.com/chaos-mesh/chaos-mesh/pkg/log"
    27  	"github.com/chaos-mesh/chaos-mesh/pkg/metrics/utils"
    28  )
    29  
var (
	// DefaultChaosDaemonMetricsCollector is the package-level default metrics collector for chaos daemon.
	// It is created with a logger named "chaos-daemon" / "metrics" and without a container runtime
	// client; a client is attached afterwards through InjectCrClient.
	DefaultChaosDaemonMetricsCollector = NewChaosDaemonMetricsCollector(log.L().WithName("chaos-daemon").WithName("metrics"))

	// ChaosDaemonGrpcServerBuckets lists the bucket upper bounds used by the gRPC server handling
	// histogram metrics.
	// NOTE(review): the values are presumably in seconds — confirm against the histogram they are passed to.
	ChaosDaemonGrpcServerBuckets = []float64{0.001, 0.01, 0.1, 0.3, 0.6, 1, 3, 6, 10}
)
    37  
const (
	// kubernetesPodNameLabel, kubernetesPodNamespaceLabel and kubernetesContainerNameLabel are the
	// container label keys that carry the Kubernetes pod name, pod namespace and container name.
	// The same keys are defined under the `k8s.io/kubernetes` package; they are duplicated here
	// because taking `k8s.io/kubernetes` as a dependency is discouraged, see
	// https://github.com/kubernetes/kubernetes/issues/90358#issuecomment-617859364.
	kubernetesPodNameLabel       = "io.kubernetes.pod.name"
	kubernetesPodNamespaceLabel  = "io.kubernetes.pod.namespace"
	kubernetesContainerNameLabel = "io.kubernetes.container.name"
)
    46  
    47  func WithHistogramName(name string) grpcprometheus.HistogramOption {
    48  	return func(opts *prometheus.HistogramOpts) {
    49  		opts.Name = name
    50  	}
    51  }
    52  
// ChaosDaemonMetricsCollector gathers per-container network metrics (iptables, ipset and tc)
// and exposes them as Prometheus gauge vectors through its Describe and Collect methods.
type ChaosDaemonMetricsCollector struct {
	// crClient is used to list container IDs and to look up each container's pid and labels.
	// It is not set by NewChaosDaemonMetricsCollector; InjectCrClient assigns it.
	crClient            crclients.ContainerRuntimeInfoClient
	// logger receives the errors encountered while collecting metrics.
	logger              logr.Logger
	// iptablesPackets holds the packet counter of every iptables rule.
	iptablesPackets     *prometheus.GaugeVec
	// iptablesPacketBytes holds the byte counter of every iptables rule.
	iptablesPacketBytes *prometheus.GaugeVec
	// ipsetMembers holds the number of ipset members per container.
	ipsetMembers        *prometheus.GaugeVec
	// tcRules holds the number of tc rules per container.
	tcRules             *prometheus.GaugeVec
}
    61  
    62  // NewChaosDaemonMetricsCollector initializes metrics for each chaos daemon
    63  func NewChaosDaemonMetricsCollector(logger logr.Logger) *ChaosDaemonMetricsCollector {
    64  	return &ChaosDaemonMetricsCollector{
    65  		logger: logger,
    66  		iptablesPackets: prometheus.NewGaugeVec(prometheus.GaugeOpts{
    67  			Name: "chaos_daemon_iptables_packets",
    68  			Help: "Total number of iptables packets",
    69  		}, []string{"namespace", "pod", "container", "table", "chain", "policy", "rule"}),
    70  		iptablesPacketBytes: prometheus.NewGaugeVec(prometheus.GaugeOpts{
    71  			Name: "chaos_daemon_iptables_packet_bytes",
    72  			Help: "Total bytes of iptables packets",
    73  		}, []string{"namespace", "pod", "container", "table", "chain", "policy", "rule"}),
    74  		ipsetMembers: prometheus.NewGaugeVec(prometheus.GaugeOpts{
    75  			Name: "chaos_daemon_ipset_members",
    76  			Help: "Total number of ipset members",
    77  		}, []string{"namespace", "pod", "container"}),
    78  		tcRules: prometheus.NewGaugeVec(prometheus.GaugeOpts{
    79  			Name: "chaos_daemon_tcs",
    80  			Help: "Total number of tc rules",
    81  		}, []string{"namespace", "pod", "container"}),
    82  	}
    83  }
    84  
    85  func (collector *ChaosDaemonMetricsCollector) Describe(ch chan<- *prometheus.Desc) {
    86  	collector.iptablesPackets.Describe(ch)
    87  	collector.iptablesPacketBytes.Describe(ch)
    88  	collector.ipsetMembers.Describe(ch)
    89  	collector.tcRules.Describe(ch)
    90  }
    91  
    92  func (collector *ChaosDaemonMetricsCollector) Collect(ch chan<- prometheus.Metric) {
    93  	collector.collectNetworkMetrics()
    94  	collector.iptablesPackets.Collect(ch)
    95  	collector.iptablesPacketBytes.Collect(ch)
    96  	collector.ipsetMembers.Collect(ch)
    97  	collector.tcRules.Collect(ch)
    98  }
    99  
// InjectCrClient sets the container runtime client the collector uses to list containers
// and to look up their pids and labels. It returns the same collector so that calls can
// be chained.
func (collector *ChaosDaemonMetricsCollector) InjectCrClient(client crclients.ContainerRuntimeInfoClient) *ChaosDaemonMetricsCollector {
	collector.crClient = client
	return collector
}
   104  
   105  func (collector *ChaosDaemonMetricsCollector) collectNetworkMetrics() {
   106  	collector.iptablesPackets.Reset()
   107  	collector.iptablesPacketBytes.Reset()
   108  	collector.ipsetMembers.Reset()
   109  	collector.tcRules.Reset()
   110  
   111  	containerIDs, err := collector.crClient.ListContainerIDs(context.Background())
   112  	if err != nil {
   113  		collector.logger.Error(err, "fail to list all container process IDs")
   114  		return
   115  	}
   116  
   117  	for _, containerID := range containerIDs {
   118  		pid, err := collector.crClient.GetPidFromContainerID(context.Background(), containerID)
   119  		if err != nil {
   120  			collector.logger.Error(err, "fail to get pid from container ID")
   121  			continue
   122  		}
   123  
   124  		labels, err := collector.crClient.GetLabelsFromContainerID(context.Background(), containerID)
   125  		if err != nil {
   126  			collector.logger.Error(err, "fail to get container labels", "containerID", containerID)
   127  			continue
   128  		}
   129  
   130  		namespace, podName, containerName := labels[kubernetesPodNamespaceLabel],
   131  			labels[kubernetesPodNameLabel], labels[kubernetesContainerNameLabel]
   132  
   133  		labelValues := []string{namespace, podName, containerName}
   134  		log := collector.logger.WithValues(
   135  			"namespace", namespace,
   136  			"podName", podName,
   137  			"containerName", containerName,
   138  			"containerID", containerID,
   139  		)
   140  
   141  		tables, err := utils.GetIptablesContentByNetNS(pid)
   142  		if err != nil {
   143  			log.Error(err, "fail to collect iptables metrics")
   144  		}
   145  		for tableName, table := range tables {
   146  			for chainName, chain := range table {
   147  				for _, rule := range chain.Rules {
   148  					collector.iptablesPackets.
   149  						WithLabelValues(namespace, podName, containerName, tableName, chainName, chain.Policy, rule.Rule).
   150  						Set(float64(rule.Packets))
   151  
   152  					collector.iptablesPacketBytes.
   153  						WithLabelValues(namespace, podName, containerName, tableName, chainName, chain.Policy, rule.Rule).
   154  						Set(float64(rule.Bytes))
   155  				}
   156  			}
   157  		}
   158  
   159  		members, err := utils.GetIPSetRulesNumberByNetNS(pid)
   160  		if err != nil {
   161  			log.Error(err, "fail to collect ipset member metric")
   162  		}
   163  		collector.ipsetMembers.WithLabelValues(labelValues...).Set(float64(members))
   164  
   165  		tcRules, err := utils.GetTcRulesNumberByNetNS(pid)
   166  		if err != nil {
   167  			log.Error(err, "fail to collect tc rules metric")
   168  		}
   169  		collector.tcRules.WithLabelValues(labelValues...).Set(float64(tcRules))
   170  	}
   171  }
   172