...

Source file src/github.com/chaos-mesh/chaos-mesh/pkg/metrics/chaos-daemon.go

Documentation: github.com/chaos-mesh/chaos-mesh/pkg/metrics

     1  // Copyright 2021 Chaos Mesh Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  // http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  
    16  package metrics
    17  
    18  import (
    19  	"context"
    20  
    21  	"github.com/go-logr/logr"
    22  	grpcprometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
    23  	"github.com/prometheus/client_golang/prometheus"
    24  
    25  	"github.com/chaos-mesh/chaos-mesh/pkg/chaosdaemon/crclients"
    26  	"github.com/chaos-mesh/chaos-mesh/pkg/log"
    27  	"github.com/chaos-mesh/chaos-mesh/pkg/metrics/utils"
    28  )
    29  
    30  var (
    31  	// DefaultChaosDaemonMetricsCollector is the default metrics collector for chaos daemon
    32  	DefaultChaosDaemonMetricsCollector = NewChaosDaemonMetricsCollector(log.L().WithName("chaos-daemon").WithName("metrics"))
    33  
    34  	// ChaosDaemonGrpcServerBuckets is the buckets for gRPC server handling histogram metrics
    35  	ChaosDaemonGrpcServerBuckets = []float64{0.001, 0.01, 0.1, 0.3, 0.6, 1, 3, 6, 10}
    36  )
    37  
    38  const (
    39  	// kubernetesPodNameLabel, kubernetesPodNamespaceLabel and kubernetesContainerNameLabel are the label keys
    40  	//   indicating the kubernetes information of the container under `k8s.io/kubernetes` package
    41  	// And it is best not to set `k8s.io/kubernetes` as dependency, see more: https://github.com/kubernetes/kubernetes/issues/90358#issuecomment-617859364.
    42  	kubernetesPodNameLabel       = "io.kubernetes.pod.name"
    43  	kubernetesPodNamespaceLabel  = "io.kubernetes.pod.namespace"
    44  	kubernetesContainerNameLabel = "io.kubernetes.container.name"
    45  	// chaosDaemonMetricsSubsystem is the subsystem name for chaos daemon metrics
    46  	chaosDaemonMetricsSubsystem = "chaos_daemon"
    47  )
    48  
    49  func WithHistogramName(name string) grpcprometheus.HistogramOption {
    50  	return func(opts *prometheus.HistogramOpts) {
    51  		opts.Name = name
    52  	}
    53  }
    54  
    55  type ChaosDaemonMetricsCollector struct {
    56  	crClient            crclients.ContainerRuntimeInfoClient
    57  	logger              logr.Logger
    58  	iptablesPackets     *prometheus.GaugeVec
    59  	iptablesPacketBytes *prometheus.GaugeVec
    60  	ipsetMembers        *prometheus.GaugeVec
    61  	tcRules             *prometheus.GaugeVec
    62  }
    63  
    64  // NewChaosDaemonMetricsCollector initializes metrics for each chaos daemon
    65  func NewChaosDaemonMetricsCollector(logger logr.Logger) *ChaosDaemonMetricsCollector {
    66  	return &ChaosDaemonMetricsCollector{
    67  		logger: logger,
    68  		iptablesPackets: prometheus.NewGaugeVec(prometheus.GaugeOpts{
    69  			Subsystem: chaosDaemonMetricsSubsystem,
    70  			Name:      "iptables_packets",
    71  			Help:      "Total number of iptables packets",
    72  		}, []string{"namespace", "pod", "container", "table", "chain", "policy", "rule"}),
    73  		iptablesPacketBytes: prometheus.NewGaugeVec(prometheus.GaugeOpts{
    74  			Subsystem: chaosDaemonMetricsSubsystem,
    75  			Name:      "iptables_packet_bytes",
    76  			Help:      "Total bytes of iptables packets",
    77  		}, []string{"namespace", "pod", "container", "table", "chain", "policy", "rule"}),
    78  		ipsetMembers: prometheus.NewGaugeVec(prometheus.GaugeOpts{
    79  			Subsystem: chaosDaemonMetricsSubsystem,
    80  			Name:      "ipset_members",
    81  			Help:      "Total number of ipset members",
    82  		}, []string{"namespace", "pod", "container"}),
    83  		tcRules: prometheus.NewGaugeVec(prometheus.GaugeOpts{
    84  			Subsystem: chaosDaemonMetricsSubsystem,
    85  			Name:      "tcs_rules",
    86  			Help:      "Total number of tc rules",
    87  		}, []string{"namespace", "pod", "container"}),
    88  	}
    89  }
    90  
    91  func (collector *ChaosDaemonMetricsCollector) Describe(ch chan<- *prometheus.Desc) {
    92  	collector.iptablesPackets.Describe(ch)
    93  	collector.iptablesPacketBytes.Describe(ch)
    94  	collector.ipsetMembers.Describe(ch)
    95  	collector.tcRules.Describe(ch)
    96  }
    97  
    98  func (collector *ChaosDaemonMetricsCollector) Collect(ch chan<- prometheus.Metric) {
    99  	collector.collectNetworkMetrics()
   100  	collector.iptablesPackets.Collect(ch)
   101  	collector.iptablesPacketBytes.Collect(ch)
   102  	collector.ipsetMembers.Collect(ch)
   103  	collector.tcRules.Collect(ch)
   104  }
   105  
   106  func (collector *ChaosDaemonMetricsCollector) InjectCrClient(client crclients.ContainerRuntimeInfoClient) *ChaosDaemonMetricsCollector {
   107  	collector.crClient = client
   108  	return collector
   109  }
   110  
   111  func (collector *ChaosDaemonMetricsCollector) collectNetworkMetrics() {
   112  	collector.iptablesPackets.Reset()
   113  	collector.iptablesPacketBytes.Reset()
   114  	collector.ipsetMembers.Reset()
   115  	collector.tcRules.Reset()
   116  
   117  	containerIDs, err := collector.crClient.ListContainerIDs(context.Background())
   118  	if err != nil {
   119  		collector.logger.Error(err, "fail to list all container process IDs")
   120  		return
   121  	}
   122  
   123  	for _, containerID := range containerIDs {
   124  		pid, err := collector.crClient.GetPidFromContainerID(context.Background(), containerID)
   125  		if err != nil {
   126  			collector.logger.Error(err, "fail to get pid from container ID")
   127  			continue
   128  		}
   129  
   130  		labels, err := collector.crClient.GetLabelsFromContainerID(context.Background(), containerID)
   131  		if err != nil {
   132  			collector.logger.Error(err, "fail to get container labels", "containerID", containerID)
   133  			continue
   134  		}
   135  
   136  		namespace, podName, containerName := labels[kubernetesPodNamespaceLabel],
   137  			labels[kubernetesPodNameLabel], labels[kubernetesContainerNameLabel]
   138  
   139  		labelValues := []string{namespace, podName, containerName}
   140  		log := collector.logger.WithValues(
   141  			"namespace", namespace,
   142  			"podName", podName,
   143  			"containerName", containerName,
   144  			"containerID", containerID,
   145  		)
   146  
   147  		tables, err := utils.GetIptablesContentByNetNS(pid)
   148  		if err != nil {
   149  			log.Error(err, "fail to collect iptables metrics")
   150  		}
   151  		for tableName, table := range tables {
   152  			for chainName, chain := range table {
   153  				for _, rule := range chain.Rules {
   154  					collector.iptablesPackets.
   155  						WithLabelValues(namespace, podName, containerName, tableName, chainName, chain.Policy, rule.Rule).
   156  						Set(float64(rule.Packets))
   157  
   158  					collector.iptablesPacketBytes.
   159  						WithLabelValues(namespace, podName, containerName, tableName, chainName, chain.Policy, rule.Rule).
   160  						Set(float64(rule.Bytes))
   161  				}
   162  			}
   163  		}
   164  
   165  		members, err := utils.GetIPSetRulesNumberByNetNS(pid)
   166  		if err != nil {
   167  			log.Error(err, "fail to collect ipset member metric")
   168  		}
   169  		collector.ipsetMembers.WithLabelValues(labelValues...).Set(float64(members))
   170  
   171  		tcRules, err := utils.GetTcRulesNumberByNetNS(pid)
   172  		if err != nil {
   173  			log.Error(err, "fail to collect tc rules metric")
   174  		}
   175  		collector.tcRules.WithLabelValues(labelValues...).Set(float64(tcRules))
   176  	}
   177  }
   178