1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package metrics
17
18 import (
19 "context"
20
21 "github.com/go-logr/logr"
22 grpcprometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
23 "github.com/prometheus/client_golang/prometheus"
24
25 "github.com/chaos-mesh/chaos-mesh/pkg/chaosdaemon/crclients"
26 "github.com/chaos-mesh/chaos-mesh/pkg/log"
27 "github.com/chaos-mesh/chaos-mesh/pkg/metrics/utils"
28 )
29
30 var (
31
32 DefaultChaosDaemonMetricsCollector = NewChaosDaemonMetricsCollector(log.L().WithName("chaos-daemon").WithName("metrics"))
33
34
35 ChaosDaemonGrpcServerBuckets = []float64{0.001, 0.01, 0.1, 0.3, 0.6, 1, 3, 6, 10}
36 )
37
// Keys looked up in the label set returned by the container runtime client to
// obtain the namespace, pod name and container name of a container.
const (
	kubernetesPodNamespaceLabel  = "io.kubernetes.pod.namespace"
	kubernetesPodNameLabel       = "io.kubernetes.pod.name"
	kubernetesContainerNameLabel = "io.kubernetes.container.name"
)
46
47 func WithHistogramName(name string) grpcprometheus.HistogramOption {
48 return func(opts *prometheus.HistogramOpts) {
49 opts.Name = name
50 }
51 }
52
53 type ChaosDaemonMetricsCollector struct {
54 crClient crclients.ContainerRuntimeInfoClient
55 logger logr.Logger
56 iptablesPackets *prometheus.GaugeVec
57 iptablesPacketBytes *prometheus.GaugeVec
58 ipsetMembers *prometheus.GaugeVec
59 tcRules *prometheus.GaugeVec
60 }
61
62
63 func NewChaosDaemonMetricsCollector(logger logr.Logger) *ChaosDaemonMetricsCollector {
64 return &ChaosDaemonMetricsCollector{
65 logger: logger,
66 iptablesPackets: prometheus.NewGaugeVec(prometheus.GaugeOpts{
67 Name: "chaos_daemon_iptables_packets",
68 Help: "Total number of iptables packets",
69 }, []string{"namespace", "pod", "container", "table", "chain", "policy", "rule"}),
70 iptablesPacketBytes: prometheus.NewGaugeVec(prometheus.GaugeOpts{
71 Name: "chaos_daemon_iptables_packet_bytes",
72 Help: "Total bytes of iptables packets",
73 }, []string{"namespace", "pod", "container", "table", "chain", "policy", "rule"}),
74 ipsetMembers: prometheus.NewGaugeVec(prometheus.GaugeOpts{
75 Name: "chaos_daemon_ipset_members",
76 Help: "Total number of ipset members",
77 }, []string{"namespace", "pod", "container"}),
78 tcRules: prometheus.NewGaugeVec(prometheus.GaugeOpts{
79 Name: "chaos_daemon_tcs",
80 Help: "Total number of tc rules",
81 }, []string{"namespace", "pod", "container"}),
82 }
83 }
84
85 func (collector *ChaosDaemonMetricsCollector) Describe(ch chan<- *prometheus.Desc) {
86 collector.iptablesPackets.Describe(ch)
87 collector.iptablesPacketBytes.Describe(ch)
88 collector.ipsetMembers.Describe(ch)
89 collector.tcRules.Describe(ch)
90 }
91
92 func (collector *ChaosDaemonMetricsCollector) Collect(ch chan<- prometheus.Metric) {
93 collector.collectNetworkMetrics()
94 collector.iptablesPackets.Collect(ch)
95 collector.iptablesPacketBytes.Collect(ch)
96 collector.ipsetMembers.Collect(ch)
97 collector.tcRules.Collect(ch)
98 }
99
100 func (collector *ChaosDaemonMetricsCollector) InjectCrClient(client crclients.ContainerRuntimeInfoClient) *ChaosDaemonMetricsCollector {
101 collector.crClient = client
102 return collector
103 }
104
105 func (collector *ChaosDaemonMetricsCollector) collectNetworkMetrics() {
106 collector.iptablesPackets.Reset()
107 collector.iptablesPacketBytes.Reset()
108 collector.ipsetMembers.Reset()
109 collector.tcRules.Reset()
110
111 containerIDs, err := collector.crClient.ListContainerIDs(context.Background())
112 if err != nil {
113 collector.logger.Error(err, "fail to list all container process IDs")
114 return
115 }
116
117 for _, containerID := range containerIDs {
118 pid, err := collector.crClient.GetPidFromContainerID(context.Background(), containerID)
119 if err != nil {
120 collector.logger.Error(err, "fail to get pid from container ID")
121 continue
122 }
123
124 labels, err := collector.crClient.GetLabelsFromContainerID(context.Background(), containerID)
125 if err != nil {
126 collector.logger.Error(err, "fail to get container labels", "containerID", containerID)
127 continue
128 }
129
130 namespace, podName, containerName := labels[kubernetesPodNamespaceLabel],
131 labels[kubernetesPodNameLabel], labels[kubernetesContainerNameLabel]
132
133 labelValues := []string{namespace, podName, containerName}
134 log := collector.logger.WithValues(
135 "namespace", namespace,
136 "podName", podName,
137 "containerName", containerName,
138 "containerID", containerID,
139 )
140
141 tables, err := utils.GetIptablesContentByNetNS(pid)
142 if err != nil {
143 log.Error(err, "fail to collect iptables metrics")
144 }
145 for tableName, table := range tables {
146 for chainName, chain := range table {
147 for _, rule := range chain.Rules {
148 collector.iptablesPackets.
149 WithLabelValues(namespace, podName, containerName, tableName, chainName, chain.Policy, rule.Rule).
150 Set(float64(rule.Packets))
151
152 collector.iptablesPacketBytes.
153 WithLabelValues(namespace, podName, containerName, tableName, chainName, chain.Policy, rule.Rule).
154 Set(float64(rule.Bytes))
155 }
156 }
157 }
158
159 members, err := utils.GetIPSetRulesNumberByNetNS(pid)
160 if err != nil {
161 log.Error(err, "fail to collect ipset member metric")
162 }
163 collector.ipsetMembers.WithLabelValues(labelValues...).Set(float64(members))
164
165 tcRules, err := utils.GetTcRulesNumberByNetNS(pid)
166 if err != nil {
167 log.Error(err, "fail to collect tc rules metric")
168 }
169 collector.tcRules.WithLabelValues(labelValues...).Set(float64(tcRules))
170 }
171 }
172