1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package metrics
17
18 import (
19 "context"
20
21 "github.com/go-logr/logr"
22 grpcprometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
23 "github.com/prometheus/client_golang/prometheus"
24
25 "github.com/chaos-mesh/chaos-mesh/pkg/chaosdaemon/crclients"
26 "github.com/chaos-mesh/chaos-mesh/pkg/log"
27 "github.com/chaos-mesh/chaos-mesh/pkg/metrics/utils"
28 )
29
30 var (
31
32 DefaultChaosDaemonMetricsCollector = NewChaosDaemonMetricsCollector(log.L().WithName("chaos-daemon").WithName("metrics"))
33
34
35 ChaosDaemonGrpcServerBuckets = []float64{0.001, 0.01, 0.1, 0.3, 0.6, 1, 3, 6, 10}
36 )
37
// Container label keys that collectNetworkMetrics looks up in the labels
// returned by the container runtime client, in order to attribute every sample
// to a pod, a namespace and a container.
// NOTE(review): presumably these labels are attached by the kubelet; confirm.
const (
	kubernetesPodNameLabel       = "io.kubernetes.pod.name"
	kubernetesPodNamespaceLabel  = "io.kubernetes.pod.namespace"
	kubernetesContainerNameLabel = "io.kubernetes.container.name"
)

// chaosDaemonMetricsSubsystem is the Prometheus subsystem set on every gauge
// created by NewChaosDaemonMetricsCollector.
const chaosDaemonMetricsSubsystem = "chaos_daemon"
48
49 func WithHistogramName(name string) grpcprometheus.HistogramOption {
50 return func(opts *prometheus.HistogramOpts) {
51 opts.Name = name
52 }
53 }
54
55 type ChaosDaemonMetricsCollector struct {
56 crClient crclients.ContainerRuntimeInfoClient
57 logger logr.Logger
58 iptablesPackets *prometheus.GaugeVec
59 iptablesPacketBytes *prometheus.GaugeVec
60 ipsetMembers *prometheus.GaugeVec
61 tcRules *prometheus.GaugeVec
62 }
63
64
65 func NewChaosDaemonMetricsCollector(logger logr.Logger) *ChaosDaemonMetricsCollector {
66 return &ChaosDaemonMetricsCollector{
67 logger: logger,
68 iptablesPackets: prometheus.NewGaugeVec(prometheus.GaugeOpts{
69 Subsystem: chaosDaemonMetricsSubsystem,
70 Name: "iptables_packets",
71 Help: "Total number of iptables packets",
72 }, []string{"namespace", "pod", "container", "table", "chain", "policy", "rule"}),
73 iptablesPacketBytes: prometheus.NewGaugeVec(prometheus.GaugeOpts{
74 Subsystem: chaosDaemonMetricsSubsystem,
75 Name: "iptables_packet_bytes",
76 Help: "Total bytes of iptables packets",
77 }, []string{"namespace", "pod", "container", "table", "chain", "policy", "rule"}),
78 ipsetMembers: prometheus.NewGaugeVec(prometheus.GaugeOpts{
79 Subsystem: chaosDaemonMetricsSubsystem,
80 Name: "ipset_members",
81 Help: "Total number of ipset members",
82 }, []string{"namespace", "pod", "container"}),
83 tcRules: prometheus.NewGaugeVec(prometheus.GaugeOpts{
84 Subsystem: chaosDaemonMetricsSubsystem,
85 Name: "tcs_rules",
86 Help: "Total number of tc rules",
87 }, []string{"namespace", "pod", "container"}),
88 }
89 }
90
91 func (collector *ChaosDaemonMetricsCollector) Describe(ch chan<- *prometheus.Desc) {
92 collector.iptablesPackets.Describe(ch)
93 collector.iptablesPacketBytes.Describe(ch)
94 collector.ipsetMembers.Describe(ch)
95 collector.tcRules.Describe(ch)
96 }
97
98 func (collector *ChaosDaemonMetricsCollector) Collect(ch chan<- prometheus.Metric) {
99 collector.collectNetworkMetrics()
100 collector.iptablesPackets.Collect(ch)
101 collector.iptablesPacketBytes.Collect(ch)
102 collector.ipsetMembers.Collect(ch)
103 collector.tcRules.Collect(ch)
104 }
105
106 func (collector *ChaosDaemonMetricsCollector) InjectCrClient(client crclients.ContainerRuntimeInfoClient) *ChaosDaemonMetricsCollector {
107 collector.crClient = client
108 return collector
109 }
110
111 func (collector *ChaosDaemonMetricsCollector) collectNetworkMetrics() {
112 collector.iptablesPackets.Reset()
113 collector.iptablesPacketBytes.Reset()
114 collector.ipsetMembers.Reset()
115 collector.tcRules.Reset()
116
117 containerIDs, err := collector.crClient.ListContainerIDs(context.Background())
118 if err != nil {
119 collector.logger.Error(err, "fail to list all container process IDs")
120 return
121 }
122
123 for _, containerID := range containerIDs {
124 pid, err := collector.crClient.GetPidFromContainerID(context.Background(), containerID)
125 if err != nil {
126 collector.logger.Error(err, "fail to get pid from container ID")
127 continue
128 }
129
130 labels, err := collector.crClient.GetLabelsFromContainerID(context.Background(), containerID)
131 if err != nil {
132 collector.logger.Error(err, "fail to get container labels", "containerID", containerID)
133 continue
134 }
135
136 namespace, podName, containerName := labels[kubernetesPodNamespaceLabel],
137 labels[kubernetesPodNameLabel], labels[kubernetesContainerNameLabel]
138
139 labelValues := []string{namespace, podName, containerName}
140 log := collector.logger.WithValues(
141 "namespace", namespace,
142 "podName", podName,
143 "containerName", containerName,
144 "containerID", containerID,
145 )
146
147 tables, err := utils.GetIptablesContentByNetNS(pid)
148 if err != nil {
149 log.Error(err, "fail to collect iptables metrics")
150 }
151 for tableName, table := range tables {
152 for chainName, chain := range table {
153 for _, rule := range chain.Rules {
154 collector.iptablesPackets.
155 WithLabelValues(namespace, podName, containerName, tableName, chainName, chain.Policy, rule.Rule).
156 Set(float64(rule.Packets))
157
158 collector.iptablesPacketBytes.
159 WithLabelValues(namespace, podName, containerName, tableName, chainName, chain.Policy, rule.Rule).
160 Set(float64(rule.Bytes))
161 }
162 }
163 }
164
165 members, err := utils.GetIPSetRulesNumberByNetNS(pid)
166 if err != nil {
167 log.Error(err, "fail to collect ipset member metric")
168 }
169 collector.ipsetMembers.WithLabelValues(labelValues...).Set(float64(members))
170
171 tcRules, err := utils.GetTcRulesNumberByNetNS(pid)
172 if err != nil {
173 log.Error(err, "fail to collect tc rules metric")
174 }
175 collector.tcRules.WithLabelValues(labelValues...).Set(float64(tcRules))
176 }
177 }
178