1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package podchaos
17
18 import (
19 "context"
20 "time"
21
22 . "github.com/onsi/ginkgo/v2"
23 corev1 "k8s.io/api/core/v1"
24 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
25 "k8s.io/apimachinery/pkg/labels"
26 "k8s.io/apimachinery/pkg/types"
27 "k8s.io/apimachinery/pkg/util/wait"
28 "k8s.io/client-go/kubernetes"
29 "k8s.io/kubernetes/test/e2e/framework"
30 "k8s.io/utils/pointer"
31 "sigs.k8s.io/controller-runtime/pkg/client"
32
33 "github.com/chaos-mesh/chaos-mesh/api/v1alpha1"
34 "github.com/chaos-mesh/chaos-mesh/e2e-test/e2e/config"
35 "github.com/chaos-mesh/chaos-mesh/e2e-test/e2e/util"
36 "github.com/chaos-mesh/chaos-mesh/e2e-test/pkg/fixture"
37 )
38
39 func TestcasePodFailureOnceThenDelete(ns string, kubeCli kubernetes.Interface, cli client.Client) {
40 ctx, cancel := context.WithCancel(context.Background())
41 defer cancel()
42
43 By("preparing experiment pods")
44 appName := "timer-pod-failure1"
45 nd := fixture.NewTimerDeployment(appName, ns)
46 _, err := kubeCli.AppsV1().Deployments(ns).Create(context.TODO(), nd, metav1.CreateOptions{})
47 framework.ExpectNoError(err, "create timer deployment error")
48 err = util.WaitDeploymentReady(appName, ns, kubeCli)
49 framework.ExpectNoError(err, "wait timer deployment ready error")
50
51 By("create pod failure chaos CRD objects")
52 listOption := metav1.ListOptions{
53 LabelSelector: labels.SelectorFromSet(map[string]string{
54 "app": appName,
55 }).String(),
56 }
57 podFailureChaos := &v1alpha1.PodChaos{
58 ObjectMeta: metav1.ObjectMeta{
59 Name: "timer-failure1",
60 Namespace: ns,
61 },
62 Spec: v1alpha1.PodChaosSpec{
63 Action: v1alpha1.PodFailureAction,
64 ContainerSelector: v1alpha1.ContainerSelector{
65 PodSelector: v1alpha1.PodSelector{
66 Selector: v1alpha1.PodSelectorSpec{
67 GenericSelectorSpec: v1alpha1.GenericSelectorSpec{
68 Namespaces: []string{
69 ns,
70 },
71 LabelSelectors: map[string]string{
72 "app": appName,
73 },
74 },
75 },
76 Mode: v1alpha1.OneMode,
77 },
78 },
79 },
80 }
81
82 err = cli.Create(ctx, podFailureChaos)
83 framework.ExpectNoError(err, "create pod failure chaos error")
84
85 By("waiting for assertion some pod fall into failure")
86 err = wait.Poll(5*time.Second, 5*time.Minute, func() (done bool, err error) {
87 pods, err := kubeCli.CoreV1().Pods(ns).List(context.TODO(), listOption)
88 if err != nil {
89 return false, nil
90 }
91 if len(pods.Items) != 1 {
92 return false, nil
93 }
94 pod := pods.Items[0]
95 for _, c := range pod.Spec.Containers {
96 if c.Image == config.TestConfig.PauseImage {
97 return true, nil
98 }
99 }
100 return false, nil
101 })
102 framework.ExpectNoError(err, "failed to verify PodFailure")
103
104 By("delete pod failure chaos CRD objects")
105 err = cli.Delete(ctx, podFailureChaos)
106 framework.ExpectNoError(err, "failed to delete pod failure chaos")
107
108 By("waiting for assertion recovering")
109 err = wait.Poll(5*time.Second, 2*time.Minute, func() (done bool, err error) {
110 pods, err := kubeCli.CoreV1().Pods(ns).List(context.TODO(), listOption)
111 if err != nil {
112 return false, nil
113 }
114 if len(pods.Items) != 1 {
115 return false, nil
116 }
117 pod := pods.Items[0]
118 for _, c := range pod.Spec.Containers {
119 if c.Image == nd.Spec.Template.Spec.Containers[0].Image {
120 return true, nil
121 }
122 }
123 return false, nil
124 })
125 framework.ExpectNoError(err, "pod failure recover failed")
126 }
127
128 func TestcasePodFailurePauseThenUnPause(ns string, kubeCli kubernetes.Interface, cli client.Client) {
129 ctx, cancel := context.WithCancel(context.Background())
130 defer cancel()
131
132 By("preparing experiment pods")
133 appName := "timer-pod-failure2"
134 nd := fixture.NewTimerDeployment(appName, ns)
135 _, err := kubeCli.AppsV1().Deployments(ns).Create(context.TODO(), nd, metav1.CreateOptions{})
136 framework.ExpectNoError(err, "create timer deployment error")
137 err = util.WaitDeploymentReady(appName, ns, kubeCli)
138 framework.ExpectNoError(err, "wait timer deployment ready error")
139
140 By("create pod failure chaos CRD objects")
141 var pods *corev1.PodList
142 listOption := metav1.ListOptions{
143 LabelSelector: labels.SelectorFromSet(map[string]string{
144 "app": appName,
145 }).String(),
146 }
147
148 podFailureChaos := &v1alpha1.PodChaos{
149 ObjectMeta: metav1.ObjectMeta{
150 Name: "timer-failure2",
151 Namespace: ns,
152 },
153 Spec: v1alpha1.PodChaosSpec{
154 Action: v1alpha1.PodFailureAction,
155 Duration: pointer.String("9m"),
156 ContainerSelector: v1alpha1.ContainerSelector{
157 PodSelector: v1alpha1.PodSelector{
158 Selector: v1alpha1.PodSelectorSpec{
159 GenericSelectorSpec: v1alpha1.GenericSelectorSpec{
160 Namespaces: []string{
161 ns,
162 },
163 LabelSelectors: map[string]string{
164 "app": appName,
165 },
166 },
167 },
168 Mode: v1alpha1.OneMode,
169 },
170 },
171 },
172 }
173 err = cli.Create(ctx, podFailureChaos)
174 framework.ExpectNoError(err, "create pod failure chaos error")
175 chaosKey := types.NamespacedName{
176 Namespace: ns,
177 Name: "timer-failure2",
178 }
179
180 By("waiting for assertion some pod fall into failure")
181
182 err = wait.Poll(5*time.Second, 5*time.Minute, func() (done bool, err error) {
183 pods, err := kubeCli.CoreV1().Pods(ns).List(context.TODO(), listOption)
184 if err != nil {
185 return false, nil
186 }
187 pod := pods.Items[0]
188 for _, c := range pod.Spec.Containers {
189 if c.Image == config.TestConfig.PauseImage {
190 return true, nil
191 }
192 }
193 return false, nil
194 })
195 framework.ExpectNoError(err, "image not update to pause")
196
197
198 By("pause pod failure chaos")
199 err = util.PauseChaos(ctx, cli, podFailureChaos)
200 framework.ExpectNoError(err, "pause chaos error")
201
202 By("waiting for assertion about chaos experiment paused")
203 err = wait.Poll(5*time.Second, 5*time.Minute, func() (done bool, err error) {
204 chaos := &v1alpha1.PodChaos{}
205 err = cli.Get(ctx, chaosKey, chaos)
206 framework.ExpectNoError(err, "get pod chaos error")
207 if chaos.Status.Experiment.DesiredPhase == v1alpha1.StoppedPhase {
208 return true, nil
209 }
210 return false, err
211 })
212 framework.ExpectNoError(err, "check paused chaos failed")
213
214 By("wait for 30 seconds and no pod failure")
215 pods, err = kubeCli.CoreV1().Pods(ns).List(context.TODO(), listOption)
216 framework.ExpectNoError(err, "get timer pod error")
217 err = wait.Poll(5*time.Second, 30*time.Second, func() (done bool, err error) {
218 pods, err = kubeCli.CoreV1().Pods(ns).List(context.TODO(), listOption)
219 framework.ExpectNoError(err, "get timer pod error")
220 pod := pods.Items[0]
221 for _, c := range pod.Spec.Containers {
222 if c.Image == config.TestConfig.PauseImage {
223 return false, nil
224 }
225 }
226
227 return true, nil
228 })
229 framework.ExpectNoError(err, "check paused chaos failed")
230
231 By("resume paused chaos experiment")
232 err = util.UnPauseChaos(ctx, cli, podFailureChaos)
233 framework.ExpectNoError(err, "resume chaos error")
234
235 By("waiting for assertion about pod failure happens again")
236 err = wait.Poll(5*time.Second, 5*time.Minute, func() (done bool, err error) {
237 chaos := &v1alpha1.PodChaos{}
238 err = cli.Get(ctx, chaosKey, chaos)
239 framework.ExpectNoError(err, "get pod chaos error")
240 if chaos.Status.Experiment.DesiredPhase == v1alpha1.RunningPhase {
241 return true, nil
242 }
243 return false, err
244 })
245 framework.ExpectNoError(err, "check resumed chaos failed")
246
247 By("waiting for assert pod failure happens again")
248 err = wait.Poll(5*time.Second, 5*time.Minute, func() (done bool, err error) {
249 pods, err = kubeCli.CoreV1().Pods(ns).List(context.TODO(), listOption)
250 framework.ExpectNoError(err, "get timer pod error")
251 pod := pods.Items[0]
252 for _, c := range pod.Spec.Containers {
253 if c.Image == config.TestConfig.PauseImage {
254 return true, nil
255 }
256 }
257 return false, nil
258 })
259 framework.ExpectNoError(err, "wait pod failure failed")
260 }
261