...

Source file src/github.com/chaos-mesh/chaos-mesh/e2e-test/e2e/e2e.go

Documentation: github.com/chaos-mesh/chaos-mesh/e2e-test/e2e

     1  // Copyright 2021 Chaos Mesh Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  // http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  
    16  package e2e
    17  
    18  import (
    19  	"context" // load pprof
    20  	_ "net/http/pprof"
    21  	"os/exec"
    22  	"time"
    23  
    24  	"github.com/onsi/ginkgo/v2"
    25  	v1 "k8s.io/api/core/v1"
    26  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    27  	"k8s.io/client-go/kubernetes"
    28  	_ "k8s.io/client-go/plugin/pkg/client/auth"
    29  	"k8s.io/klog/v2"
    30  	_ "k8s.io/kubelet"
    31  	"k8s.io/kubernetes/test/e2e/framework"
    32  	e2edebug "k8s.io/kubernetes/test/e2e/framework/debug"
    33  	e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl"
    34  	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
    35  	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    36  	utilnet "k8s.io/utils/net"
    37  
    38  	test "github.com/chaos-mesh/chaos-mesh/e2e-test"
    39  	e2econfig "github.com/chaos-mesh/chaos-mesh/e2e-test/e2e/config" // ensure auth plugins are loaded
    40  )
    41  
    42  const namespaceCleanupTimeout = 15 * time.Minute
    43  
    44  // This is modified from framework.SetupSuite().
    45  // setupSuite is the boilerplate that can be used to setup ginkgo test suites, on the SynchronizedBeforeSuite step.
    46  // There are certain operations we only want to run once per overall test invocation
    47  // (such as deleting old namespaces, or verifying that all system pods are running.
    48  // Because of the way Ginkgo runs tests in parallel, we must use SynchronizedBeforeSuite
    49  // to ensure that these operations only run on the first parallel Ginkgo node.
    50  func setupSuite(ctx context.Context) {
    51  	// Run only on Ginkgo node 1
    52  
    53  	c, err := framework.LoadClientset()
    54  	if err != nil {
    55  		klog.Fatal("Error loading client: ", err)
    56  	}
    57  
    58  	// Delete any namespaces except those created by the system. This ensures no
    59  	// lingering resources are left over from a previous test run.
    60  	if framework.TestContext.CleanStart {
    61  		deleted, err := framework.DeleteNamespaces(ctx, c, nil, /* deleteFilter */
    62  			[]string{
    63  				metav1.NamespaceSystem,
    64  				metav1.NamespaceDefault,
    65  				metav1.NamespacePublic,
    66  				v1.NamespaceNodeLease,
    67  				// kind local path provisioner namespace since 0.7.0
    68  				// https://github.com/kubernetes-sigs/kind/blob/v0.7.0/pkg/build/node/storage.go#L35
    69  				"local-path-storage",
    70  			})
    71  		if err != nil {
    72  			framework.Failf("Error deleting orphaned namespaces: %v", err)
    73  		}
    74  		klog.Infof("Waiting for deletion of the following namespaces: %v", deleted)
    75  		if err := framework.WaitForNamespacesDeleted(ctx, c, deleted, namespaceCleanupTimeout); err != nil {
    76  			framework.Failf("Failed to delete orphaned namespaces %v: %v", deleted, err)
    77  		}
    78  	}
    79  
    80  	timeouts := framework.NewTimeoutContext()
    81  
    82  	// In large clusters we may get to this point but still have a bunch
    83  	// of nodes without Routes created. Since this would make a node
    84  	// unschedulable, we need to wait until all of them are schedulable.
    85  	framework.ExpectNoError(e2enode.WaitForAllNodesSchedulable(ctx, c, timeouts.NodeSchedulable))
    86  
    87  	//// If NumNodes is not specified then auto-detect how many are scheduleable and not tainted
    88  	//if framework.TestContext.CloudConfig.NumNodes == framework.DefaultNumNodes {
    89  	//	framework.TestContext.CloudConfig.NumNodes = len(framework.GetReadySchedulableNodesOrDie(c).Items)
    90  	//}
    91  
    92  	// Ensure all pods are running and ready before starting tests (otherwise,
    93  	// cluster infrastructure pods that are being pulled or started can block
    94  	// test pods from running, and tests that ensure all pods are running and
    95  	// ready will fail).
    96  	podStartupTimeout := timeouts.SystemPodsStartup
    97  	// TODO: In large clusters, we often observe a non-starting pods due to
    98  	// #41007. To avoid those pods preventing the whole test runs (and just
    99  	// wasting the whole run), we allow for some not-ready pods (with the
   100  	// number equal to the number of allowed not-ready nodes).
   101  	if err := e2epod.WaitForPodsRunningReady(ctx, c, metav1.NamespaceSystem, int32(framework.TestContext.MinStartupPods), int32(framework.TestContext.AllowedNotReadyNodes), podStartupTimeout); err != nil {
   102  		e2edebug.DumpAllNamespaceInfo(ctx, c, metav1.NamespaceSystem)
   103  		e2ekubectl.LogFailedContainers(ctx, c, metav1.NamespaceSystem, framework.Logf)
   104  		framework.Failf("Error waiting for all pods to be running and ready: %v", err)
   105  	}
   106  
   107  	//if err := framework.WaitForDaemonSets(c, metav1.NamespaceSystem, int32(framework.TestContext.AllowedNotReadyNodes), framework.TestContext.SystemDaemonsetStartupTimeout); err != nil {
   108  	//	framework.Logf("WARNING: Waiting for all daemonsets to be ready failed: %v", err)
   109  	//}
   110  
   111  	dc := c.DiscoveryClient
   112  
   113  	serverVersion, serverErr := dc.ServerVersion()
   114  	if serverErr != nil {
   115  		framework.Logf("Unexpected server error retrieving version: %v", serverErr)
   116  	}
   117  	if serverVersion != nil {
   118  		framework.Logf("kube-apiserver version: %s", serverVersion.GitVersion)
   119  	}
   120  }
   121  
   122  var _ = ginkgo.SynchronizedBeforeSuite(func() []byte {
   123  	if e2econfig.TestConfig.InstallChaosMesh {
   124  		ginkgo.By("Clear all helm releases")
   125  		helmClearCmd := "helm ls --all --short | xargs -n 1 -r helm delete --purge"
   126  		if err := exec.Command("sh", "-c", helmClearCmd).Run(); err != nil {
   127  			framework.Failf("failed to clear helm releases (cmd: %q, error: %v", helmClearCmd, err)
   128  		}
   129  		ginkgo.By("Clear non-kubernetes apiservices")
   130  		clearNonK8SAPIServicesCmd := "kubectl delete apiservices -l kube-aggregator.kubernetes.io/automanaged!=onstart"
   131  		if err := exec.Command("sh", "-c", clearNonK8SAPIServicesCmd).Run(); err != nil {
   132  			framework.Failf("failed to clear non-kubernetes apiservices (cmd: %q, error: %v", clearNonK8SAPIServicesCmd, err)
   133  		}
   134  
   135  		setupSuite(context.Background())
   136  
   137  		// Get clients
   138  		oa, ocfg, err := test.BuildOperatorActionAndCfg(e2econfig.TestConfig)
   139  		framework.ExpectNoError(err, "failed to create operator action")
   140  		oa.CleanCRDOrDie()
   141  		err = oa.InstallCRD(ocfg)
   142  		framework.ExpectNoError(err, "failed to install crd")
   143  		err = oa.DeployOperator(ocfg)
   144  		framework.ExpectNoError(err, "failed to install chaos-mesh")
   145  	}
   146  	return nil
   147  }, func(data []byte) {
   148  	// Run on all Ginkgo nodes
   149  	setupSuitePerGinkgoNode()
   150  })
   151  
   152  func setupSuitePerGinkgoNode() {
   153  	c, err := framework.LoadClientset()
   154  	if err != nil {
   155  		klog.Fatal("Error loading client: ", err)
   156  	}
   157  	framework.TestContext.IPFamily = getDefaultClusterIPFamily(c)
   158  	framework.Logf("Cluster IP family: %s", framework.TestContext.IPFamily)
   159  }
   160  
   161  // getDefaultClusterIPFamily obtains the default IP family of the cluster
   162  // using the Cluster IP address of the kubernetes service created in the default namespace
   163  // This unequivocally identifies the default IP family because services are single family
   164  // TODO: dual-stack may support multiple families per service
   165  // but we can detect if a cluster is dual stack because pods have two addresses (one per family)
   166  func getDefaultClusterIPFamily(c kubernetes.Interface) string {
   167  	// Get the ClusterIP of the kubernetes service created in the default namespace
   168  	svc, err := c.CoreV1().Services(metav1.NamespaceDefault).Get(context.TODO(), "kubernetes", metav1.GetOptions{})
   169  	if err != nil {
   170  		framework.Failf("Failed to get kubernetes service ClusterIP: %v", err)
   171  	}
   172  
   173  	if utilnet.IsIPv6String(svc.Spec.ClusterIP) {
   174  		return "ipv6"
   175  	}
   176  	return "ipv4"
   177  }
   178