diff --git a/go.mod b/go.mod
index c8cf43114b30..44ffb82f0ec9 100644
--- a/go.mod
+++ b/go.mod
@@ -2,6 +2,8 @@ module github.com/aws/karpenter-provider-aws
 
 go 1.23.2
 
+replace sigs.k8s.io/karpenter => github.com/engedaam/karpenter-core v0.0.0-20241117033623-cab6157220e3
+
 require (
 	github.com/Pallinder/go-randomdata v1.2.0
 	github.com/PuerkitoBio/goquery v1.10.0
diff --git a/go.sum b/go.sum
index 26fb33c51944..6365dcec26a5 100644
--- a/go.sum
+++ b/go.sum
@@ -74,6 +74,8 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1
 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g=
 github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
+github.com/engedaam/karpenter-core v0.0.0-20241117033623-cab6157220e3 h1:aONl6Y1vKsXK9ipJjSlulSWVA7HwrN9leTy706mG4dw=
+github.com/engedaam/karpenter-core v0.0.0-20241117033623-cab6157220e3/go.mod h1:zolnK/3MxqSPEhEan2VBbzuGdReJPFTbpYWGivwTgic=
 github.com/evanphx/json-patch v5.7.0+incompatible h1:vgGkfT/9f8zE6tvSCe74nfpAVDQ2tG6yudJd8LBksgI=
 github.com/evanphx/json-patch v5.7.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk=
 github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg=
@@ -323,8 +325,6 @@ sigs.k8s.io/controller-runtime v0.19.1 h1:Son+Q40+Be3QWb+niBXAg2vFiYWolDjjRfO8hn
 sigs.k8s.io/controller-runtime v0.19.1/go.mod h1:iRmWllt8IlaLjvTTDLhRBXIEtkCK6hwVBJJsYS9Ajf4=
 sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo=
 sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
-sigs.k8s.io/karpenter v1.0.1-0.20241115180652-995040b20d0a h1:fbD7tYsZCIu3uzgJKO9/XWS6uYPCeWxQmwAvP2jtXRE=
-sigs.k8s.io/karpenter v1.0.1-0.20241115180652-995040b20d0a/go.mod h1:zolnK/3MxqSPEhEan2VBbzuGdReJPFTbpYWGivwTgic=
 sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4=
 sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08=
 sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
diff --git a/pkg/cloudprovider/cloudprovider.go b/pkg/cloudprovider/cloudprovider.go
index 3190f63982c9..3faeba065877 100644
--- a/pkg/cloudprovider/cloudprovider.go
+++ b/pkg/cloudprovider/cloudprovider.go
@@ -250,18 +250,18 @@ func (c *CloudProvider) RepairPolicies() []cloudprovider.RepairPolicy {
 	return []cloudprovider.RepairPolicy{
 		// Supported Kubelet fields
 		{
-			ConditionType:      "Ready",
+			ConditionType:      corev1.NodeReady,
 			ConditionStatus:    corev1.ConditionFalse,
 			TolerationDuration: 30 * time.Minute,
 		},
 		{
-			ConditionType:      "DiskPressure",
-			ConditionStatus:    corev1.ConditionFalse,
+			ConditionType:      corev1.NodeDiskPressure,
+			ConditionStatus:    corev1.ConditionTrue,
 			TolerationDuration: 30 * time.Minute,
 		},
 		{
-			ConditionType:      "MemoryPressure",
-			ConditionStatus:    corev1.ConditionFalse,
+			ConditionType:      corev1.NodeMemoryPressure,
+			ConditionStatus:    corev1.ConditionTrue,
 			TolerationDuration: 30 * time.Minute,
 		},
 	}
diff --git a/pkg/fake/cloudprovider.go b/pkg/fake/cloudprovider.go
index f6e3c4c83475..d21cd3d89bdb 100644
--- a/pkg/fake/cloudprovider.go
+++ b/pkg/fake/cloudprovider.go
@@ -22,7 +22,6 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
 	karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
-	"sigs.k8s.io/karpenter/pkg/cloudprovider"
 	corecloudprovider "sigs.k8s.io/karpenter/pkg/cloudprovider"
 	"sigs.k8s.io/karpenter/pkg/test"
 
@@ -91,6 +90,6 @@ func (c *CloudProvider) GetSupportedNodeClasses() []status.Object {
 	return []status.Object{&v1.EC2NodeClass{}}
 }
 
-func (c *CloudProvider) RepairPolicy() []cloudprovider.RepairStatements {
-	return []cloudprovider.RepairStatements{}
+func (c *CloudProvider) RepairPolicies() []corecloudprovider.RepairPolicy {
+	return []corecloudprovider.RepairPolicy{}
 }
diff --git a/test/pkg/environment/common/expectations.go b/test/pkg/environment/common/expectations.go
index 4112b24c24dd..d3d8acbc2b4a 100644
--- a/test/pkg/environment/common/expectations.go
+++ b/test/pkg/environment/common/expectations.go
@@ -89,6 +89,36 @@ func (env *Environment) ExpectUpdated(objects ...client.Object) {
 	}
 }
 
+// ExpectStatusUpdated writes the status of each given object to the cluster via env.Client.Status().Update.
+// WARNING: This ignores the resource version check, which can result in
+// overwriting changes made by other controllers in the cluster: each attempt copies the latest
+// resource version from the cluster onto the object (logging when the two differ) and retries for up to 10s.
+// Grab the object before making the updates to reduce the chance of this race.
+func (env *Environment) ExpectStatusUpdated(objects ...client.Object) {
+	GinkgoHelper()
+	for _, o := range objects {
+		Eventually(func(g Gomega) {
+			current := o.DeepCopyObject().(client.Object)
+			g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(current), current)).To(Succeed())
+			if current.GetResourceVersion() != o.GetResourceVersion() {
+				log.FromContext(env).Info(fmt.Sprintf("detected an update to an object (%s) with an outdated resource version, did you get the latest version of the object before patching?", lo.Must(apiutil.GVKForObject(o, env.Client.Scheme()))))
+			}
+			o.SetResourceVersion(current.GetResourceVersion())
+			g.Expect(env.Client.Status().Update(env.Context, o)).To(Succeed())
+		}).WithTimeout(time.Second * 10).Should(Succeed())
+	}
+}
+
+// ReplaceNodeConditions drops existing node conditions whose type matches one of conds, appends conds, and returns the same (mutated) node.
+func ReplaceNodeConditions(node *corev1.Node, conds ...corev1.NodeCondition) *corev1.Node {
+	keys := sets.New[string](lo.Map(conds, func(c corev1.NodeCondition, _ int) string { return string(c.Type) })...)
+	node.Status.Conditions = lo.Reject(node.Status.Conditions, func(c corev1.NodeCondition, _ int) bool {
+		return keys.Has(string(c.Type))
+	})
+	node.Status.Conditions = append(node.Status.Conditions, conds...)
+	return node
+}
+
 // ExpectCreatedOrUpdated can update objects in the cluster to match the inputs.
 // WARNING: ExpectUpdated ignores the resource version check, which can result in
 // overwriting changes made by other controllers in the cluster.
diff --git a/test/suites/integration/repair_policy_test.go b/test/suites/integration/repair_policy_test.go
new file mode 100644
index 000000000000..23c8ffb66ce2
--- /dev/null
+++ b/test/suites/integration/repair_policy_test.go
@@ -0,0 +1,86 @@
+/*
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package integration_test
+
+import (
+	"time"
+
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/labels"
+	karpenterv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
+	coretest "sigs.k8s.io/karpenter/pkg/test"
+
+	"github.com/aws/karpenter-provider-aws/test/pkg/environment/common"
+
+	. "github.com/onsi/ginkgo/v2"
+	"github.com/samber/lo"
+)
+
+var _ = Describe("Repair Policy", func() {
+	var selector labels.Selector
+	var dep *appsv1.Deployment
+	var numPods int
+
+	BeforeEach(func() {
+		numPods = 1
+		// Run the workload with the do-not-disrupt annotation. NOTE(review): presumably so that only node repair, not voluntary disruption, removes the node; confirm.
+		dep = coretest.Deployment(coretest.DeploymentOptions{
+			Replicas: int32(numPods),
+			PodOptions: coretest.PodOptions{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: map[string]string{
+						"app": "my-app",
+					},
+					Annotations: map[string]string{
+						karpenterv1.DoNotDisruptAnnotationKey: "true",
+					},
+				},
+				TerminationGracePeriodSeconds: lo.ToPtr[int64](0),
+			},
+		})
+		selector = labels.SelectorFromSet(dep.Spec.Selector.MatchLabels)
+	})
+
+	DescribeTable("Conditions", func(unhealthyCondition corev1.NodeCondition) {
+		env.ExpectCreated(nodeClass, nodePool, dep)
+		pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0]
+		node := env.ExpectCreatedNodeCount("==", 1)[0]
+		env.EventuallyExpectInitializedNodeCount("==", 1)
+
+		common.ReplaceNodeConditions(node, unhealthyCondition)
+		env.ExpectStatusUpdated(node)
+
+		env.EventuallyExpectNotFound(pod, node)
+		env.EventuallyExpectHealthyPodCount(selector, numPods)
+	},
+		Entry("Kubelet Readiness", corev1.NodeCondition{
+			Type:               corev1.NodeReady,
+			Status:             corev1.ConditionFalse,
+			LastTransitionTime: metav1.Time{Time: time.Now().Add(-31 * time.Minute)},
+		}),
+		Entry("Kubelet DiskPressure", corev1.NodeCondition{
+			Type:               corev1.NodeDiskPressure,
+			Status:             corev1.ConditionTrue,
+			LastTransitionTime: metav1.Time{Time: time.Now().Add(-31 * time.Minute)},
+		}),
+		Entry("Kubelet MemoryPressure", corev1.NodeCondition{
+			Type:               corev1.NodeMemoryPressure,
+			Status:             corev1.ConditionTrue,
+			LastTransitionTime: metav1.Time{Time: time.Now().Add(-31 * time.Minute)},
+		}),
+	)
+})