
Commit d660b80

Check min size of node group and resource limits for set of nodes
1 parent 9f04d4f commit d660b80

3 files changed: +176 -21 lines

cluster-autoscaler/core/scaledown/unneeded/nodes.go

Lines changed: 48 additions & 20 deletions
@@ -118,20 +118,28 @@ func (n *Nodes) Drop(node string) {
 // into empty and non-empty node lists, as well as a list of nodes that were
 // unneeded, but are not removable, annotated by reason.
 func (n *Nodes) RemovableAt(context *context.AutoscalingContext, ts time.Time, resourcesLeft resource.Limits, resourcesWithLimits []string, as scaledown.ActuationStatus) (empty, needDrain []simulator.NodeToBeRemoved, unremovable []*simulator.UnremovableNode) {
-    nodeGroupSize := utils.GetNodeGroupSizeMap(context.CloudProvider)
-    for nodeName, v := range n.byName {
-        klog.V(2).Infof("%s was unneeded for %s", nodeName, ts.Sub(v.since).String())
+    nodeGroupSize := make(map[string]int)
+    for k, v := range utils.GetNodeGroupSizeMap(context.CloudProvider) {
+        nodeGroupSize[k] = v
+    }
+    resourcesLeftCopy := resourcesLeft.DeepCopy()
+    emptyNodes, drainNodes := n.sortEmptyAndNonEmptyNodes()
 
-        if r := n.unremovableReason(context, v, ts, nodeGroupSize, resourcesLeft, resourcesWithLimits, as); r != simulator.NoReason {
+    for nodeName, v := range emptyNodes {
+        klog.V(2).Infof("%s was unneeded for %s", nodeName, ts.Sub(v.since).String())
+        if r := n.unremovableReason(context, v, ts, nodeGroupSize, resourcesLeftCopy, resourcesWithLimits, as); r != simulator.NoReason {
             unremovable = append(unremovable, &simulator.UnremovableNode{Node: v.ntbr.Node, Reason: r})
             continue
         }
-
-        if len(v.ntbr.PodsToReschedule) > 0 {
-            needDrain = append(needDrain, v.ntbr)
-        } else {
-            empty = append(empty, v.ntbr)
+        empty = append(empty, v.ntbr)
+    }
+    for nodeName, v := range drainNodes {
+        klog.V(2).Infof("%s was unneeded for %s", nodeName, ts.Sub(v.since).String())
+        if r := n.unremovableReason(context, v, ts, nodeGroupSize, resourcesLeftCopy, resourcesWithLimits, as); r != simulator.NoReason {
+            unremovable = append(unremovable, &simulator.UnremovableNode{Node: v.ntbr.Node, Reason: r})
+            continue
         }
+        needDrain = append(needDrain, v.ntbr)
     }
     return
 }

@@ -177,16 +185,8 @@ func (n *Nodes) unremovableReason(context *context.AutoscalingContext, v *node,
         }
     }
 
-    size, found := nodeGroupSize[nodeGroup.Id()]
-    if !found {
-        klog.Errorf("Error while checking node group size %s: group size not found in cache", nodeGroup.Id())
-        return simulator.UnexpectedError
-    }
-
-    deletionsInProgress := as.DeletionsCount(nodeGroup.Id())
-    if size-deletionsInProgress <= nodeGroup.MinSize() {
-        klog.V(1).Infof("Skipping %s - node group min size reached", node.Name)
-        return simulator.NodeGroupMinSizeReached
+    if reason := verifyMinSize(node.Name, nodeGroup, nodeGroupSize, as); reason != simulator.NoReason {
+        return reason
     }
 
     resourceDelta, err := n.limitsFinder.DeltaForNode(context, node, nodeGroup, resourcesWithLimits)

@@ -195,7 +195,7 @@ func (n *Nodes) unremovableReason(context *context.AutoscalingContext, v *node,
         return simulator.UnexpectedError
     }
 
-    checkResult := resourcesLeft.CheckDeltaWithinLimits(resourceDelta)
+    checkResult := resourcesLeft.TryDecrementBy(resourceDelta)
     if checkResult.Exceeded() {
         klog.V(4).Infof("Skipping %s - minimal limit exceeded for %v", node.Name, checkResult.ExceededResources)
         for _, resource := range checkResult.ExceededResources {

@@ -213,3 +213,31 @@ func (n *Nodes) unremovableReason(context *context.AutoscalingContext, v *node,
 
     return simulator.NoReason
 }
+
+func (n *Nodes) sortEmptyAndNonEmptyNodes() (empty, needDrain map[string]*node) {
+    empty = make(map[string]*node)
+    needDrain = make(map[string]*node)
+    for name, v := range n.byName {
+        if len(v.ntbr.PodsToReschedule) > 0 {
+            needDrain[name] = v
+        } else {
+            empty[name] = v
+        }
+    }
+    return
+}
+
+func verifyMinSize(nodeName string, nodeGroup cloudprovider.NodeGroup, nodeGroupSize map[string]int, as scaledown.ActuationStatus) simulator.UnremovableReason {
+    size, found := nodeGroupSize[nodeGroup.Id()]
+    if !found {
+        klog.Errorf("Error while checking node group size %s: group size not found in cache", nodeGroup.Id())
+        return simulator.UnexpectedError
+    }
+    deletionsInProgress := as.DeletionsCount(nodeGroup.Id())
+    if size-deletionsInProgress <= nodeGroup.MinSize() {
+        klog.V(1).Infof("Skipping %s - node group min size reached", nodeName)
+        return simulator.NodeGroupMinSizeReached
+    }
+    nodeGroupSize[nodeGroup.Id()]--
+    return simulator.NoReason
+}
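
The substance of the change: RemovableAt now treats its budgets as state shared across the whole candidate set. The cached node-group sizes are copied and decremented once per approved node (verifyMinSize), and the resource limits are copied (resourcesLeft.DeepCopy()) and consumed with TryDecrementBy, which replaces the read-only CheckDeltaWithinLimits. Below is a minimal sketch of that consume-from-a-copy pattern; the map-based Limits type here is a hypothetical stand-in, not the autoscaler's real resource.Limits API.

package main

import "fmt"

// Limits is a simplified stand-in for a budget of named resources
// (the real resource.Limits tracks e.g. cores and memory).
type Limits map[string]int64

// DeepCopy returns an independent copy, mirroring resourcesLeft.DeepCopy()
// in RemovableAt: the caller's limits must survive the dry run.
func (l Limits) DeepCopy() Limits {
    out := make(Limits, len(l))
    for k, v := range l {
        out[k] = v
    }
    return out
}

// TryDecrementBy consumes delta from the budget if every limited resource
// can absorb it; otherwise it leaves the budget untouched and reports the
// resources that would be exceeded. This is the stateful analogue of a
// read-only per-node check.
func (l Limits) TryDecrementBy(delta Limits) (exceeded []string) {
    for k, v := range delta {
        if left, ok := l[k]; ok && left < v {
            exceeded = append(exceeded, k)
        }
    }
    if len(exceeded) > 0 {
        return exceeded
    }
    for k, v := range delta {
        if _, ok := l[k]; ok {
            l[k] -= v
        }
    }
    return nil
}

func main() {
    // Budget allows removing two cores' worth of nodes in total.
    left := Limits{"cores": 2}.DeepCopy()
    for i := 0; i < 3; i++ {
        if exceeded := left.TryDecrementBy(Limits{"cores": 1}); exceeded != nil {
            fmt.Printf("node-%d: limit exceeded for %v\n", i, exceeded) // third node is blocked
            continue
        }
        fmt.Printf("node-%d: removable\n", i)
    }
}

Because each approval mutates the budget, evaluation order matters; splitting the loop so empty nodes are checked before drain nodes gives empty nodes first claim on the remaining capacity.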

cluster-autoscaler/core/scaledown/unneeded/nodes_test.go

Lines changed: 127 additions & 0 deletions
@@ -17,11 +17,22 @@ limitations under the License.
 package unneeded
 
 import (
+    "fmt"
     "testing"
     "time"
 
+    apiv1 "k8s.io/api/core/v1"
+    "k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
+    testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test"
+    "k8s.io/autoscaler/cluster-autoscaler/config"
+    "k8s.io/autoscaler/cluster-autoscaler/context"
+    "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/resource"
+    "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/status"
+    . "k8s.io/autoscaler/cluster-autoscaler/core/test"
     "k8s.io/autoscaler/cluster-autoscaler/simulator"
+    kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
     . "k8s.io/autoscaler/cluster-autoscaler/utils/test"
+    "k8s.io/client-go/kubernetes/fake"
 
     "github.com/stretchr/testify/assert"
 )

@@ -115,3 +126,119 @@ func makeNode(name, version string) simulator.NodeToBeRemoved {
 func version(n simulator.NodeToBeRemoved) string {
     return n.Node.Annotations[testVersion]
 }
+
+func TestRemovableAt(t *testing.T) {
+    testCases := []struct {
+        name                string
+        numEmpty            int
+        numDrain            int
+        minSize             int
+        targetSize          int
+        numOngoingDeletions int
+        numEmptyToRemove    int
+        numDrainToRemove    int
+    }{
+        {
+            name:                "Node group min size is not reached",
+            numEmpty:            3,
+            numDrain:            2,
+            minSize:             1,
+            targetSize:          10,
+            numOngoingDeletions: 2,
+            numEmptyToRemove:    3,
+            numDrainToRemove:    2,
+        },
+        {
+            name:                "Node group min size is reached for drain nodes",
+            numEmpty:            3,
+            numDrain:            5,
+            minSize:             1,
+            targetSize:          10,
+            numOngoingDeletions: 2,
+            numEmptyToRemove:    3,
+            numDrainToRemove:    4,
+        },
+        {
+            name:                "Node group min size is reached for empty and drain nodes",
+            numEmpty:            3,
+            numDrain:            5,
+            minSize:             1,
+            targetSize:          5,
+            numOngoingDeletions: 2,
+            numEmptyToRemove:    2,
+            numDrainToRemove:    0,
+        },
+    }
+    for _, tc := range testCases {
+        t.Run(tc.name, func(t *testing.T) {
+            ng := testprovider.NewTestNodeGroup("ng", 100, tc.minSize, tc.targetSize, true, false, "", nil, nil)
+            empty := []simulator.NodeToBeRemoved{}
+            for i := 0; i < tc.numEmpty; i++ {
+                empty = append(empty, simulator.NodeToBeRemoved{
+                    Node: BuildTestNode(fmt.Sprintf("empty-%d", i), 10, 100),
+                })
+            }
+            drain := []simulator.NodeToBeRemoved{}
+            for i := 0; i < tc.numDrain; i++ {
+                drain = append(drain, simulator.NodeToBeRemoved{
+                    Node:             BuildTestNode(fmt.Sprintf("drain-%d", i), 10, 100),
+                    PodsToReschedule: []*apiv1.Pod{BuildTestPod(fmt.Sprintf("pod-%d", i), 1, 1)},
+                })
+            }
+
+            nodes := append(empty, drain...)
+            provider := testprovider.NewTestCloudProvider(nil, nil)
+            provider.InsertNodeGroup(ng)
+            for _, node := range nodes {
+                provider.AddNode("ng", node.Node)
+            }
+
+            as := &fakeActuationStatus{deletionCount: map[string]int{"ng": tc.numOngoingDeletions}}
+
+            rsLister, err := kube_util.NewTestReplicaSetLister(nil)
+            assert.NoError(t, err)
+            registry := kube_util.NewListerRegistry(nil, nil, nil, nil, nil, nil, nil, nil, rsLister, nil)
+            ctx, err := NewScaleTestAutoscalingContext(config.AutoscalingOptions{ScaleDownSimulationTimeout: 5 * time.Minute}, &fake.Clientset{}, registry, provider, nil, nil)
+            assert.NoError(t, err)
+
+            n := NewNodes(&fakeScaleDownTimeGetter{}, &resource.LimitsFinder{})
+            n.Update(nodes, time.Now())
+            gotEmptyToRemove, gotDrainToRemove, _ := n.RemovableAt(&ctx, time.Now(), resource.Limits{}, []string{}, as)
+            if len(gotDrainToRemove) != tc.numDrainToRemove || len(gotEmptyToRemove) != tc.numEmptyToRemove {
+                t.Errorf("%s: RemovableAt() returned %d, %d, want %d, %d", tc.name, len(gotEmptyToRemove), len(gotDrainToRemove), tc.numEmptyToRemove, tc.numDrainToRemove)
+            }
+        })
+    }
+}
+
+type fakeActuationStatus struct {
+    recentEvictions []*apiv1.Pod
+    deletionCount   map[string]int
+}
+
+func (f *fakeActuationStatus) RecentEvictions() []*apiv1.Pod {
+    return f.recentEvictions
+}
+
+func (f *fakeActuationStatus) DeletionsInProgress() ([]string, []string) {
+    return nil, nil
+}
+
+func (f *fakeActuationStatus) DeletionResults() (map[string]status.NodeDeleteResult, time.Time) {
+    return nil, time.Time{}
+}
+
+func (f *fakeActuationStatus) DeletionsCount(nodeGroup string) int {
+    return f.deletionCount[nodeGroup]
+}
+
+type fakeScaleDownTimeGetter struct{}
+
+func (f *fakeScaleDownTimeGetter) GetScaleDownUnneededTime(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (time.Duration, error) {
+    return 0 * time.Second, nil
+}
+
+func (f *fakeScaleDownTimeGetter) GetScaleDownUnreadyTime(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (time.Duration, error) {
+    return 0 * time.Second, nil
+}
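
The expected counts follow from the shared min-size budget: with targetSize nodes, numOngoingDeletions deletions already in flight, and a floor of minSize, at most targetSize - numOngoingDeletions - minSize removals can be approved, and empty nodes are evaluated first. A quick cross-check of the three cases (illustrative arithmetic only, not autoscaler code):

package main

import "fmt"

// Worked check of TestRemovableAt's expectations, assuming empty nodes
// claim the removal budget before drain nodes.
func main() {
    cases := []struct{ numEmpty, numDrain, minSize, targetSize, deletions int }{
        {3, 2, 1, 10, 2}, // want 3 empty, 2 drain removable
        {3, 5, 1, 10, 2}, // want 3 empty, 4 drain removable
        {3, 5, 1, 5, 2},  // want 2 empty, 0 drain removable
    }
    for _, c := range cases {
        // Removals allowed before the group would drop below its min size.
        budget := c.targetSize - c.deletions - c.minSize
        empty := min(c.numEmpty, budget)
        drain := min(c.numDrain, budget-empty)
        fmt.Printf("empty=%d drain=%d\n", empty, drain)
    }
}

func min(a, b int) int {
    if a < b {
        return a
    }
    return b
}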

cluster-autoscaler/simulator/cluster.go

Lines changed: 1 addition & 1 deletion
@@ -156,7 +156,7 @@ func (r *RemovalSimulator) SimulateNodeRemoval(
         klog.V(2).Infof("nodeInfo for %s not found", nodeName)
         return nil, &UnremovableNode{Node: nodeInfo.Node(), Reason: UnexpectedError}
     }
-
+
     podsToRemove, daemonSetPods, blockingPod, err := GetPodsToMove(nodeInfo, r.deleteOptions, r.listers, pdbs, timestamp)
     if err != nil {
         klog.V(2).Infof("node %s cannot be removed: %v", nodeName, err)
