Skip to content

Commit 4e006f3

Browse files
committed
OCI provider based on instance-pools and instance-configurations.
1 parent b2b48c0 commit 4e006f3

23 files changed

+3039
-2
lines changed

cluster-autoscaler/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ You should also take a look at the notes and "gotchas" for your specific cloud p
2525
* [IonosCloud](./cloudprovider/ionoscloud/README.md)
2626
* [OVHcloud](./cloudprovider/ovhcloud/README.md)
2727
* [Linode](./cloudprovider/linode/README.md)
28+
* [OracleCloud](./cloudprovider/oci/README.md)
2829
* [ClusterAPI](./cloudprovider/clusterapi/README.md)
2930
* [BizflyCloud](./cloudprovider/bizflycloud/README.md)
3031

@@ -159,5 +160,6 @@ Supported cloud providers:
159160
* Packet https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/packet/README.md
160161
* OVHcloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/ovhcloud/README.md
161162
* Linode https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/linode/README.md
163+
* OCI https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/oci/README.md
162164
* Hetzner https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/hetzner/README.md
163165
* Cluster API https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/clusterapi/README.md

cluster-autoscaler/cloudprovider/builder/builder_all.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// +build !gce,!aws,!azure,!kubemark,!alicloud,!magnum,!digitalocean,!clusterapi,!huaweicloud,!ionoscloud,!linode,!hetzner,!bizflycloud,!brightbox,!packet
1+
// +build !gce,!aws,!azure,!kubemark,!alicloud,!magnum,!digitalocean,!clusterapi,!huaweicloud,!ionoscloud,!linode,!hetzner,!bizflycloud,!brightbox,!packet,!oci
22

33
/*
44
Copyright 2018 The Kubernetes Authors.
@@ -36,6 +36,7 @@ import (
3636
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/ionoscloud"
3737
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/linode"
3838
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/magnum"
39+
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci"
3940
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/ovhcloud"
4041
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/packet"
4142
"k8s.io/autoscaler/cluster-autoscaler/config"
@@ -54,6 +55,7 @@ var AvailableCloudProviders = []string{
5455
cloudprovider.ExoscaleProviderName,
5556
cloudprovider.HuaweicloudProviderName,
5657
cloudprovider.HetznerProviderName,
58+
cloudprovider.OracleCloudProviderName,
5759
cloudprovider.OVHcloudProviderName,
5860
cloudprovider.ClusterAPIProviderName,
5961
cloudprovider.IonoscloudProviderName,
@@ -104,6 +106,8 @@ func buildCloudProvider(opts config.AutoscalingOptions, do cloudprovider.NodeGro
104106
return ionoscloud.BuildIonosCloud(opts, do, rl)
105107
case cloudprovider.LinodeProviderName:
106108
return linode.BuildLinode(opts, do, rl)
109+
case cloudprovider.OracleCloudProviderName:
110+
return oci.BuildOCI(opts, do, rl)
107111
}
108112
return nil
109113
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// +build oci
2+
3+
/*
4+
Copyright 2020 The Kubernetes Authors.
5+
6+
Licensed under the Apache License, Version 2.0 (the "License");
7+
you may not use this file except in compliance with the License.
8+
You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
*/
18+
19+
package builder
20+
21+
import (
22+
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
23+
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci"
24+
"k8s.io/autoscaler/cluster-autoscaler/config"
25+
)
26+
27+
// AvailableCloudProviders supported by the cloud provider builder.
28+
var AvailableCloudProviders = []string{
29+
cloudprovider.OracleCloudProviderName,
30+
}
31+
32+
// DefaultCloudProvider for oci-only build is oci.
33+
const DefaultCloudProvider = cloudprovider.OracleCloudProviderName
34+
35+
func buildCloudProvider(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
36+
switch opts.CloudProviderName {
37+
case cloudprovider.OracleCloudProviderName:
38+
return oci.BuildOCI(opts, do, rl)
39+
}
40+
41+
return nil
42+
}

cluster-autoscaler/cloudprovider/cloud_provider.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ const (
6060
HuaweicloudProviderName = "huaweicloud"
6161
// IonoscloudProviderName gets the provider name of ionoscloud
6262
IonoscloudProviderName = "ionoscloud"
63+
// OracleCloudProviderName gets the provider name of oci
64+
OracleCloudProviderName = "oci"
6365
// OVHcloudProviderName gets the provider name of ovhcloud
6466
OVHcloudProviderName = "ovhcloud"
6567
// LinodeProviderName gets the provider name of linode
Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
# Cluster Autoscaler for Oracle Cloud Infrastructure (OCI)
2+
3+
On OCI, the cluster-autoscaler utilizes [Instance Pools](https://docs.oracle.com/en-us/iaas/Content/Compute/Tasks/creatinginstancepool.htm)
4+
combined with [Instance Configurations](https://docs.oracle.com/en-us/iaas/Content/Compute/Tasks/creatinginstanceconfig.htm) to
5+
automatically resize a cluster's nodes based on application workload demands by:
6+
7+
- adding nodes to static instance-pool(s) when a pod cannot be scheduled in the cluster because of insufficient resources.
8+
- removing nodes from instance-pool(s) when the nodes have been underutilized for an extended time and their pods can be placed on other existing nodes.
9+
10+
The cluster-autoscaler works on a per-instance pool basis. You configure the cluster-autoscaler to tell it which instance pools to target
11+
for expansion and contraction, the minimum and maximum sizes for each pool, and how you want the autoscaling to take place.
12+
Instance pools not referenced in the configuration file are not managed by the cluster-autoscaler.
13+
14+
## Create Required OCI Resources
15+
16+
### IAM Policy (if using Instance Principals)
17+
18+
We recommend setting up and configuring the cluster-autoscaler to use
19+
[Instance Principals](https://docs.oracle.com/en-us/iaas/Content/Identity/Tasks/callingservicesfrominstances.htm)
20+
to authenticate to the OCI APIs.
21+
22+
The following policy provides the minimum privileges necessary for Cluster Autoscaler to run:
23+
24+
1: Create a compartment-level dynamic group containing the nodes (compute instances) in the cluster:
25+
26+
```
27+
All {instance.compartment.id = 'ocid1.compartment.oc1..aaaaaaaa7ey4sg3a6b5wnv5hlkjlkjadslkfjalskfjalsadfadsf'}
28+
```
29+
30+
2: Create a *tenancy-level* policy to allow nodes to manage instance-pools:
31+
32+
```
33+
Allow dynamic-group acme-oci-cluster-autoscaler-dyn-grp to manage instance-pools in compartment <compartment-name>
34+
Allow dynamic-group acme-oci-cluster-autoscaler-dyn-grp to manage instance-configurations in compartment <compartment-name>
35+
Allow dynamic-group acme-oci-cluster-autoscaler-dyn-grp to manage instance-family in compartment <compartment-name>
36+
Allow dynamic-group acme-oci-cluster-autoscaler-dyn-grp to use subnets in compartment <compartment-name>
37+
Allow dynamic-group acme-oci-cluster-autoscaler-dyn-grp to read virtual-network-family in compartment <compartment-name>
38+
Allow dynamic-group acme-oci-cluster-autoscaler-dyn-grp to use vnics in compartment <compartment-name>
39+
Allow dynamic-group acme-oci-cluster-autoscaler-dyn-grp to inspect compartments in compartment <compartment-name>
40+
```
41+
42+
### Instance Pool and Instance Configurations
43+
44+
Before you deploy the cluster-autoscaler on OCI, you need to create one or more static Instance Pools and Instance
45+
Configurations with `cloud-init` specified in the launch details so new nodes automatically join the existing cluster on
46+
start up.
47+
48+
Advanced configuration of Instance Pools and Instance Configurations is out of scope for this document. However, a
49+
working [instance-details.json](./examples/instance-details.json) and [placement-config.json](./examples/placement-config.json)
50+
([example](./examples/instance-details.json) based on Rancher [RKE](https://rancher.com/products/rke/)) using [cloud-init](https://cloudinit.readthedocs.io/en/latest/) are
51+
included in the examples, which can be applied using the [OCI CLI](https://docs.oracle.com/en-us/iaas/Content/API/SDKDocs/cliinstall.htm).
52+
53+
Modify the `user_data` in the example [instance-details.json](./examples/instance-details.json) to suit your needs, re-encode it as base64, and apply:
54+
55+
```bash
56+
# e.g. cloud-init. Modify, re-encode, and update user_data in instance-details.json to suit your needs:
57+
58+
$ echo IyEvYmluL2Jhc2gKdG91hci9saWIvYXB0L....1yZXRyeSAzIGhG91Y2ggL3RtcC9jbG91ZC1pbml0LWZpbmlzaGVkCg== | base64 --decode
59+
60+
#!/bin/bash
61+
groupadd docker
62+
usermod -aG docker ubuntu
63+
curl --retry 3 https://releases.rancher.com/install-docker/20.10.sh | sh
64+
docker run -d --privileged --restart=unless-stopped --net=host -v /etc/kubernetes:/etc/kubernetes -v /var/run:/var/run rancher/rancher-agent:v2.5.5 --server https://my-rancher.com --token xxxxxx --worker
65+
```
66+
67+
```bash
68+
$ oci compute-management instance-configuration create --instance-details file://./cluster-autoscaler/cloudprovider/oci/examples/instance-details.json --compartment-id ocid1.compartment.oc1..aaaaaaaa7ey4sg3a6b5wnv5hlkjlkjadslkfjalskfjalsadfadsf --query 'data.id' --raw-output
69+
70+
ocid1.instanceconfiguration.oc1.phx.aaaaaaaa3neul67zb3goz43lybosc2o3fv67gj3zazexbb3vfcbypmpznhtq
71+
72+
$ oci compute-management instance-pool create --compartment-id ocid1.compartment.oc1..aaaaaaaa7ey4sg3a6b5wnv5hlkjlkjadslkfjalskfjalsadfadsf --instance-configuration-id ocid1.instanceconfiguration.oc1.phx.aaaaaaaa3neul67zb3goz43lybosc2o3fv67gj3zazexbb3vfcbypmpznhtq --placement-configurations file://./cluster-autoscaler/cloudprovider/oci/examples/placement-config.json --size 0 --wait-for-state RUNNING --query 'data.id' --raw-output
73+
74+
Action completed. Waiting until the resource has entered state: ('RUNNING',)
75+
ocid1.instancepool.oc1.phx.aaaaaaaayd5bxwrzomzr2b2enchm4mof7uhw7do5hc2afkhks576syikk2ca
76+
```
77+
78+
## Configure Autoscaler
79+
80+
Use the `--nodes=<min-nodes>:<max-nodes>:<instancepool-ocid>` parameter to specify which pre-existing instance
81+
pools to target for automatic expansion and contraction, the minimum and maximum sizes for each node pool, and how you
82+
want the autoscaling to take place. Instance pools not referenced in the configuration file are not managed by the
83+
autoscaler. The fields of the `--nodes` parameter are:
84+
85+
- `<min-nodes>` is the minimum number of nodes allowed in the instance-pool.
86+
- `<max-nodes>` is the maximum number of nodes allowed in the instance-pool. Make sure the maximum number of nodes you specify does not exceed the tenancy limits for the node shape defined for the node pool.
87+
- `<instancepool-ocid>` is the OCID of a pre-existing instance-pool.
88+
89+
If you are authenticating via instance principals, be sure the `OCI_REGION` environment variable is set to the correct
90+
value in the deployment e.g.:
91+
92+
```yaml
93+
env:
94+
- name: OCI_REGION
95+
value: "us-phoenix-1"
96+
```
97+
98+
### Optional cloud-config file
99+
100+
_Optional_ cloud-config file mounted in the path specified by `--cloud-config`.
101+
102+
An example of passing optional configuration via a `cloud-config` file that configures the cluster-autoscaler to
103+
authenticate via instance principals and only see configured instance-pools in a single compartment:
104+
105+
```ini
106+
[Global]
107+
compartment-id = ocid1.compartment.oc1..aaaaaaaa7ey4sg3a6b5wnv5hlkjlkjadslkfjalskfjalsadfadsf
108+
region = uk-london-1
109+
use-instance-principals = true
110+
```
111+
112+
### Environment variables
113+
114+
Configuration via environment-variables:
115+
116+
- `OCI_USE_INSTANCE_PRINCIPAL` - Whether to use Instance Principals for authentication rather than expecting an OCI config file to be mounted in the container. Defaults to false.
117+
- `OCI_REGION` - **Required** when using Instance Principals. e.g. `OCI_REGION=us-phoenix-1`. See [region list](https://docs.oracle.com/en-us/iaas/Content/General/Concepts/regions.htm) for identifiers.
118+
- `OCI_COMPARTMENT_ID` - Restrict the cluster-autoscaler to instance-pools in a single compartment. When unset, the cluster-autoscaler will manage each specified instance-pool no matter which compartment they are in.
119+
120+
## Deployment
121+
122+
### Create OCI config secret (only if _not_ using Instance Principals)
123+
124+
If you are opting for a file based OCI configuration (as opposed to instance principals), the OCI config file and private key need to be mounted into the container filesystem using a secret volume.
125+
126+
The following policy is required to run the cluster-autoscaler when the specified user is not an administrator:
127+
128+
```
129+
Allow group acme-oci-cluster-autoscaler-user-grp to manage instance-pools in compartment <compartment-name>
130+
Allow group acme-oci-cluster-autoscaler-user-grp to manage instance-configurations in compartment <compartment-name>
131+
Allow group acme-oci-cluster-autoscaler-user-grp to manage instance-family in compartment <compartment-name>
132+
Allow group acme-oci-cluster-autoscaler-user-grp to use subnets in compartment <compartment-name>
133+
Allow group acme-oci-cluster-autoscaler-user-grp to read virtual-network-family in compartment <compartment-name>
134+
Allow group acme-oci-cluster-autoscaler-user-grp to use vnics in compartment <compartment-name>
135+
Allow group acme-oci-cluster-autoscaler-user-grp to inspect compartments in compartment <compartment-name>
136+
```
137+
138+
Example OCI config file (note `key_file` is the expected path and filename of the OCI API private-key from the perspective of the container):
139+
140+
```bash
141+
$ cat ~/.oci/config
142+
143+
[DEFAULT]
144+
user=ocid1.user.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
145+
fingerprint=xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx
146+
key_file=/root/.oci/api_key.pem
147+
tenancy=ocid1.tenancy.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
148+
pass_phrase=
149+
region=us-phoenix-1
150+
```
151+
152+
Create the secret (`api_key.pem` key name is required):
153+
154+
```bash
155+
kubectl create secret generic oci-config -n kube-system --from-file=/Users/me/.oci/config --from-file=api_key.pem=/Users/me/.oci/my_api_key.pem
156+
```
157+
158+
### Example Deployment
159+
160+
Two example deployments of the cluster-autoscaler that manage instancepools are located in the [examples](./examples/) directory.
161+
[oci-ip-cluster-autoscaler-w-principals.yaml](./examples/oci-ip-cluster-autoscaler-w-principals.yaml) uses
162+
instance principals, and [oci-ip-cluster-autoscaler-w-config.yaml](./examples/oci-ip-cluster-autoscaler-w-config.yaml) uses file
163+
based authentication.
164+
165+
Note the 3 specified instance-pools are intended to correspond to different availability domains in the Phoenix, AZ region:
166+
167+
```yaml
168+
...
169+
containers:
170+
- image: docker.io/jlamillan/autoscaler:oci-pr-rc1
171+
name: cluster-autoscaler
172+
command:
173+
- ./cluster-autoscaler
174+
- --cloud-provider=oci
175+
- --nodes=1:10:ocid1.instancepool.oc1.phx.aaaaaaaaqdxy35acq32zjfvkybjmvlbdgj6q3m55qkwwctxhsprmz633k62q
176+
- --nodes=0:10:ocid1.instancepool.oc1.phx.aaaaaaaazldzcu4mi5spz56upbtwnsynz2nk6jvmx7zi4hsta4uggxbulbua
177+
- --nodes=0:20:ocid1.instancepool.oc1.phx.aaaaaaaal3jhoc32ljsfaeif4x2ssfa2a63oehjgqryiueivieee6yaqbkia
178+
```
179+
180+
Instance principal based authentication deployment:
181+
182+
Substitute the OCIDs of _your_ instance pool(s) and set the `OCI_REGION` environment variable to the region where your
183+
instance pool(s) reside before applying the deployment:
184+
185+
```
186+
kubectl apply -f ./cloudprovider/oci/examples/oci-ip-cluster-autoscaler-w-principals.yaml
187+
```
188+
189+
OCI config file based authentication deployment:
190+
191+
```
192+
kubectl apply -f ./cloudprovider/oci/examples/oci-ip-cluster-autoscaler-w-config.yaml
193+
```
194+
195+
## Common Notes and Gotchas:
196+
- You must configure the instance configuration of new compute instances to join the existing cluster when they start. This can
197+
be accomplished with `cloud-init` / `user-data` in the instance launch configuration [example](./examples/instance-details.json).
198+
- If opting for a file based OCI configuration (as opposed to instance principals), ensure the OCI config and private-key
199+
PEM files are mounted into the container filesystem at the [expected path](https://docs.oracle.com/en-us/iaas/Content/API/Concepts/sdkconfig.htm). Note that the `key_file` option in the example `~/.oci/config` above references a private-key file mounted into the container by the example [volumeMount](./examples/oci-ip-cluster-autoscaler-w-config.yaml#L165).
200+
- Make sure the maximum number of nodes you specify does not exceed the limit for the instance-pool or the tenancy.
201+
- We recommend creating multiple instance-pools with one availability domain specified so new nodes can be created to meet
202+
affinity requirements across availability domains.
203+
- If you are authenticating via instance principals, be sure the `OCI_REGION` environment variable is set to the correct
204+
value in the deployment.
205+
- The cluster-autoscaler will not automatically remove scaled down (terminated) `Node` objects from the Kubernetes API
206+
without assistance from the [OCI Cloud Controller Manager](https://github.com/oracle/oci-cloud-controller-manager) (CCM).
207+
If scaled down nodes are lingering in your cluster in the `NotReady` status, ensure the OCI CCM is installed and running
208+
correctly (`oci-cloud-controller-manager`).
209+
- Avoid manually changing node pools that are managed by the cluster-autoscaler. For example, do not add or remove nodes
210+
using kubectl, or using the Console (or the Oracle Cloud Infrastructure CLI or API).
211+
- `--node-group-auto-discovery` and `--node-autoprovisioning-enabled=true` are not supported.
212+
- We set a `nvidia.com/gpu:NoSchedule` taint on nodes in a GPU enabled instance-pool.
213+
214+
## Helpful links
215+
- [Oracle Cloud Infrastructure home](https://cloud.oracle.com)
216+
- [OCI instance configuration documentation](https://docs.oracle.com/en-us/iaas/Content/Compute/Tasks/creatinginstanceconfig.htm)
217+
- [instance principals](https://docs.oracle.com/en-us/iaas/Content/Identity/Tasks/callingservicesfrominstances.htm)
218+
- [OCI Cloud Controller Manager](https://github.com/oracle/oci-cloud-controller-manager)
219+
- [OCI Container Storage Interface driver](https://github.com/oracle/oci-cloud-controller-manager/blob/master/container-storage-interface.md)
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
{
2+
"instanceType": "compute",
3+
"launchDetails": {
4+
"compartmentId": "ocid1.compartment.oc1..aaaaaaaa7ey4sg3a6b5wnv5hlkjlkjadslkfjalskfjalsadfadsf",
5+
"shape": "VM.Standard2.8",
6+
"sourceDetails":
7+
{
8+
"imageId": "ocid1.image.oc1.phx.aaaaaaaa55tzajot4gbiw2p7gquwjnvfzrasosbrq4h6wywkff4zjosp2fia",
9+
"sourceType": "image",
10+
"bootVolumeSizeInGBs": 100
11+
},
12+
"metadata": {
13+
"user_data": "IyEvYmluL2Jhc2gKdG91Y2ggL3RtcC9jbG91ZC1pbml0LXN0YXJ0ZWQKaXB0YWJsZXMgLUYKZ3JvdXBhZGQgZG9ja2VyCnVzZXJtb2QgLWFHIGRvY2tlciB1YnVudHUKcm0gL3Zhci9saWIvYXB0L2xpc3RzL2xvY2sKcGtpbGwgLTkgLWYgYXB0CmN1cmwgLS1yZXRyeSAzIGh0dHBzOi8vcmVsZWFzZXMucmFuY2hlci5jb20vaW5zdGFsbC1kb2NrZXIvMjAuMTAuc2ggfCBzaApkb2NrZXIgcnVuIC1kIC0tcHJpdmlsZWdlZCAtLXJlc3RhcnQ9dW5sZXNzLXN0b3BwZWQgLS1uZXQ9aG9zdCAtdiAvZXRjL2t1YmVybmV0ZXM6L2V0Yy9rdWJlcm5ldGVzIC12IC92YXIvcnVuOi92YXIvcnVuIHJhbmNoZXIvcmFuY2hlci1hZ2VudDp2Mi41LjUgLS1zZXJ2ZXIgaHR0cHM6Ly9teS1yYW5jaGVyLmNvbSAtLXRva2VuIHh4eHh4eCAgLS13b3JrZXIKdG91Y2ggL3RtcC9jbG91ZC1pbml0LWZpbmlzaGVkCg=="
14+
},
15+
"createVnicDetails": {
16+
"assignPublicIp": true
17+
}
18+
}
19+
}

0 commit comments

Comments
 (0)