Skip to content

Commit

Permalink
K0sControlPlane update strategies
Browse files Browse the repository at this point in the history
Signed-off-by: Alexey Makhov <[email protected]>
  • Loading branch information
makhov committed Aug 5, 2024
1 parent d3d386e commit dc896f9
Show file tree
Hide file tree
Showing 12 changed files with 165 additions and 63 deletions.
1 change: 1 addition & 0 deletions .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ jobs:
- check-capi-controlplane-docker-worker
- check-capi-remote-machine-template-update
- check-capi-docker-machine-template-update
- check-capi-docker-machine-template-update-recreate
- check-capi-remote-machine-job-provision

uses: ./.github/workflows/capi-smoke-tests.yml
Expand Down
20 changes: 20 additions & 0 deletions .github/workflows/prepare-build-env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/usr/bin/env sh

# Resolves the tool versions used by the build via ./vars.sh, appends them to
# the file named by $GITHUB_ENV, loads them into this shell, and finally dumps
# the sorted process environment for debugging.

# Abort on the first failing command and on any use of an unset variable.
set -eu

# Each version is looked up through vars.sh; the FROM=... argument selects the
# directory the value is read from (presumably that directory's own variable
# definitions -- confirm against vars.sh).
goVersion="$(./vars.sh go_version)"
golangciLintVersion="$(./vars.sh FROM=hack/tools golangci-lint_version)"
pythonVersion="$(./vars.sh FROM=docs python_version)"

# Append one NAME=value line per tool to the environment file.
{
  printf 'GO_VERSION=%s\n' "$goVersion"
  printf 'GOLANGCI_LINT_VERSION=%s\n' "$golangciLintVersion"
  printf 'PYTHON_VERSION=%s\n' "$pythonVersion"
} >>"$GITHUB_ENV"

# Source the same file so the assignments are also set in the current shell.
# shellcheck disable=SC1090
. "$GITHUB_ENV"

# Print the environment between ::group::/::endgroup:: marker lines
# (GitHub Actions log-grouping commands).
echo '::group::OS Environment'
env | sort
echo '::endgroup::'
2 changes: 1 addition & 1 deletion api/controlplane/v1beta1/k0s_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func init() {
type UpdateStrategy string

const (
UpdateInPlace UpdateStrategy = "InPlace"
UpdateInPlace UpdateStrategy = "InPlace"
UpdateRecreate UpdateStrategy = "Recreate"
)

Expand Down
3 changes: 2 additions & 1 deletion internal/controller/bootstrap/providerid_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ func (p *ProviderIDController) Reconcile(ctx context.Context, req ctrl.Request)
}

if machine.Spec.ProviderID == nil || *machine.Spec.ProviderID == "" {
return ctrl.Result{}, fmt.Errorf("waiting for providerID for the machine %s/%s", machine.Namespace, machine.Name)
log.Info("waiting for providerID for the machine " + machine.Name)
return ctrl.Result{RequeueAfter: time.Second * 10}, nil
}

cluster, err := capiutil.GetClusterByName(ctx, p.Client, machine.Namespace, machine.Spec.ClusterName)
Expand Down
16 changes: 12 additions & 4 deletions internal/controller/controlplane/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,12 +189,20 @@ func (c *K0sController) markChildControlNodeToLeave(ctx context.Context, name st

err := clientset.RESTClient().
Patch(types.MergePatchType).
AbsPath("/apis/autopilot.k0sproject.io/v1beta2/controlnodes/" + name).
Body([]byte(`{"metadata":{"annotations":{"k0smotron.io/leave":"true"}}}`)).
AbsPath("/apis/etcd.k0sproject.io/v1beta1/etcdmembers/" + name).
Body([]byte(`{"spec":{"leave":"true"}}`)).
Do(ctx).
Error()
if err != nil && !apierrors.IsNotFound(err) {
return fmt.Errorf("error marking control node to leave: %w", err)
if err != nil {
err := clientset.RESTClient().
Patch(types.MergePatchType).
AbsPath("/apis/autopilot.k0sproject.io/v1beta2/controlnodes/" + name).
Body([]byte(`{"metadata":{"annotations":{"k0smotron.io/leave":"true"}}}`)).
Do(ctx).
Error()
if err != nil && !apierrors.IsNotFound(err) {
return fmt.Errorf("error marking control node to leave: %w", err)
}
}

return nil
Expand Down
32 changes: 11 additions & 21 deletions internal/controller/controlplane/k0s_controlplane_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@ import (
"context"
"errors"
"fmt"
"strings"
"time"

"github.com/Masterminds/semver"
"github.com/google/uuid"
autopilot "github.com/k0sproject/k0s/pkg/apis/autopilot/v1beta2"
bootstrapv1 "github.com/k0sproject/k0smotron/api/bootstrap/v1beta1"
cpv1beta1 "github.com/k0sproject/k0smotron/api/controlplane/v1beta1"
"github.com/k0sproject/k0smotron/internal/controller/util"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
Expand All @@ -46,10 +46,7 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"

bootstrapv1 "github.com/k0sproject/k0smotron/api/bootstrap/v1beta1"
cpv1beta1 "github.com/k0sproject/k0smotron/api/controlplane/v1beta1"
"github.com/k0sproject/k0smotron/internal/controller/util"
"strings"
)

const (
Expand Down Expand Up @@ -230,10 +227,8 @@ func (c *K0sController) reconcileMachines(ctx context.Context, cluster *clusterv
replicasToReport = kcp.Status.Replicas
}

fmt.Println("asdfsadfsdafsdafa111")

if kcp.Status.Version != "" && kcp.Spec.Version != kcp.Status.Version {
if kcp.Spec.UpdateStrategy == "rollout" {
if kcp.Spec.UpdateStrategy == cpv1beta1.UpdateRecreate {
desiredReplicas += kcp.Spec.Replicas
machinesToDelete = int(kcp.Spec.Replicas)
replicasToReport = desiredReplicas
Expand All @@ -243,9 +238,10 @@ func (c *K0sController) reconcileMachines(ctx context.Context, cluster *clusterv
return replicasToReport, fmt.Errorf("error getting cluster client set for machine update: %w", err)
}

err = c.createAutopilotPlan(ctx, kcp, cluster, kubeClient)
if err != nil {
return replicasToReport, fmt.Errorf("error creating autopilot plan: %w", err)
err = c.createAutopilotPlan(ctx, kcp, cluster, kubeClient)
if err != nil {
return replicasToReport, fmt.Errorf("error creating autopilot plan: %w", err)
}
}
}

Expand Down Expand Up @@ -278,11 +274,6 @@ func (c *K0sController) reconcileMachines(ctx context.Context, cluster *clusterv
}
}

fmt.Println("asdfsadfsdafsdafa")
fmt.Println("machines", machines)
fmt.Println("machinesToDelete", machinesToDelete)

//var isNewMachineReady bool
for _, m := range machines {
ver := semver.MustParse(kcp.Spec.Version)
fmt.Println("machines ver", machinesToDelete, *m.Spec.Version, fmt.Sprintf("v%d.%d.%d", ver.Major(), ver.Minor(), ver.Patch()), m.Spec.Version != nil && *m.Spec.Version != fmt.Sprintf("v%d.%d.%d", ver.Major(), ver.Minor(), ver.Patch()))
Expand All @@ -291,13 +282,12 @@ func (c *K0sController) reconcileMachines(ctx context.Context, cluster *clusterv
}

if machinesToDelete > 0 {

kubeClient, err := c.getKubeClient(ctx, cluster)
if err != nil {
return replicasToReport, fmt.Errorf("error getting cluster client set for machine update: %w", err)
}
var cn autopilot.ControlNode
err = kubeClient.RESTClient().Get().AbsPath("/apis/autopilot.k0sproject.io/v1beta2/controlnodes").Name(m.Name).Do(ctx).Into(&cn)
err = kubeClient.RESTClient().Get().AbsPath("/apis/autopilot.k0sproject.io/v1beta2/controlnodes/" + m.Name).Do(ctx).Into(&cn)
fmt.Println("machines !!!", cn.Name, cn.Status)
if err != nil {
if apierrors.IsNotFound(err) {
Expand Down Expand Up @@ -340,7 +330,7 @@ func (c *K0sController) reconcileMachines(ctx context.Context, cluster *clusterv
return kcp.Status.Replicas, fmt.Errorf("waiting for previous machine to be deleted")
}

time.Sleep(time.Second * 10)
//time.Sleep(time.Second * 10)

replicasToReport -= 1

Check warning on line 335 in internal/controller/controlplane/k0s_controlplane_controller.go

View workflow job for this annotation

GitHub Actions / Lint

increment-decrement: should replace replicasToReport -= 1 with replicasToReport-- (revive)
name := machine.Name
Expand Down
11 changes: 9 additions & 2 deletions internal/util/dynamic_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@ package util
import (
"context"
"fmt"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/util/retry"
"sigs.k8s.io/cluster-api/controllers/remote"
"sigs.k8s.io/cluster-api/util"
Expand All @@ -31,15 +34,19 @@ func ReconcileDynamicConfig(ctx context.Context, cluster metav1.Object, cli clie
return fmt.Errorf("failed to create workload cluster client: %w", err)
}

err = retry.OnError(retry.DefaultBackoff, func(err error) bool {
err = retry.OnError(wait.Backoff{
Steps: 4,
Duration: 100 * time.Millisecond,
Factor: 5.0,
Jitter: 0.5,
}, func(err error) bool {
return true
}, func() error {
return chCS.Patch(ctx, u, client.RawPatch(client.Merge.Type(), b), []client.PatchOption{}...)
})
if err != nil {
return fmt.Errorf("failed to patch k0s config: %w", err)
}
//return chCS.Patch(ctx, u, client.RawPatch(client.Merge.Type(), b), []client.PatchOption{}...)

return nil
}
3 changes: 2 additions & 1 deletion inttest/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,5 @@ check-capi-remote-machine-template: TIMEOUT=12m
check-capi-remote-machine-template-update: TIMEOUT=10m
check-capi-docker-machine-template-update: TIMEOUT=10m
check-capi-remote-machine-job-provision: TIMEOUT=10m
check-capi-docker-machine-template-update-rollout: TIMEOUT=10m
check-capi-docker-machine-template-update: TIMEOUT=10m
check-capi-docker-machine-template-update-recreate: TIMEOUT=10m
2 changes: 1 addition & 1 deletion inttest/Makefile.variables
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ smoketests := \
check-capi-remote-machine-template \
check-capi-remote-machine-template-update \
check-capi-docker-machine-template-update \
check-capi-docker-machine-template-update-rollout \
check-capi-docker-machine-template-update-recreate \
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/

package capidockermachinetemplateupdaterollout
package capidockermachinetemplateupdaterecreate

import (
"context"
Expand All @@ -38,8 +38,8 @@ import (
"k8s.io/client-go/tools/clientcmd"
)

type CAPIDockerMachineTemplateUpdateRollout struct {
//type CAPIDockerMachineTemplateUpdateRollout struct {
type CAPIDockerMachineTemplateUpdateRecreate struct {
//type CAPIDockerMachineTemplateUpdateRecreate struct {
suite.Suite
client *kubernetes.Clientset
restConfig *rest.Config
Expand All @@ -48,12 +48,12 @@ type CAPIDockerMachineTemplateUpdateRollout struct {
ctx context.Context
}

func TestCAPIDockerMachineTemplateUpdateRollout(t *testing.T) {
s := CAPIDockerMachineTemplateUpdateRollout{}
func TestCAPIDockerMachineTemplateUpdateRecreate(t *testing.T) {
s := CAPIDockerMachineTemplateUpdateRecreate{}
suite.Run(t, &s)
}

func (s *CAPIDockerMachineTemplateUpdateRollout) SetupSuite() {
func (s *CAPIDockerMachineTemplateUpdateRecreate) SetupSuite() {
kubeConfigPath := os.Getenv("KUBECONFIG")
s.Require().NotEmpty(kubeConfigPath, "KUBECONFIG env var must be set and point to kind cluster")
// Get kube client from kubeconfig
Expand All @@ -77,7 +77,7 @@ func (s *CAPIDockerMachineTemplateUpdateRollout) SetupSuite() {
s.ctx, _ = util.NewSuiteContext(s.T())
}

func (s *CAPIDockerMachineTemplateUpdateRollout) TestCAPIControlPlaneDockerDownScaling() {
func (s *CAPIDockerMachineTemplateUpdateRecreate) TestCAPIControlPlaneDockerDownScaling() {

// Apply the child cluster objects
s.applyClusterObjects()
Expand Down Expand Up @@ -117,52 +117,105 @@ func (s *CAPIDockerMachineTemplateUpdateRollout) TestCAPIControlPlaneDockerDownS
})
s.Require().NoError(err)

//for i := 0; i < 3; i++ {
// // nolint:staticcheck
// err = wait.PollImmediateUntilWithContext(s.ctx, 1*time.Second, func(ctx context.Context) (bool, error) {
// nodeName := fmt.Sprintf("docker-test-%d", i)
// output, err := exec.Command("docker", "exec", nodeName, "k0s", "status").Output()
// if err != nil {
// return false, nil
// }
//
// return strings.Contains(string(output), "Version:"), nil
// })
// s.Require().NoError(err)
//}
time.Sleep(time.Minute * 3)
var nodeIDs []string
// nolint:staticcheck
err = wait.PollImmediateUntilWithContext(s.ctx, 1*time.Second, func(ctx context.Context) (bool, error) {

Check failure on line 122 in inttest/capi-docker-machine-template-update-recreate/capi_docker_machine_template_update_recreate_test.go

View workflow job for this annotation

GitHub Actions / Lint

ineffectual assignment to err (ineffassign)
var err error
nodeIDs, err = util.GetControlPlaneNodesIDs("docker-test-")

if err != nil {
return false, nil
}

return len(nodeIDs) == 3, nil
})

for i := 0; i < 3; i++ {
// nolint:staticcheck
err = wait.PollImmediateUntilWithContext(s.ctx, 1*time.Second, func(ctx context.Context) (bool, error) {
nodeID := nodeIDs[i]
output, err := exec.Command("docker", "exec", nodeID, "k0s", "status").Output()
if err != nil {
return false, nil
}

return strings.Contains(string(output), "Version:"), nil
})
s.Require().NoError(err)
}

s.T().Log("waiting for node to be ready")
s.Require().NoError(k0stestutil.WaitForNodeReadyStatus(s.ctx, kmcKC, "docker-test-worker-0", corev1.ConditionTrue))

s.T().Log("updating cluster objects")
s.updateClusterObjects()

// nolint:staticcheck
err = wait.PollImmediateUntilWithContext(s.ctx, 100*time.Millisecond, func(ctx context.Context) (bool, error) {

Check failure on line 154 in inttest/capi-docker-machine-template-update-recreate/capi_docker_machine_template_update_recreate_test.go

View workflow job for this annotation

GitHub Actions / Lint

ineffectual assignment to err (ineffassign)
var err error
newNodeIDs, err := util.GetControlPlaneNodesIDs("docker-test-")

if err != nil {
return false, nil
}

return len(newNodeIDs) == 6, nil
})

//for i := range nodeIDs {
// out, err := exec.Command("docker", "stop", nodeIDs[i]).CombinedOutput()
// s.Require().NoError(err, "failed to stop node: %s", string(out))
//}

// nolint:staticcheck
err = wait.PollImmediateUntilWithContext(s.ctx, 100*time.Millisecond, func(ctx context.Context) (bool, error) {

Check failure on line 171 in inttest/capi-docker-machine-template-update-recreate/capi_docker_machine_template_update_recreate_test.go

View workflow job for this annotation

GitHub Actions / Lint

ineffectual assignment to err (ineffassign)
var err error
nodeIDs, err = util.GetControlPlaneNodesIDs("docker-test-")

if err != nil {
return false, nil
}

return len(nodeIDs) == 3, nil
})

// nolint:staticcheck
err = wait.PollImmediateUntilWithContext(s.ctx, 1*time.Second, func(ctx context.Context) (bool, error) {
var err error
nodeIDs, err = util.GetControlPlaneNodesIDs("docker-test-")

if err != nil {
return false, nil
}

return len(nodeIDs) == 3, nil
})

// nolint:staticcheck
err = wait.PollImmediateUntilWithContext(s.ctx, 1*time.Second, func(ctx context.Context) (bool, error) {
output, err := exec.Command("docker", "exec", "docker-test-0", "k0s", "status").CombinedOutput()
output, err := exec.Command("docker", "exec", nodeIDs[0], "k0s", "status").CombinedOutput()
if err != nil {
return false, nil
}

return strings.Contains(string(output), "Version: v1.28"), nil
})
s.Require().NoError(err)

s.Require().NoError(k0stestutil.WaitForNodeReadyStatus(s.ctx, kmcKC, "docker-test-worker-0", corev1.ConditionTrue))
}

func (s *CAPIDockerMachineTemplateUpdateRollout) applyClusterObjects() {
// applyClusterObjects shells out to `kubectl apply -f` with the manifest file
// stored in s.clusterYamlsPath. Any kubectl failure aborts the test, and the
// command's combined stdout/stderr is included in the failure message.
func (s *CAPIDockerMachineTemplateUpdateRecreate) applyClusterObjects() {
	applyCmd := exec.Command("kubectl", "apply", "-f", s.clusterYamlsPath)
	output, err := applyCmd.CombinedOutput()
	s.Require().NoError(err, "failed to apply cluster objects: %s", string(output))
}

func (s *CAPIDockerMachineTemplateUpdateRollout) updateClusterObjects() {
// updateClusterObjects shells out to `kubectl apply -f` with the manifest file
// stored in s.clusterYamlsUpdatePath. Any kubectl failure aborts the test, and
// the command's combined stdout/stderr is included in the failure message.
func (s *CAPIDockerMachineTemplateUpdateRecreate) updateClusterObjects() {
	updateCmd := exec.Command("kubectl", "apply", "-f", s.clusterYamlsUpdatePath)
	output, err := updateCmd.CombinedOutput()
	s.Require().NoError(err, "failed to update cluster objects: %s", string(output))
}

func (s *CAPIDockerMachineTemplateUpdateRollout) deleteCluster() {
func (s *CAPIDockerMachineTemplateUpdateRecreate) deleteCluster() {
// Exec via kubectl
out, err := exec.Command("kubectl", "delete", "-f", s.clusterYamlsPath).CombinedOutput()
s.Require().NoError(err, "failed to delete cluster objects: %s", string(out))
Expand Down Expand Up @@ -223,7 +276,7 @@ metadata:
spec:
replicas: 3
version: v1.27.1+k0s.0
updateStrategy: rollout
updateStrategy: Recreate
k0sConfigSpec:
k0s:
apiVersion: k0s.k0sproject.io/v1beta1
Expand Down Expand Up @@ -293,7 +346,7 @@ metadata:
spec:
replicas: 3
version: v1.28.7+k0s.0
updateStrategy: rollout
updateStrategy: Recreate
k0sConfigSpec:
k0s:
apiVersion: k0s.k0sproject.io/v1beta1
Expand Down
Loading

0 comments on commit dc896f9

Please sign in to comment.