Skip to content

Commit

Permalink
Error handler
Browse files Browse the repository at this point in the history
  • Loading branch information
bouskaJ committed Jul 30, 2024
1 parent 3c0eaf9 commit 2180a28
Show file tree
Hide file tree
Showing 106 changed files with 1,607 additions and 584 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ endif

# Set the Operator SDK version to use. By default, what is installed on the system is used.
# This is useful for CI or a project to utilize a specific version of the operator-sdk toolkit.
OPERATOR_SDK_VERSION ?= v1.34.1
OPERATOR_SDK_VERSION ?= v1.34.2

# Image URL to use all building/pushing image targets
ifdef IMAGE_TAG
Expand Down
4 changes: 4 additions & 0 deletions api/v1alpha1/ctlog_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ type CTlogStatus struct {
RootCertificates []SecretKeySelector `json:"rootCertificates,omitempty"`
// The ID of a Trillian tree that stores the log data.
TreeID *int64 `json:"treeID,omitempty"`
// Number of component recovery attempts.
//+kubebuilder:default:=0
RecoveryAttempts int64 `json:"recoveryAttempts,omitempty"`
// +listType=map
// +listMapKey=type
// +patchStrategy=merge
Expand All @@ -60,6 +63,7 @@ type CTlogStatus struct {
//+kubebuilder:object:root=true
//+kubebuilder:subresource:status
//+kubebuilder:printcolumn:name="Status",type=string,JSONPath=`.status.conditions[?(@.type=="Ready")].reason`,description="The component status"
//+kubebuilder:printcolumn:name="Recovery Attempts",type=string,JSONPath=`.status.recoveryAttempts`,description="The component recovery attempts"

// CTlog is the Schema for the ctlogs API
type CTlog struct {
Expand Down
4 changes: 4 additions & 0 deletions api/v1alpha1/rekor_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ type RekorStatus struct {
RekorSearchUIUrl string `json:"rekorSearchUIUrl,omitempty"`
// The ID of a Trillian tree that stores the log data.
TreeID *int64 `json:"treeID,omitempty"`
// Number of component recovery attempts.
//+kubebuilder:default:=0
RecoveryAttempts int64 `json:"recoveryAttempts,omitempty"`
// +listType=map
// +listMapKey=type
// +patchStrategy=merge
Expand All @@ -91,6 +94,7 @@ type RekorStatus struct {
//+kubebuilder:subresource:status
//+kubebuilder:printcolumn:name="Status",type=string,JSONPath=`.status.conditions[?(@.type=="Ready")].reason`,description="The component status"
//+kubebuilder:printcolumn:name="URL",type=string,JSONPath=`.status.url`,description="The component url"
//+kubebuilder:printcolumn:name="Recovery Attempts",type=string,JSONPath=`.status.recoveryAttempts`,description="The component recovery attempts"

// Rekor is the Schema for the rekors API
type Rekor struct {
Expand Down
2 changes: 1 addition & 1 deletion bundle/manifests/rhtas-operator.clusterserviceversion.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ metadata:
]
capabilities: Seamless Upgrades
containerImage: registry.redhat.io/rhtas/rhtas-rhel9-operator@sha256:a21f7128694a64989bf0d84a7a7da4c1ffc89edf62d594dc8bea7bcfe9ac08d3
createdAt: "2024-07-10T17:23:58Z"
createdAt: "2024-07-25T08:46:13Z"
features.operators.openshift.io/cnf: "false"
features.operators.openshift.io/cni: "false"
features.operators.openshift.io/csi: "false"
Expand Down
9 changes: 9 additions & 0 deletions bundle/manifests/rhtas.redhat.com_ctlogs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ spec:
jsonPath: .status.conditions[?(@.type=="Ready")].reason
name: Status
type: string
- description: The component recovery attempts
jsonPath: .status.recoveryAttempts
name: Recovery Attempts
type: string
name: v1alpha1
schema:
openAPIV3Schema:
Expand Down Expand Up @@ -278,6 +282,11 @@ spec:
- name
type: object
x-kubernetes-map-type: atomic
recoveryAttempts:
default: 0
description: Number of component recovery attempts.
format: int64
type: integer
rootCertificates:
items:
description: SecretKeySelector selects a key of a Secret.
Expand Down
9 changes: 9 additions & 0 deletions bundle/manifests/rhtas.redhat.com_rekors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ spec:
jsonPath: .status.url
name: URL
type: string
- description: The component recovery attempts
jsonPath: .status.recoveryAttempts
name: Recovery Attempts
type: string
name: v1alpha1
schema:
openAPIV3Schema:
Expand Down Expand Up @@ -321,6 +325,11 @@ spec:
x-kubernetes-map-type: atomic
pvcName:
type: string
recoveryAttempts:
default: 0
description: Number of component recovery attempts.
format: int64
type: integer
rekorSearchUIUrl:
type: string
serverConfigRef:
Expand Down
1 change: 1 addition & 0 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ func main() {
"If set, HTTP/2 will be enabled for the metrics and webhook servers")
flag.Int64Var(&constants.CreateTreeDeadline, "create-tree-deadline", constants.CreateTreeDeadline, "The time allowance (in seconds) for the create tree job to run before failing.")
utils.BoolFlagOrEnv(&constants.Openshift, "openshift", "OPENSHIFT", false, "Enable to ensures the operator applies OpenShift specific configurations.")
flag.Int64Var(&constants.AllowedRecoveryAttempts, "recovery-attempts", constants.AllowedRecoveryAttempts, "Specifies the maximum number of recovery attempts allowed before an operation is considered failed.")
utils.StringFlagOrEnv(&constants.TrillianLogSignerImage, "trillian-log-signer-image", "TRILLIAN_LOG_SIGNER_IMAGE", constants.TrillianLogSignerImage, "The image used for trillian log signer.")
utils.StringFlagOrEnv(&constants.TrillianServerImage, "trillian-log-server-image", "TRILLIAN_LOG_SERVER_IMAGE", constants.TrillianServerImage, "The image used for trillian log server.")
utils.StringFlagOrEnv(&constants.TrillianDbImage, "trillian-db-image", "TRILLIAN_DB_IMAGE", constants.TrillianDbImage, "The image used for trillian's database.")
Expand Down
9 changes: 9 additions & 0 deletions config/crd/bases/rhtas.redhat.com_ctlogs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ spec:
jsonPath: .status.conditions[?(@.type=="Ready")].reason
name: Status
type: string
- description: The component recovery attempts
jsonPath: .status.recoveryAttempts
name: Recovery Attempts
type: string
name: v1alpha1
schema:
openAPIV3Schema:
Expand Down Expand Up @@ -278,6 +282,11 @@ spec:
- name
type: object
x-kubernetes-map-type: atomic
recoveryAttempts:
default: 0
description: Number of component recovery attempts.
format: int64
type: integer
rootCertificates:
items:
description: SecretKeySelector selects a key of a Secret.
Expand Down
9 changes: 9 additions & 0 deletions config/crd/bases/rhtas.redhat.com_rekors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ spec:
jsonPath: .status.url
name: URL
type: string
- description: The component recovery attempts
jsonPath: .status.recoveryAttempts
name: Recovery Attempts
type: string
name: v1alpha1
schema:
openAPIV3Schema:
Expand Down Expand Up @@ -321,6 +325,11 @@ spec:
x-kubernetes-map-type: atomic
pvcName:
type: string
recoveryAttempts:
default: 0
description: Number of component recovery attempts.
format: int64
type: integer
rekorSearchUIUrl:
type: string
serverConfigRef:
Expand Down
10 changes: 10 additions & 0 deletions internal/apis/conditions_aware.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package apis

import (
"github.com/securesign/operator/internal/controller/constants"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
)
Expand All @@ -11,3 +13,11 @@ type ConditionsAwareObject interface {
GetConditions() []metav1.Condition
SetCondition(newCondition metav1.Condition)
}

// IsError reports whether obj is currently in the Error state: its Ready
// condition is present with status False and reason constants.Error.
//
// It returns false for a nil obj, when no Ready condition is set, or when the
// Ready condition has any other status or reason.
func IsError(obj ConditionsAwareObject) bool {
	if obj == nil {
		return false
	}

	// Look the Ready condition up once and reuse it for both the status and
	// the reason check.
	ready := meta.FindStatusCondition(obj.GetConditions(), constants.Ready)
	if ready == nil || ready.Status != metav1.ConditionFalse {
		return false
	}
	return ready.Reason == constants.Error
}
12 changes: 9 additions & 3 deletions internal/controller/common/action/action.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,18 @@ type Action[T apis.ConditionsAwareObject] interface {
InjectRecorder(recorder record.EventRecorder)
InjectLogger(logger logr.Logger)

// a user friendly name for the action
// Name returns a user-friendly name for the action
Name() string

// returns true if the action can handle the integration
// CanHandle returns true if the action can handle the given object
CanHandle(context.Context, T) bool

// executes the handling function
// Handle executes the handling function
Handle(context.Context, T) *Result

// CanHandleError returns true if the action can handle the error
CanHandleError(context.Context, T) bool

// HandleError executes the error handling function for specific action
HandleError(context.Context, T) *Result
}
50 changes: 42 additions & 8 deletions internal/controller/common/action/base_action.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,14 @@ import (
"strings"
"time"

"github.com/securesign/operator/internal/controller/annotations"

"github.com/go-logr/logr"
"github.com/securesign/operator/internal/apis"
"github.com/securesign/operator/internal/controller/annotations"
"github.com/securesign/operator/internal/controller/constants"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/equality"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/record"
"sigs.k8s.io/controller-runtime/pkg/client"
client2 "sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -49,29 +52,60 @@ func (action *BaseAction) StatusUpdate(ctx context.Context, obj client2.Object)
if strings.Contains(err.Error(), OptimisticLockErrorMsg) {
return &Result{Result: reconcile.Result{RequeueAfter: 1 * time.Second}, Err: nil}
}
return action.Failed(err)
return action.Error(err)
}
// Requeue will be caused by update
return &Result{Result: reconcile.Result{Requeue: false}}
}

func (action *BaseAction) Failed(err error) *Result {
func (action *BaseAction) Error(err error) *Result {
action.Logger.Error(err, "error during action execution")
return &Result{
Result: reconcile.Result{RequeueAfter: time.Duration(5) * time.Second},
Err: err,
Err: err,
}
}

// ErrorWithStatusUpdate sets the `Error` status on the instance and schedules
// the error-recovery loop to run in 10 seconds.
//
// It emits a warning event carrying err, sets the Ready condition to False
// with reason constants.Error and err's message, and persists the status.
//
// If the status update is rejected with an optimistic-lock conflict, the
// result asks for a retry after 1 second and carries the original err. Any
// other status update failure is logged; the returned result still requeues
// after 10 seconds and carries no error.
func (action *BaseAction) ErrorWithStatusUpdate(ctx context.Context, err error, instance apis.ConditionsAwareObject) *Result {
	action.Recorder.Event(instance, v1.EventTypeWarning, constants.Error, err.Error())

	instance.SetCondition(metav1.Condition{
		Type:    constants.Ready,
		Status:  metav1.ConditionFalse,
		Reason:  constants.Error,
		Message: err.Error(),
	})

	if e := action.Client.Status().Update(ctx, instance); e != nil {
		// The conflict has to be detected on the update error (e); the
		// original err describes the action failure, not the write.
		if strings.Contains(e.Error(), OptimisticLockErrorMsg) {
			return &Result{Result: reconcile.Result{RequeueAfter: 1 * time.Second}, Err: err}
		}
		// Surface the failed status write instead of dropping it silently.
		action.Logger.Error(errors.Join(e, err), "unable to update status with Error condition")
	}
	// Requeue is disabled for Error objects
	// wait for 10 seconds and invoke error-handler
	return &Result{Result: reconcile.Result{RequeueAfter: 10 * time.Second}}
}

func (action *BaseAction) FailedWithStatusUpdate(ctx context.Context, err error, instance client2.Object) *Result {
// FailWithStatusUpdate - Throw deployment to the Failure state with no error-recovery attempts
func (action *BaseAction) FailWithStatusUpdate(ctx context.Context, err error, instance apis.ConditionsAwareObject) *Result {
action.Recorder.Event(instance, v1.EventTypeWarning, constants.Failure, err.Error())

instance.SetCondition(metav1.Condition{
Type: constants.Ready,
Status: metav1.ConditionFalse,
Reason: constants.Failure,
Message: err.Error(),
})

if e := action.Client.Status().Update(ctx, instance); e != nil {
if strings.Contains(err.Error(), OptimisticLockErrorMsg) {
return &Result{Result: reconcile.Result{RequeueAfter: 1 * time.Second}, Err: err}
}
err = errors.Join(e, err)
}
// Requeue will be caused by update
return &Result{Result: reconcile.Result{Requeue: false}, Err: err}
return &Result{Result: reconcile.Result{Requeue: false}}
}

func (action *BaseAction) Return() *Result {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ type restartAction[T apis.ConditionsAwareObject] struct {
}

func (i restartAction[T]) Name() string {
return "restart deployment"
return "restart on error"
}

func (i restartAction[T]) CanHandle(_ context.Context, instance T) bool {
Expand Down Expand Up @@ -54,11 +54,11 @@ func (i restartAction[T]) HandleError(ctx context.Context, instance T) *action.R
if err != nil {
return i.Error(err)
}
if restarts < constants.AllowedRestarts {
if restarts < constants.AllowedRecoveryAttempts {
instance.SetCondition(metav1.Condition{Type: constants.Ready,
Status: metav1.ConditionFalse, Reason: constants.Pending})
} else {
return i.FailWithStatusUpdate(ctx, fmt.Errorf("restart threshold reached"), instance)
return i.FailWithStatusUpdate(ctx, fmt.Errorf("recovery threshold reached"), instance)
}

return i.StatusUpdate(ctx, instance)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package transitions

import (
"context"
"fmt"
"testing"

. "github.com/onsi/gomega"
"github.com/securesign/operator/api/v1alpha1"
"github.com/securesign/operator/internal/controller/constants"
testAction "github.com/securesign/operator/internal/testing/action"
"k8s.io/apimachinery/pkg/api/meta"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Test_HandleError covers an instance whose Ready condition is False with
// reason Error and whose RecoveryAttempts is 0. The action is expected to
// accept the error, return the StatusUpdate result, leave RecoveryAttempts at
// 1 and switch the Ready reason to Pending.
func Test_HandleError(t *testing.T) {
	g := NewWithT(t)
	ctx := context.TODO()

	errorCondition := v1.Condition{
		Type:   constants.Ready,
		Status: v1.ConditionFalse,
		Reason: constants.Error,
	}
	ctlog := &v1alpha1.CTlog{
		ObjectMeta: v1.ObjectMeta{Name: "error", Namespace: "default"},
		Status: v1alpha1.CTlogStatus{
			RecoveryAttempts: 0,
			Conditions:       []v1.Condition{errorCondition},
		},
	}

	restart := NewRestartOnErrorAction[*v1alpha1.CTlog]()
	fakeClient := testAction.FakeClientBuilder().
		WithObjects(ctlog).
		WithStatusSubresource(ctlog).
		Build()
	restart = testAction.PrepareAction(fakeClient, restart)

	g.Expect(restart.CanHandleError(ctx, ctlog)).To(BeTrue())

	got := restart.HandleError(ctx, ctlog)
	g.Expect(got).Should(Equal(testAction.StatusUpdate()))
	g.Expect(ctlog.Status.RecoveryAttempts).Should(Equal(int64(1)))

	ready := meta.FindStatusCondition(ctlog.GetConditions(), constants.Ready)
	g.Expect(ready.Reason).Should(Equal(constants.Pending))
}

// Test_HandleError_Threshold covers an instance in the Error state that is one
// attempt short of constants.AllowedRecoveryAttempts. The action is expected
// to accept the error, return the FailWithStatusUpdate result, leave
// RecoveryAttempts equal to the allowed maximum and set the Ready reason to
// Failure.
func Test_HandleError_Threshold(t *testing.T) {
	g := NewWithT(t)
	ctx := context.TODO()

	errorCondition := v1.Condition{
		Type:   constants.Ready,
		Status: v1.ConditionFalse,
		Reason: constants.Error,
	}
	ctlog := &v1alpha1.CTlog{
		ObjectMeta: v1.ObjectMeta{Name: "treshold", Namespace: "default"},
		Status: v1alpha1.CTlogStatus{
			Conditions:       []v1.Condition{errorCondition},
			RecoveryAttempts: constants.AllowedRecoveryAttempts - 1,
		},
	}

	restart := NewRestartOnErrorAction[*v1alpha1.CTlog]()
	fakeClient := testAction.FakeClientBuilder().
		WithObjects(ctlog).
		WithStatusSubresource(ctlog).
		Build()
	restart = testAction.PrepareAction(fakeClient, restart)

	g.Expect(restart.CanHandleError(ctx, ctlog)).To(BeTrue())

	got := restart.HandleError(ctx, ctlog)
	g.Expect(got).Should(Equal(testAction.FailWithStatusUpdate(fmt.Errorf("error"))))
	g.Expect(ctlog.Status.RecoveryAttempts).Should(Equal(constants.AllowedRecoveryAttempts))

	ready := meta.FindStatusCondition(ctlog.GetConditions(), constants.Ready)
	g.Expect(ready.Reason).Should(Equal(constants.Failure))
}

// Test_HandleError_Running covers an instance whose Ready condition is True
// (reason Ready) while RecoveryAttempts is still 2. The action is expected to
// handle it through the regular CanHandle/Handle path, return the StatusUpdate
// result, reset RecoveryAttempts to 0 and keep the Ready reason unchanged.
func Test_HandleError_Running(t *testing.T) {
	g := NewWithT(t)
	ctx := context.TODO()

	readyCondition := v1.Condition{
		Type:   constants.Ready,
		Status: v1.ConditionTrue,
		Reason: constants.Ready,
	}
	ctlog := &v1alpha1.CTlog{
		ObjectMeta: v1.ObjectMeta{Name: "handleRunning", Namespace: "default"},
		Status: v1alpha1.CTlogStatus{
			Conditions:       []v1.Condition{readyCondition},
			RecoveryAttempts: 2,
		},
	}

	restart := NewRestartOnErrorAction[*v1alpha1.CTlog]()
	fakeClient := testAction.FakeClientBuilder().
		WithObjects(ctlog).
		WithStatusSubresource(ctlog).
		Build()
	restart = testAction.PrepareAction(fakeClient, restart)

	g.Expect(restart.CanHandle(ctx, ctlog)).To(BeTrue())

	got := restart.Handle(ctx, ctlog)
	g.Expect(got).Should(Equal(testAction.StatusUpdate()))
	g.Expect(ctlog.Status.RecoveryAttempts).Should(Equal(int64(0)))

	ready := meta.FindStatusCondition(ctlog.GetConditions(), constants.Ready)
	g.Expect(ready.Reason).Should(Equal(constants.Ready))
}
Loading

0 comments on commit 2180a28

Please sign in to comment.