Skip to content

Commit

Permalink
remove rate limit error from state (#477)
Browse files Browse the repository at this point in the history
  • Loading branch information
I065450 authored Nov 12, 2024
1 parent 2eb77b1 commit 9cebc45
Show file tree
Hide file tree
Showing 7 changed files with 73 additions and 51 deletions.
12 changes: 7 additions & 5 deletions client/sm/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,11 @@ type Client interface {
}

type ServiceManagerError struct {
ErrorType string `json:"error,omitempty"`
Description string `json:"description,omitempty"`
StatusCode int `json:"-"`
BrokerError *common.HTTPStatusCodeError `json:"broker_error,omitempty"`
ErrorType string `json:"error,omitempty"`
Description string `json:"description,omitempty"`
StatusCode int `json:"-"`
BrokerError *common.HTTPStatusCodeError `json:"broker_error,omitempty"`
ResponseHeaders http.Header `json:"-"`
}

func (e *ServiceManagerError) Error() string {
Expand Down Expand Up @@ -590,7 +591,8 @@ func handleResponseError(response *http.Response) error {
}

smError := &ServiceManagerError{
StatusCode: response.StatusCode,
StatusCode: response.StatusCode,
ResponseHeaders: response.Header,
}
_ = json.Unmarshal(body, &smError)

Expand Down
11 changes: 5 additions & 6 deletions controllers/servicebinding_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ func (r *ServiceBindingReconciler) createBinding(ctx context.Context, smClient s
_, bindingParameters, err := utils.BuildSMRequestParameters(serviceBinding.Namespace, serviceBinding.Spec.ParametersFrom, serviceBinding.Spec.Parameters)
if err != nil {
log.Error(err, "failed to parse smBinding parameters")
return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.CREATE, err.Error(), serviceBinding)
return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.CREATE, err, serviceBinding)
}

smBinding, operationURL, bindErr := smClient.Bind(&smClientTypes.ServiceBinding{
Expand All @@ -278,13 +278,13 @@ func (r *ServiceBindingReconciler) createBinding(ctx context.Context, smClient s

if bindErr != nil {
log.Error(err, "failed to create service binding", "serviceInstanceID", serviceInstance.Status.InstanceID)
return utils.HandleError(ctx, r.Client, smClientTypes.CREATE, bindErr, serviceBinding, true)
return utils.HandleError(ctx, r.Client, smClientTypes.CREATE, bindErr, serviceBinding)
}

if operationURL != "" {
var bindingID string
if bindingID = sm.ExtractBindingID(operationURL); len(bindingID) == 0 {
return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.CREATE, fmt.Sprintf("failed to extract smBinding ID from operation URL %s", operationURL), serviceBinding)
return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.CREATE, fmt.Errorf("failed to extract smBinding ID from operation URL %s", operationURL), serviceBinding)
}
serviceBinding.Status.BindingID = bindingID

Expand Down Expand Up @@ -360,8 +360,7 @@ func (r *ServiceBindingReconciler) delete(ctx context.Context, serviceBinding *v
log.Info(fmt.Sprintf("Deleting binding with id %v from SM", serviceBinding.Status.BindingID))
operationURL, unbindErr := smClient.Unbind(serviceBinding.Status.BindingID, nil, utils.BuildUserInfo(ctx, serviceBinding.Spec.UserInfo))
if unbindErr != nil {
// delete will proceed anyway
return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.DELETE, unbindErr.Error(), serviceBinding)
return utils.HandleDeleteError(ctx, r.Client, unbindErr, serviceBinding)
}

if operationURL != "" {
Expand Down Expand Up @@ -847,7 +846,7 @@ func (r *ServiceBindingReconciler) handleSecretError(ctx context.Context, op smC
log := utils.GetLogger(ctx)
log.Error(err, fmt.Sprintf("failed to store secret %s for binding %s", binding.Spec.SecretName, binding.Name))
if apierrors.ReasonForError(err) == metav1.StatusReasonUnknown {
return utils.MarkAsNonTransientError(ctx, r.Client, op, err.Error(), binding)
return utils.MarkAsNonTransientError(ctx, r.Client, op, err, binding)
}
return utils.MarkAsTransientError(ctx, r.Client, op, err, binding)
}
Expand Down
7 changes: 3 additions & 4 deletions controllers/servicebinding_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,10 @@ import (
"context"
"encoding/json"
"errors"
"net/http"
"strings"

"github.com/lithammer/dedent"
authv1 "k8s.io/api/authentication/v1"
"net/http"
"strings"

"github.com/SAP/sap-btp-service-operator/api/common"
"github.com/SAP/sap-btp-service-operator/internal/utils"
Expand Down Expand Up @@ -400,7 +399,7 @@ var _ = Describe("ServiceBinding controller", func() {
})
})

When("SM returned transient error(429)", func() {
When("SM returned transient error(429) without retry-after header", func() {
BeforeEach(func() {
errorMessage = "too many requests"
fakeClient.BindReturnsOnCall(0, nil, "", &sm.ServiceManagerError{
Expand Down
10 changes: 5 additions & 5 deletions controllers/serviceinstance_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ func (r *ServiceInstanceReconciler) createInstance(ctx context.Context, smClient
if err != nil {
// if parameters are invalid there is nothing we can do, the user should fix it according to the error message in the condition
log.Error(err, "failed to parse instance parameters")
return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.CREATE, err.Error(), serviceInstance)
return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.CREATE, err, serviceInstance)
}

provision, provisionErr := smClient.Provision(&smClientTypes.ServiceInstance{
Expand All @@ -193,7 +193,7 @@ func (r *ServiceInstanceReconciler) createInstance(ctx context.Context, smClient
if provisionErr != nil {
log.Error(provisionErr, "failed to create service instance", "serviceOfferingName", serviceInstance.Spec.ServiceOfferingName,
"servicePlanName", serviceInstance.Spec.ServicePlanName)
return utils.HandleError(ctx, r.Client, smClientTypes.CREATE, provisionErr, serviceInstance, true)
return utils.HandleError(ctx, r.Client, smClientTypes.CREATE, provisionErr, serviceInstance)
}

serviceInstance.Status.InstanceID = provision.InstanceID
Expand Down Expand Up @@ -231,7 +231,7 @@ func (r *ServiceInstanceReconciler) updateInstance(ctx context.Context, smClient
_, instanceParameters, err := utils.BuildSMRequestParameters(serviceInstance.Namespace, serviceInstance.Spec.ParametersFrom, serviceInstance.Spec.Parameters)
if err != nil {
log.Error(err, "failed to parse instance parameters")
return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.UPDATE, fmt.Sprintf("failed to parse parameters: %v", err.Error()), serviceInstance)
return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.UPDATE, err, serviceInstance)
}

_, operationURL, err := smClient.UpdateInstance(serviceInstance.Status.InstanceID, &smClientTypes.ServiceInstance{
Expand All @@ -242,7 +242,7 @@ func (r *ServiceInstanceReconciler) updateInstance(ctx context.Context, smClient

if err != nil {
log.Error(err, fmt.Sprintf("failed to update service instance with ID %s", serviceInstance.Status.InstanceID))
return utils.HandleError(ctx, r.Client, smClientTypes.UPDATE, err, serviceInstance, true)
return utils.HandleError(ctx, r.Client, smClientTypes.UPDATE, err, serviceInstance)
}

if operationURL != "" {
Expand Down Expand Up @@ -296,7 +296,7 @@ func (r *ServiceInstanceReconciler) deleteInstance(ctx context.Context, serviceI
operationURL, deprovisionErr := smClient.Deprovision(serviceInstance.Status.InstanceID, nil, utils.BuildUserInfo(ctx, serviceInstance.Spec.UserInfo))
if deprovisionErr != nil {
// delete will proceed anyway
return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.DELETE, deprovisionErr.Error(), serviceInstance)
return utils.HandleDeleteError(ctx, r.Client, deprovisionErr, serviceInstance)
}

if operationURL != "" {
Expand Down
29 changes: 8 additions & 21 deletions internal/utils/condition_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@ package utils
import (
"context"
"fmt"
"net/http"

"github.com/SAP/sap-btp-service-operator/api/common"
"github.com/SAP/sap-btp-service-operator/client/sm"
smClientTypes "github.com/SAP/sap-btp-service-operator/client/sm/types"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -178,32 +176,21 @@ func SetFailureConditions(operationType smClientTypes.OperationCategory, errorMe
object.SetConditions(conditions)
}

func MarkAsNonTransientError(ctx context.Context, k8sClient client.Client, operationType smClientTypes.OperationCategory, errMsg string, object common.SAPBTPResource) (ctrl.Result, error) {
func MarkAsNonTransientError(ctx context.Context, k8sClient client.Client, operationType smClientTypes.OperationCategory, err error, object common.SAPBTPResource) (ctrl.Result, error) {
log := GetLogger(ctx)
errMsg := err.Error()
log.Info(fmt.Sprintf("operation %s of %s encountered a non transient error %s, setting failure conditions", operationType, object.GetControllerName(), errMsg))
SetFailureConditions(operationType, errMsg, object)
if operationType != smClientTypes.DELETE {
log.Info(fmt.Sprintf("operation %s of %s encountered a non transient error %s, giving up operation :(", operationType, object.GetControllerName(), errMsg))
}
object.SetObservedGeneration(object.GetGeneration())
err := UpdateStatus(ctx, k8sClient, object)
if err != nil {
return ctrl.Result{}, err
}
if operationType == smClientTypes.DELETE {
return ctrl.Result{}, fmt.Errorf(errMsg)
}
return ctrl.Result{}, nil
return ctrl.Result{}, UpdateStatus(ctx, k8sClient, object)
}

func MarkAsTransientError(ctx context.Context, k8sClient client.Client, operationType smClientTypes.OperationCategory, err error, object common.SAPBTPResource) (ctrl.Result, error) {
log := GetLogger(ctx)
//DO NOT REMOVE - 429 error is not reflected to the status
if smError, ok := err.(*sm.ServiceManagerError); !ok || smError.StatusCode != http.StatusTooManyRequests {
SetInProgressConditions(ctx, operationType, err.Error(), object)
log.Info(fmt.Sprintf("operation %s of %s encountered a transient error %s, retrying operation :)", operationType, object.GetControllerName(), err.Error()))
if updateErr := UpdateStatus(ctx, k8sClient, object); updateErr != nil {
return ctrl.Result{}, updateErr
}
log.Info(fmt.Sprintf("operation %s of %s encountered a transient error %s, retrying operation :)", operationType, object.GetControllerName(), err.Error()))
SetInProgressConditions(ctx, operationType, err.Error(), object)
if updateErr := UpdateStatus(ctx, k8sClient, object); updateErr != nil {
return ctrl.Result{}, updateErr
}

return ctrl.Result{}, err
Expand Down
4 changes: 2 additions & 2 deletions internal/utils/condition_utils_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package utils

import (
"fmt"
"net/http"

"github.com/SAP/sap-btp-service-operator/api/common"
Expand Down Expand Up @@ -168,9 +169,8 @@ var _ = Describe("Condition Utils", func() {
Context("MarkAsNonTransientError", func() {
It("should mark as non-transient error and update status", func() {
operationType := smClientTypes.CREATE
errorMessage := "Non-transient error"

result, err := MarkAsNonTransientError(ctx, k8sClient, operationType, errorMessage, resource)
result, err := MarkAsNonTransientError(ctx, k8sClient, operationType, fmt.Errorf("Non-transient error"), resource)
Expect(err).ToNot(HaveOccurred())
Expect(result).To(Equal(ctrl.Result{}))
})
Expand Down
51 changes: 43 additions & 8 deletions internal/utils/controller_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"fmt"
"net/http"
"strings"
"time"

"github.com/SAP/sap-btp-service-operator/api/common"
"github.com/SAP/sap-btp-service-operator/client/sm"
Expand Down Expand Up @@ -128,19 +129,53 @@ func GetLogger(ctx context.Context) logr.Logger {
return ctx.Value(LogKey{}).(logr.Logger)
}

func HandleError(ctx context.Context, k8sClient client.Client, operationType smClientTypes.OperationCategory, err error, resource common.SAPBTPResource, ignoreNonTransient bool) (ctrl.Result, error) {
func HandleError(ctx context.Context, k8sClient client.Client, operationType smClientTypes.OperationCategory, err error, resource common.SAPBTPResource) (ctrl.Result, error) {
log := GetLogger(ctx)
var smError *sm.ServiceManagerError
ok := errors.As(err, &smError)
if !ok {
log.Info("unable to cast error to SM error, will be treated as non transient")
return MarkAsNonTransientError(ctx, k8sClient, operationType, err.Error(), resource)
}
if ignoreNonTransient || IsTransientError(smError, log) {
if ok := errors.As(err, &smError); ok {
if smError.StatusCode == http.StatusTooManyRequests {
log.Info(fmt.Sprintf("SM returned 429 (%s), requeueing...", smError.Error()))
return handleRateLimitError(smError, log)
}

log.Info(fmt.Sprintf("SM returned error: %s", smError.Error()))
return MarkAsTransientError(ctx, k8sClient, operationType, smError, resource)
}

return MarkAsNonTransientError(ctx, k8sClient, operationType, smError.Error(), resource)
log.Info(fmt.Sprintf("unable to cast error to SM error, will be treated as non transient. (error: %v)", err))
return MarkAsNonTransientError(ctx, k8sClient, operationType, err, resource)
}

func handleRateLimitError(smError *sm.ServiceManagerError, log logr.Logger) (ctrl.Result, error) {
retryAfterStr := smError.ResponseHeaders.Get("Retry-After")
if len(retryAfterStr) > 0 {
log.Info(fmt.Sprintf("SM returned 429 with Retry-After: %s, requeueing after it...", retryAfterStr))
retryAfter, err := time.Parse(time.DateTime, retryAfterStr[:len(time.DateTime)]) // format 2024-11-11 14:59:33 +0000 UTC
if err != nil {
log.Error(err, "failed to parse Retry-After header, using default requeue time")
} else {
timeToRequeue := time.Until(retryAfter)
log.Info(fmt.Sprintf("requeueing after %d minutes, %d seconds", int(timeToRequeue.Minutes()), int(timeToRequeue.Seconds())%60))
return ctrl.Result{RequeueAfter: timeToRequeue}, nil
}
}

return ctrl.Result{Requeue: true}, nil
}

func HandleDeleteError(ctx context.Context, k8sClient client.Client, err error, object common.SAPBTPResource) (ctrl.Result, error) {
log := GetLogger(ctx)
log.Info(fmt.Sprintf("handling delete error: %v", err))
var smError *sm.ServiceManagerError
if errors.As(err, &smError) && smError.StatusCode == http.StatusTooManyRequests {
return handleRateLimitError(smError, log)
}

if _, updateErr := MarkAsNonTransientError(ctx, k8sClient, smClientTypes.DELETE, err, object); updateErr != nil {
log.Error(updateErr, "failed to update resource status")
return ctrl.Result{}, updateErr
}
return ctrl.Result{}, err
}

func IsTransientError(smError *sm.ServiceManagerError, log logr.Logger) bool {
Expand Down

0 comments on commit 9cebc45

Please sign in to comment.