Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove rate limit error from state #477

Merged
merged 7 commits into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions client/sm/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,11 @@ type Client interface {
}

type ServiceManagerError struct {
ErrorType string `json:"error,omitempty"`
Description string `json:"description,omitempty"`
StatusCode int `json:"-"`
BrokerError *common.HTTPStatusCodeError `json:"broker_error,omitempty"`
ErrorType string `json:"error,omitempty"`
Description string `json:"description,omitempty"`
StatusCode int `json:"-"`
BrokerError *common.HTTPStatusCodeError `json:"broker_error,omitempty"`
ResponseHeaders http.Header `json:"-"`
}

func (e *ServiceManagerError) Error() string {
Expand Down Expand Up @@ -590,7 +591,8 @@ func handleResponseError(response *http.Response) error {
}

smError := &ServiceManagerError{
StatusCode: response.StatusCode,
StatusCode: response.StatusCode,
ResponseHeaders: response.Header,
}
_ = json.Unmarshal(body, &smError)

Expand Down
11 changes: 5 additions & 6 deletions controllers/servicebinding_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ func (r *ServiceBindingReconciler) createBinding(ctx context.Context, smClient s
_, bindingParameters, err := utils.BuildSMRequestParameters(serviceBinding.Namespace, serviceBinding.Spec.ParametersFrom, serviceBinding.Spec.Parameters)
if err != nil {
log.Error(err, "failed to parse smBinding parameters")
return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.CREATE, err.Error(), serviceBinding)
return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.CREATE, err, serviceBinding)
}

smBinding, operationURL, bindErr := smClient.Bind(&smClientTypes.ServiceBinding{
Expand All @@ -278,13 +278,13 @@ func (r *ServiceBindingReconciler) createBinding(ctx context.Context, smClient s

if bindErr != nil {
log.Error(err, "failed to create service binding", "serviceInstanceID", serviceInstance.Status.InstanceID)
return utils.HandleError(ctx, r.Client, smClientTypes.CREATE, bindErr, serviceBinding, true)
return utils.HandleError(ctx, r.Client, smClientTypes.CREATE, bindErr, serviceBinding)
}

if operationURL != "" {
var bindingID string
if bindingID = sm.ExtractBindingID(operationURL); len(bindingID) == 0 {
return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.CREATE, fmt.Sprintf("failed to extract smBinding ID from operation URL %s", operationURL), serviceBinding)
return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.CREATE, fmt.Errorf("failed to extract smBinding ID from operation URL %s", operationURL), serviceBinding)
}
serviceBinding.Status.BindingID = bindingID

Expand Down Expand Up @@ -360,8 +360,7 @@ func (r *ServiceBindingReconciler) delete(ctx context.Context, serviceBinding *v
log.Info(fmt.Sprintf("Deleting binding with id %v from SM", serviceBinding.Status.BindingID))
operationURL, unbindErr := smClient.Unbind(serviceBinding.Status.BindingID, nil, utils.BuildUserInfo(ctx, serviceBinding.Spec.UserInfo))
if unbindErr != nil {
// delete will proceed anyway
return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.DELETE, unbindErr.Error(), serviceBinding)
return utils.HandleDeleteError(ctx, r.Client, unbindErr, serviceBinding)
}

if operationURL != "" {
Expand Down Expand Up @@ -847,7 +846,7 @@ func (r *ServiceBindingReconciler) handleSecretError(ctx context.Context, op smC
log := utils.GetLogger(ctx)
log.Error(err, fmt.Sprintf("failed to store secret %s for binding %s", binding.Spec.SecretName, binding.Name))
if apierrors.ReasonForError(err) == metav1.StatusReasonUnknown {
return utils.MarkAsNonTransientError(ctx, r.Client, op, err.Error(), binding)
return utils.MarkAsNonTransientError(ctx, r.Client, op, err, binding)
}
return utils.MarkAsTransientError(ctx, r.Client, op, err, binding)
}
Expand Down
7 changes: 3 additions & 4 deletions controllers/servicebinding_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,10 @@ import (
"context"
"encoding/json"
"errors"
"net/http"
"strings"

"github.com/lithammer/dedent"
authv1 "k8s.io/api/authentication/v1"
"net/http"
"strings"

"github.com/SAP/sap-btp-service-operator/api/common"
"github.com/SAP/sap-btp-service-operator/internal/utils"
Expand Down Expand Up @@ -400,7 +399,7 @@ var _ = Describe("ServiceBinding controller", func() {
})
})

When("SM returned transient error(429)", func() {
When("SM returned transient error(429) without retry-after header", func() {
BeforeEach(func() {
errorMessage = "too many requests"
fakeClient.BindReturnsOnCall(0, nil, "", &sm.ServiceManagerError{
Expand Down
10 changes: 5 additions & 5 deletions controllers/serviceinstance_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ func (r *ServiceInstanceReconciler) createInstance(ctx context.Context, smClient
if err != nil {
// if parameters are invalid there is nothing we can do, the user should fix it according to the error message in the condition
log.Error(err, "failed to parse instance parameters")
return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.CREATE, err.Error(), serviceInstance)
return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.CREATE, err, serviceInstance)
}

provision, provisionErr := smClient.Provision(&smClientTypes.ServiceInstance{
Expand All @@ -193,7 +193,7 @@ func (r *ServiceInstanceReconciler) createInstance(ctx context.Context, smClient
if provisionErr != nil {
log.Error(provisionErr, "failed to create service instance", "serviceOfferingName", serviceInstance.Spec.ServiceOfferingName,
"servicePlanName", serviceInstance.Spec.ServicePlanName)
return utils.HandleError(ctx, r.Client, smClientTypes.CREATE, provisionErr, serviceInstance, true)
return utils.HandleError(ctx, r.Client, smClientTypes.CREATE, provisionErr, serviceInstance)
}

serviceInstance.Status.InstanceID = provision.InstanceID
Expand Down Expand Up @@ -231,7 +231,7 @@ func (r *ServiceInstanceReconciler) updateInstance(ctx context.Context, smClient
_, instanceParameters, err := utils.BuildSMRequestParameters(serviceInstance.Namespace, serviceInstance.Spec.ParametersFrom, serviceInstance.Spec.Parameters)
if err != nil {
log.Error(err, "failed to parse instance parameters")
return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.UPDATE, fmt.Sprintf("failed to parse parameters: %v", err.Error()), serviceInstance)
return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.UPDATE, err, serviceInstance)
}

_, operationURL, err := smClient.UpdateInstance(serviceInstance.Status.InstanceID, &smClientTypes.ServiceInstance{
Expand All @@ -242,7 +242,7 @@ func (r *ServiceInstanceReconciler) updateInstance(ctx context.Context, smClient

if err != nil {
log.Error(err, fmt.Sprintf("failed to update service instance with ID %s", serviceInstance.Status.InstanceID))
return utils.HandleError(ctx, r.Client, smClientTypes.UPDATE, err, serviceInstance, true)
return utils.HandleError(ctx, r.Client, smClientTypes.UPDATE, err, serviceInstance)
}

if operationURL != "" {
Expand Down Expand Up @@ -296,7 +296,7 @@ func (r *ServiceInstanceReconciler) deleteInstance(ctx context.Context, serviceI
operationURL, deprovisionErr := smClient.Deprovision(serviceInstance.Status.InstanceID, nil, utils.BuildUserInfo(ctx, serviceInstance.Spec.UserInfo))
if deprovisionErr != nil {
// delete will proceed anyway
return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.DELETE, deprovisionErr.Error(), serviceInstance)
return utils.HandleDeleteError(ctx, r.Client, deprovisionErr, serviceInstance)
}

if operationURL != "" {
Expand Down
29 changes: 8 additions & 21 deletions internal/utils/condition_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@ package utils
import (
"context"
"fmt"
"net/http"

"github.com/SAP/sap-btp-service-operator/api/common"
"github.com/SAP/sap-btp-service-operator/client/sm"
smClientTypes "github.com/SAP/sap-btp-service-operator/client/sm/types"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -178,32 +176,21 @@ func SetFailureConditions(operationType smClientTypes.OperationCategory, errorMe
object.SetConditions(conditions)
}

func MarkAsNonTransientError(ctx context.Context, k8sClient client.Client, operationType smClientTypes.OperationCategory, errMsg string, object common.SAPBTPResource) (ctrl.Result, error) {
func MarkAsNonTransientError(ctx context.Context, k8sClient client.Client, operationType smClientTypes.OperationCategory, err error, object common.SAPBTPResource) (ctrl.Result, error) {
log := GetLogger(ctx)
errMsg := err.Error()
log.Info(fmt.Sprintf("operation %s of %s encountered a non transient error %s, setting failure conditions", operationType, object.GetControllerName(), errMsg))
SetFailureConditions(operationType, errMsg, object)
if operationType != smClientTypes.DELETE {
log.Info(fmt.Sprintf("operation %s of %s encountered a non transient error %s, giving up operation :(", operationType, object.GetControllerName(), errMsg))
}
object.SetObservedGeneration(object.GetGeneration())
err := UpdateStatus(ctx, k8sClient, object)
if err != nil {
return ctrl.Result{}, err
}
if operationType == smClientTypes.DELETE {
return ctrl.Result{}, fmt.Errorf(errMsg)
}
return ctrl.Result{}, nil
return ctrl.Result{}, UpdateStatus(ctx, k8sClient, object)
}

func MarkAsTransientError(ctx context.Context, k8sClient client.Client, operationType smClientTypes.OperationCategory, err error, object common.SAPBTPResource) (ctrl.Result, error) {
log := GetLogger(ctx)
//DO NOT REMOVE - 429 error is not reflected to the status
if smError, ok := err.(*sm.ServiceManagerError); !ok || smError.StatusCode != http.StatusTooManyRequests {
SetInProgressConditions(ctx, operationType, err.Error(), object)
log.Info(fmt.Sprintf("operation %s of %s encountered a transient error %s, retrying operation :)", operationType, object.GetControllerName(), err.Error()))
if updateErr := UpdateStatus(ctx, k8sClient, object); updateErr != nil {
return ctrl.Result{}, updateErr
}
log.Info(fmt.Sprintf("operation %s of %s encountered a transient error %s, retrying operation :)", operationType, object.GetControllerName(), err.Error()))
SetInProgressConditions(ctx, operationType, err.Error(), object)
if updateErr := UpdateStatus(ctx, k8sClient, object); updateErr != nil {
return ctrl.Result{}, updateErr
}

return ctrl.Result{}, err
Expand Down
4 changes: 2 additions & 2 deletions internal/utils/condition_utils_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package utils

import (
"fmt"
"net/http"

"github.com/SAP/sap-btp-service-operator/api/common"
Expand Down Expand Up @@ -168,9 +169,8 @@ var _ = Describe("Condition Utils", func() {
Context("MarkAsNonTransientError", func() {
It("should mark as non-transient error and update status", func() {
operationType := smClientTypes.CREATE
errorMessage := "Non-transient error"

result, err := MarkAsNonTransientError(ctx, k8sClient, operationType, errorMessage, resource)
result, err := MarkAsNonTransientError(ctx, k8sClient, operationType, fmt.Errorf("Non-transient error"), resource)
Expect(err).ToNot(HaveOccurred())
Expect(result).To(Equal(ctrl.Result{}))
})
Expand Down
51 changes: 43 additions & 8 deletions internal/utils/controller_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"fmt"
"net/http"
"strings"
"time"

"github.com/SAP/sap-btp-service-operator/api/common"
"github.com/SAP/sap-btp-service-operator/client/sm"
Expand Down Expand Up @@ -128,19 +129,53 @@ func GetLogger(ctx context.Context) logr.Logger {
return ctx.Value(LogKey{}).(logr.Logger)
}

func HandleError(ctx context.Context, k8sClient client.Client, operationType smClientTypes.OperationCategory, err error, resource common.SAPBTPResource, ignoreNonTransient bool) (ctrl.Result, error) {
func HandleError(ctx context.Context, k8sClient client.Client, operationType smClientTypes.OperationCategory, err error, resource common.SAPBTPResource) (ctrl.Result, error) {
log := GetLogger(ctx)
var smError *sm.ServiceManagerError
ok := errors.As(err, &smError)
if !ok {
log.Info("unable to cast error to SM error, will be treated as non transient")
return MarkAsNonTransientError(ctx, k8sClient, operationType, err.Error(), resource)
}
if ignoreNonTransient || IsTransientError(smError, log) {
if ok := errors.As(err, &smError); ok {
if smError.StatusCode == http.StatusTooManyRequests {
log.Info(fmt.Sprintf("SM returned 429 (%s), requeueing...", smError.Error()))
return handleRateLimitError(smError, log)
}

log.Info(fmt.Sprintf("SM returned error: %s", smError.Error()))
return MarkAsTransientError(ctx, k8sClient, operationType, smError, resource)
}

return MarkAsNonTransientError(ctx, k8sClient, operationType, smError.Error(), resource)
log.Info(fmt.Sprintf("unable to cast error to SM error, will be treated as non transient. (error: %v)", err))
return MarkAsNonTransientError(ctx, k8sClient, operationType, err, resource)
}

func handleRateLimitError(smError *sm.ServiceManagerError, log logr.Logger) (ctrl.Result, error) {
retryAfterStr := smError.ResponseHeaders.Get("Retry-After")
if len(retryAfterStr) > 0 {
log.Info(fmt.Sprintf("SM returned 429 with Retry-After: %s, requeueing after it...", retryAfterStr))
retryAfter, err := time.Parse(time.DateTime, retryAfterStr[:len(time.DateTime)]) // format 2024-11-11 14:59:33 +0000 UTC
if err != nil {
log.Error(err, "failed to parse Retry-After header, using default requeue time")
} else {
timeToRequeue := time.Until(retryAfter)
log.Info(fmt.Sprintf("requeueing after %d minutes, %d seconds", int(timeToRequeue.Minutes()), int(timeToRequeue.Seconds())%60))
return ctrl.Result{RequeueAfter: timeToRequeue}, nil
}
}

return ctrl.Result{Requeue: true}, nil
}

func HandleDeleteError(ctx context.Context, k8sClient client.Client, err error, object common.SAPBTPResource) (ctrl.Result, error) {
log := GetLogger(ctx)
log.Info(fmt.Sprintf("handling delete error: %v", err))
var smError *sm.ServiceManagerError
if errors.As(err, &smError) && smError.StatusCode == http.StatusTooManyRequests {
return handleRateLimitError(smError, log)
}

if _, updateErr := MarkAsNonTransientError(ctx, k8sClient, smClientTypes.DELETE, err, object); updateErr != nil {
log.Error(updateErr, "failed to update resource status")
return ctrl.Result{}, updateErr
}
return ctrl.Result{}, err
}

func IsTransientError(smError *sm.ServiceManagerError, log logr.Logger) bool {
Expand Down
Loading