diff --git a/client/option.go b/client/option.go index 29a0161908..ce2235a33d 100644 --- a/client/option.go +++ b/client/option.go @@ -325,12 +325,13 @@ func WithFailureRetry(p *retry.FailurePolicy) Option { if p == nil { return } - di.Push(fmt.Sprintf("WithFailureRetry(%+v)", *p)) + di.Push(fmt.Sprintf("WithFailureRetry(%+v)", p)) if o.RetryMethodPolicies == nil { o.RetryMethodPolicies = make(map[string]retry.Policy) } - if o.RetryMethodPolicies[retry.Wildcard].BackupPolicy != nil { - panic("BackupPolicy has been setup, cannot support Failure Retry at same time") + if o.RetryMethodPolicies[retry.Wildcard].MixedPolicy != nil || + o.RetryMethodPolicies[retry.Wildcard].BackupPolicy != nil { + panic("MixedPolicy or BackupPolicy has been setup, cannot support Failure Retry at same time") } o.RetryMethodPolicies[retry.Wildcard] = retry.BuildFailurePolicy(p) }} @@ -342,17 +343,33 @@ func WithBackupRequest(p *retry.BackupPolicy) Option { if p == nil { return } - di.Push(fmt.Sprintf("WithBackupRequest(%+v)", *p)) + di.Push(fmt.Sprintf("WithBackupRequest(%+v)", p)) if o.RetryMethodPolicies == nil { o.RetryMethodPolicies = make(map[string]retry.Policy) } - if o.RetryMethodPolicies[retry.Wildcard].FailurePolicy != nil { - panic("BackupPolicy has been setup, cannot support Failure Retry at same time") + if o.RetryMethodPolicies[retry.Wildcard].MixedPolicy != nil || + o.RetryMethodPolicies[retry.Wildcard].FailurePolicy != nil { + panic("MixedPolicy or FailurePolicy has been setup, cannot support Backup Request at same time") } o.RetryMethodPolicies[retry.Wildcard] = retry.BuildBackupRequest(p) }} } +// WithMixedRetry sets the mixed retry policy for client, it will take effect for all methods. +func WithMixedRetry(p *retry.MixedPolicy) Option { + return Option{F: func(o *client.Options, di *utils.Slice) { + if p == nil { + return + } + di.Push(fmt.Sprintf("WithMixedRetry(%+v)", p)) + if o.RetryMethodPolicies == nil { + o.RetryMethodPolicies = make(map[string]retry.Policy) + } + // no need to check whether BackupPolicy or FailurePolicy has been set up, just let mixed retry replace it + o.RetryMethodPolicies[retry.Wildcard] = retry.BuildMixedPolicy(p) + }} +} + // WithRetryMethodPolicies sets the retry policy for method. // The priority is higher than WithFailureRetry and WithBackupRequest. Only the methods which are not included by // this config will use the policy that is configured by WithFailureRetry or WithBackupRequest.
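Reviewer note: a minimal usage sketch for the new option above. This is illustrative only; the wrapping function and the 50ms delay are assumptions, while `retry.NewMixedPolicy`, `(*MixedPolicy).WithSpecifiedResultRetry`, `retry.AllErrorRetry` and `client.WithMixedRetry` all come from this change.

```go
package example

import (
	"github.com/cloudwego/kitex/client"
	"github.com/cloudwego/kitex/pkg/retry"
)

// mixedRetryOption enables the new mixed retry mode for all methods:
// a backup request is fired if no response arrives within 50ms, and failed
// responses are retried as well (here: any non-nil error via AllErrorRetry).
func mixedRetryOption() client.Option {
	mp := retry.NewMixedPolicy(50) // delayMS, typically set near the TP99 latency
	mp.WithSpecifiedResultRetry(retry.AllErrorRetry())
	// Per the option implementation above, WithMixedRetry replaces any
	// FailurePolicy/BackupPolicy previously configured for the wildcard method.
	return client.WithMixedRetry(mp)
}
```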
diff --git a/client/option_test.go b/client/option_test.go index 883f67481c..4e17645f19 100644 --- a/client/option_test.go +++ b/client/option_test.go @@ -19,6 +19,7 @@ package client import ( "context" "crypto/tls" + "errors" "fmt" "reflect" "testing" @@ -53,46 +54,76 @@ import ( ) func TestRetryOptionDebugInfo(t *testing.T) { - fp := retry.NewFailurePolicy() - fp.WithDDLStop() - expectPolicyStr := "WithFailureRetry({StopPolicy:{MaxRetryTimes:2 MaxDurationMS:0 DisableChainStop:false DDLStop:true " + - "CBPolicy:{ErrorRate:0.1}} BackOffPolicy:&{BackOffType:none CfgItems:map[]} RetrySameNode:false ShouldResultRetry:{ErrorRetry:false, RespRetry:false}})" - policyStr := fmt.Sprintf("WithFailureRetry(%+v)", fp) - test.Assert(t, policyStr == expectPolicyStr, policyStr) - - fp.WithFixedBackOff(10) - expectPolicyStr = "WithFailureRetry({StopPolicy:{MaxRetryTimes:2 MaxDurationMS:0 DisableChainStop:false DDLStop:true " + - "CBPolicy:{ErrorRate:0.1}} BackOffPolicy:&{BackOffType:fixed CfgItems:map[fix_ms:10]} RetrySameNode:false ShouldResultRetry:{ErrorRetry:false, RespRetry:false}})" - policyStr = fmt.Sprintf("WithFailureRetry(%+v)", fp) - test.Assert(t, policyStr == expectPolicyStr, policyStr) - - fp.WithRandomBackOff(10, 20) - fp.DisableChainRetryStop() - expectPolicyStr = "WithFailureRetry({StopPolicy:{MaxRetryTimes:2 MaxDurationMS:0 DisableChainStop:true DDLStop:true " + - "CBPolicy:{ErrorRate:0.1}} BackOffPolicy:&{BackOffType:random CfgItems:map[max_ms:20 min_ms:10]} RetrySameNode:false ShouldResultRetry:{ErrorRetry:false, RespRetry:false}})" - policyStr = fmt.Sprintf("WithFailureRetry(%+v)", fp) - test.Assert(t, policyStr == expectPolicyStr, policyStr) - - fp.WithRetrySameNode() - expectPolicyStr = "WithFailureRetry({StopPolicy:{MaxRetryTimes:2 MaxDurationMS:0 DisableChainStop:true DDLStop:true " + - "CBPolicy:{ErrorRate:0.1}} BackOffPolicy:&{BackOffType:random CfgItems:map[max_ms:20 min_ms:10]} RetrySameNode:true ShouldResultRetry:{ErrorRetry:false, RespRetry:false}})" - policyStr = fmt.Sprintf("WithFailureRetry(%+v)", fp) - test.Assert(t, policyStr == expectPolicyStr, policyStr) - - fp.WithSpecifiedResultRetry(&retry.ShouldResultRetry{ErrorRetry: func(err error, ri rpcinfo.RPCInfo) bool { - return false - }}) - expectPolicyStr = "WithFailureRetry({StopPolicy:{MaxRetryTimes:2 MaxDurationMS:0 DisableChainStop:true DDLStop:true " + - "CBPolicy:{ErrorRate:0.1}} BackOffPolicy:&{BackOffType:random CfgItems:map[max_ms:20 min_ms:10]} RetrySameNode:true ShouldResultRetry:{ErrorRetry:true, RespRetry:false}})" - policyStr = fmt.Sprintf("WithFailureRetry(%+v)", fp) - test.Assert(t, policyStr == expectPolicyStr, policyStr) + t.Run("FailurePolicy", func(t *testing.T) { + fp := retry.NewFailurePolicy() + fp.WithDDLStop() + expectPolicyStr := "WithFailureRetry({StopPolicy:{MaxRetryTimes:2 MaxDurationMS:0 DisableChainStop:false DDLStop:true " + + "CBPolicy:{ErrorRate:0.1}} BackOffPolicy:&{BackOffType:none CfgItems:map[]} RetrySameNode:false ShouldResultRetry:{ErrorRetry:false, RespRetry:false}})" + opt := WithFailureRetry(fp) + err := checkOneOptionDebugInfo(t, opt, expectPolicyStr) + test.Assert(t, err == nil, err) + + fp.WithFixedBackOff(10) + expectPolicyStr = "WithFailureRetry({StopPolicy:{MaxRetryTimes:2 MaxDurationMS:0 DisableChainStop:false DDLStop:true " + + "CBPolicy:{ErrorRate:0.1}} BackOffPolicy:&{BackOffType:fixed CfgItems:map[fix_ms:10]} RetrySameNode:false ShouldResultRetry:{ErrorRetry:false, RespRetry:false}})" + opt = WithFailureRetry(fp) + err = checkOneOptionDebugInfo(t, opt, 
expectPolicyStr) + test.Assert(t, err == nil, err) + + fp.WithRandomBackOff(10, 20) + fp.DisableChainRetryStop() + expectPolicyStr = "WithFailureRetry({StopPolicy:{MaxRetryTimes:2 MaxDurationMS:0 DisableChainStop:true DDLStop:true " + + "CBPolicy:{ErrorRate:0.1}} BackOffPolicy:&{BackOffType:random CfgItems:map[max_ms:20 min_ms:10]} RetrySameNode:false ShouldResultRetry:{ErrorRetry:false, RespRetry:false}})" + opt = WithFailureRetry(fp) + err = checkOneOptionDebugInfo(t, opt, expectPolicyStr) + test.Assert(t, err == nil, err) + + fp.WithRetrySameNode() + expectPolicyStr = "WithFailureRetry({StopPolicy:{MaxRetryTimes:2 MaxDurationMS:0 DisableChainStop:true DDLStop:true " + + "CBPolicy:{ErrorRate:0.1}} BackOffPolicy:&{BackOffType:random CfgItems:map[max_ms:20 min_ms:10]} RetrySameNode:true ShouldResultRetry:{ErrorRetry:false, RespRetry:false}})" + opt = WithFailureRetry(fp) + err = checkOneOptionDebugInfo(t, opt, expectPolicyStr) + test.Assert(t, err == nil, err) + + fp.WithSpecifiedResultRetry(&retry.ShouldResultRetry{ErrorRetryWithCtx: func(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { + return false + }}) + expectPolicyStr = "WithFailureRetry({StopPolicy:{MaxRetryTimes:2 MaxDurationMS:0 DisableChainStop:true DDLStop:true " + + "CBPolicy:{ErrorRate:0.1}} BackOffPolicy:&{BackOffType:random CfgItems:map[max_ms:20 min_ms:10]} RetrySameNode:true ShouldResultRetry:{ErrorRetry:true, RespRetry:false}})" + opt = WithFailureRetry(fp) + err = checkOneOptionDebugInfo(t, opt, expectPolicyStr) + test.Assert(t, err == nil, err) + }) - bp := retry.NewBackupPolicy(20) - expectPolicyStr = "WithBackupRequest({RetryDelayMS:20 StopPolicy:{MaxRetryTimes:1 MaxDurationMS:0 DisableChainStop:false " + - "DDLStop:false CBPolicy:{ErrorRate:0.1}} RetrySameNode:false})" - policyStr = fmt.Sprintf("WithBackupRequest(%+v)", bp) - test.Assert(t, policyStr == expectPolicyStr, policyStr) - WithBackupRequest(bp) + t.Run("BackupPolicy", func(t *testing.T) { + bp := retry.NewBackupPolicy(20) + expectPolicyStr := "WithBackupRequest({RetryDelayMS:20 StopPolicy:{MaxRetryTimes:1 MaxDurationMS:0 DisableChainStop:false " + + "DDLStop:false CBPolicy:{ErrorRate:0.1}} RetrySameNode:false})" + opt := WithBackupRequest(bp) + err := checkOneOptionDebugInfo(t, opt, expectPolicyStr) + test.Assert(t, err == nil, err) + }) + + t.Run("MixedPolicy", func(t *testing.T) { + mp := retry.NewMixedPolicy(100) + mp.WithDDLStop() + expectPolicyStr := "WithMixedRetry({RetryDelayMS:100 StopPolicy:{MaxRetryTimes:1 MaxDurationMS:0 DisableChainStop:false " + + "DDLStop:true CBPolicy:{ErrorRate:0.1}} BackOffPolicy:&{BackOffType:none CfgItems:map[]} RetrySameNode:false " + + "ShouldResultRetry:{ErrorRetry:false, RespRetry:false}})" + opt := WithMixedRetry(mp) + err := checkOneOptionDebugInfo(t, opt, expectPolicyStr) + test.Assert(t, err == nil, err) + + mp.WithSpecifiedResultRetry(&retry.ShouldResultRetry{ErrorRetryWithCtx: func(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { + return false + }}) + expectPolicyStr = "WithMixedRetry({RetryDelayMS:100 StopPolicy:{MaxRetryTimes:1 MaxDurationMS:0 DisableChainStop:false " + + "DDLStop:true CBPolicy:{ErrorRate:0.1}} BackOffPolicy:&{BackOffType:none CfgItems:map[]} RetrySameNode:false " + + "ShouldResultRetry:{ErrorRetry:true, RespRetry:false}})" + opt = WithMixedRetry(mp) + err = checkOneOptionDebugInfo(t, opt, expectPolicyStr) + test.Assert(t, err == nil, err) + }) } func TestRetryOption(t *testing.T) { @@ -708,3 +739,15 @@ func TestWithGRPCTLSConfig(t *testing.T) { opts =
client.NewOptions([]client.Option{WithGRPCTLSConfig(cfg)}) test.Assert(t, opts.GRPCConnectOpts != nil) } + +func checkOneOptionDebugInfo(t *testing.T, opt Option, expectStr string) error { + o := &Options{} + o.Apply([]Option{opt}) + if len(o.DebugInfo) != 1 { + return errors.New("length of DebugInfo is unexpected") + } + if o.DebugInfo[0] != expectStr { + return fmt.Errorf("DebugInfo not match with expect str:\n debugInfo=%s", o.DebugInfo[0]) + } + return nil +} diff --git a/pkg/retry/backup.go b/pkg/retry/backup.go index fa3bda5da1..b189f38293 100644 --- a/pkg/retry/backup.go +++ b/pkg/retry/backup.go @@ -20,7 +20,10 @@ import ( "fmt" ) -const maxBackupRetryTimes = 2 +const ( + maxBackupRetryTimes = 2 + defaultBackupRetryTimes = 1 +) // NewBackupPolicy init backup request policy // the param delayMS is suggested to set as TP99 @@ -31,7 +34,7 @@ func NewBackupPolicy(delayMS uint32) *BackupPolicy { p := &BackupPolicy{ RetryDelayMS: delayMS, StopPolicy: StopPolicy{ - MaxRetryTimes: 1, + MaxRetryTimes: defaultBackupRetryTimes, DisableChainStop: false, CBPolicy: CBPolicy{ ErrorRate: defaultCBErrRate, @@ -71,3 +74,35 @@ func (p *BackupPolicy) WithRetrySameNode() { func (p *BackupPolicy) String() string { return fmt.Sprintf("{RetryDelayMS:%+v StopPolicy:%+v RetrySameNode:%+v}", p.RetryDelayMS, p.StopPolicy, p.RetrySameNode) } + +// Equals to check if BackupPolicy is equal +func (p *BackupPolicy) Equals(np *BackupPolicy) bool { + if p == nil { + return np == nil + } + if np == nil { + return false + } + if p.RetryDelayMS != np.RetryDelayMS { + return false + } + if p.StopPolicy != np.StopPolicy { + return false + } + if p.RetrySameNode != np.RetrySameNode { + return false + } + + return true +} + +func (p *BackupPolicy) DeepCopy() *BackupPolicy { + if p == nil { + return nil + } + return &BackupPolicy{ + RetryDelayMS: p.RetryDelayMS, + StopPolicy: p.StopPolicy, // not a pointer, will copy the value here + RetrySameNode: p.RetrySameNode, + } +} diff --git a/pkg/retry/backup_retryer.go b/pkg/retry/backup_retryer.go index 1ff8ff8e3c..21980f57ca 100644 --- a/pkg/retry/backup_retryer.go +++ b/pkg/retry/backup_retryer.go @@ -30,7 +30,6 @@ import ( "github.com/cloudwego/kitex/pkg/circuitbreak" "github.com/cloudwego/kitex/pkg/gofunc" "github.com/cloudwego/kitex/pkg/kerrors" - "github.com/cloudwego/kitex/pkg/klog" "github.com/cloudwego/kitex/pkg/rpcinfo" "github.com/cloudwego/kitex/pkg/utils" ) @@ -48,16 +47,17 @@ func newBackupRetryer(policy Policy, cbC *cbContainer) (Retryer, error) { type backupRetryer struct { enable bool + retryDelay time.Duration policy *BackupPolicy cbContainer *cbContainer - retryDelay time.Duration sync.RWMutex errMsg string } type resultWrapper struct { - ri rpcinfo.RPCInfo - err error + ri rpcinfo.RPCInfo + resp interface{} + err error } // ShouldRetry implements the Retryer interface. 
@@ -92,12 +92,13 @@ func (r *backupRetryer) Do(ctx context.Context, rpcCall RPCCallFunc, firstRI rpc retryTimes := r.policy.StopPolicy.MaxRetryTimes retryDelay := r.retryDelay r.RUnlock() + var callTimes int32 = 0 var callCosts utils.StringBuilder callCosts.RawStringBuilder().Grow(32) var recordCostDoing int32 = 0 var abort int32 = 0 - finishedCount := 0 + finishedErrCount := 0 // notice: buff num of chan is very important here, it cannot less than call times, or the below chan receive will block done := make(chan *resultWrapper, retryTimes+1) cbKey, _ := r.cbContainer.cbCtl.GetKey(ctx, req) @@ -126,7 +127,7 @@ func (r *backupRetryer) Do(ctx context.Context, rpcCall RPCCallFunc, firstRI rpc if panicInfo := recover(); panicInfo != nil { e = panicToErr(ctx, panicInfo, firstRI) } - done <- &resultWrapper{cRI, e} + done <- &resultWrapper{ri: cRI, err: e} }() ct := atomic.AddInt32(&callTimes, 1) callStart := time.Now() @@ -152,7 +153,7 @@ func (r *backupRetryer) Do(ctx context.Context, rpcCall RPCCallFunc, firstRI rpc // There will be only one request (goroutine) pass the `checkRPCState`, others will skip decoding // and return `ErrRPCFinish`, to avoid concurrent write to response and save the cost of decoding. // We can safely ignore this error and wait for the response of the passed goroutine. - if finishedCount++; finishedCount >= retryTimes+1 { + if finishedErrCount++; finishedErrCount >= retryTimes+1 { // But if all requests return this error, it must be a bug, preventive panic to avoid dead loop panic(errUnexpectedFinish) } @@ -178,29 +179,21 @@ func (r *backupRetryer) UpdatePolicy(rp Policy) (err error) { r.Unlock() return nil } - var errMsg string if rp.BackupPolicy == nil || rp.Type != BackupType { - errMsg = "BackupPolicy is nil or retry type not match, cannot do update in backupRetryer" - err = errors.New(errMsg) + err = errors.New("BackupPolicy is nil or retry type not match, cannot do update in backupRetryer") } - if errMsg == "" && (rp.BackupPolicy.RetryDelayMS == 0 || rp.BackupPolicy.StopPolicy.MaxRetryTimes < 0 || - rp.BackupPolicy.StopPolicy.MaxRetryTimes > maxBackupRetryTimes) { - errMsg = "invalid backup request delay duration or retryTimes" - err = errors.New(errMsg) + if err == nil && rp.BackupPolicy.RetryDelayMS == 0 { + err = errors.New("invalid retry delay duration in backupRetryer") } - if errMsg == "" { - if e := checkCBErrorRate(&rp.BackupPolicy.StopPolicy.CBPolicy); e != nil { - rp.BackupPolicy.StopPolicy.CBPolicy.ErrorRate = defaultCBErrRate - errMsg = fmt.Sprintf("backupRetryer %s, use default %0.2f", e.Error(), defaultCBErrRate) - klog.Warnf(errMsg) - } + if err == nil { + err = checkStopPolicy(&rp.BackupPolicy.StopPolicy, maxBackupRetryTimes, r) } r.Lock() defer r.Unlock() r.enable = rp.Enable if err != nil { - r.errMsg = errMsg + r.errMsg = err.Error() return err } r.policy = rp.BackupPolicy @@ -220,14 +213,11 @@ func (r *backupRetryer) AppendErrMsgIfNeeded(ctx context.Context, err error, ri func (r *backupRetryer) Dump() map[string]interface{} { r.RLock() defer r.RUnlock() + dm := map[string]interface{}{"enable": r.enable, "backup_request": r.policy} if r.errMsg != "" { - return map[string]interface{}{ - "enable": r.enable, - "backupRequest": r.policy, - "errMsg": r.errMsg, - } + dm["err_msg"] = r.errMsg } - return map[string]interface{}{"enable": r.enable, "backupRequest": r.policy} + return dm } // Type implements the Retryer interface. 
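Reviewer note: the Dump() rework above renames the exposed keys from `backupRequest`/`errMsg` to `backup_request`/`err_msg`, and `err_msg` is now emitted only when a policy update failed. A rough sketch of the resulting map shape (the concrete values and the wrapping function are illustrative, not taken from this diff):

```go
package example

import "github.com/cloudwego/kitex/pkg/retry"

// dumpShape mirrors the keys now returned by backupRetryer.Dump();
// the values here are made up for the example.
func dumpShape() map[string]interface{} {
	return map[string]interface{}{
		"enable":         true,
		"backup_request": retry.NewBackupPolicy(20),
		// "err_msg" is added only when the last UpdatePolicy call failed, e.g.:
		// "err_msg": "invalid retry delay duration in backupRetryer",
	}
}
```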
diff --git a/pkg/retry/backup_test.go b/pkg/retry/backup_test.go index 4193f2de24..ca09999883 100644 --- a/pkg/retry/backup_test.go +++ b/pkg/retry/backup_test.go @@ -17,9 +17,15 @@ package retry import ( + "context" + "sync/atomic" "testing" + "time" "github.com/cloudwego/thriftgo/pkg/test" + + "github.com/cloudwego/kitex/pkg/rpcinfo" + "github.com/cloudwego/kitex/pkg/stats" ) // test BackupPolicy string @@ -31,3 +37,65 @@ func TestBackupPolicy_String(t *testing.T) { "DisableChainStop:false DDLStop:false CBPolicy:{ErrorRate:0.3}} RetrySameNode:true}" test.Assert(t, r.String() == msg) } + +// test BackupPolicy call while rpcTime > delayTime +func TestBackupPolicyCall(t *testing.T) { + ctx := context.Background() + rc := NewRetryContainer() + err := rc.Init(map[string]Policy{Wildcard: { + Enable: true, + Type: 1, + BackupPolicy: &BackupPolicy{ + RetryDelayMS: 30, + StopPolicy: StopPolicy{ + MaxRetryTimes: 2, + DisableChainStop: false, + CBPolicy: CBPolicy{ + ErrorRate: defaultCBErrRate, + }, + }, + }, + }}, nil) + test.Assert(t, err == nil, err) + + callTimes := int32(0) + firstRI := genRPCInfo() + secondRI := genRPCInfoWithRemoteTag(remoteTags) + ctx = rpcinfo.NewCtxWithRPCInfo(ctx, firstRI) + ri, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, func(ctx context.Context, r Retryer) (rpcinfo.RPCInfo, interface{}, error) { + atomic.AddInt32(&callTimes, 1) + if atomic.LoadInt32(&callTimes) == 1 { + // mock timeout for the first request and get the response of the backup request. + time.Sleep(time.Millisecond * 50) + return firstRI, nil, nil + } + return secondRI, nil, nil + }, firstRI, nil) + test.Assert(t, err == nil, err) + test.Assert(t, atomic.LoadInt32(&callTimes) == 2) + test.Assert(t, !ok) + v, ok := ri.To().Tag(remoteTagKey) + test.Assert(t, ok) + test.Assert(t, v == remoteTagValue) +} + +func TestBackupRetryWithRPCInfo(t *testing.T) { + // backup retry + ctx := context.Background() + rc := NewRetryContainer() + + ri := genRPCInfo() + ctx = rpcinfo.NewCtxWithRPCInfo(ctx, ri) + rpcinfo.Record(ctx, ri, stats.RPCStart, nil) + + // call with retry policy + var callTimes int32 + policy := BuildBackupRequest(NewBackupPolicy(10)) + ri, ok, err := rc.WithRetryIfNeeded(ctx, &policy, retryCall(&callTimes, ri, true), ri, nil) + test.Assert(t, err == nil, err) + test.Assert(t, !ok) + test.Assert(t, ri.Stats().GetEvent(stats.RPCStart).Status() == stats.StatusInfo) + test.Assert(t, ri.Stats().GetEvent(stats.RPCFinish).Status() == stats.StatusInfo) + test.Assert(t, ri.To().Address().String() == "10.20.30.40:8888") + test.Assert(t, atomic.LoadInt32(&callTimes) == 2) +} diff --git a/pkg/retry/failure.go b/pkg/retry/failure.go index ffb0fdf2ac..c20e15bbcc 100644 --- a/pkg/retry/failure.go +++ b/pkg/retry/failure.go @@ -28,6 +28,13 @@ import ( const maxFailureRetryTimes = 5 +// AllErrorRetry is common choice for ShouldResultRetry. +func AllErrorRetry() *ShouldResultRetry { + return &ShouldResultRetry{ErrorRetryWithCtx: func(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { + return err != nil + }} +} + // NewFailurePolicy init default failure retry policy func NewFailurePolicy() *FailurePolicy { p := &FailurePolicy{ @@ -115,14 +122,104 @@ func (p *FailurePolicy) WithSpecifiedResultRetry(rr *ShouldResultRetry) { // String prints human readable information. 
func (p *FailurePolicy) String() string { return fmt.Sprintf("{StopPolicy:%+v BackOffPolicy:%+v RetrySameNode:%+v "+ - "ShouldResultRetry:{ErrorRetry:%t, RespRetry:%t}}", p.StopPolicy, p.BackOffPolicy, p.RetrySameNode, p.IsErrorRetryNonNil(), p.IsRespRetryNonNil()) + "ShouldResultRetry:{ErrorRetry:%t, RespRetry:%t}}", p.StopPolicy, p.BackOffPolicy, p.RetrySameNode, p.isErrorRetryWithCtxNonNil(), p.isRespRetryWithCtxNonNil()) } -// AllErrorRetry is common choice for ShouldResultRetry. -func AllErrorRetry() *ShouldResultRetry { - return &ShouldResultRetry{ErrorRetryWithCtx: func(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { - return err != nil - }} +// Equals to check if FailurePolicy is equal +func (p *FailurePolicy) Equals(np *FailurePolicy) bool { + if p == nil { + return np == nil + } + if np == nil { + return false + } + if p.StopPolicy != np.StopPolicy { + return false + } + if !p.BackOffPolicy.Equals(np.BackOffPolicy) { + return false + } + if p.RetrySameNode != np.RetrySameNode { + return false + } + if p.Extra != np.Extra { + return false + } + // no need to check `ShouldResultRetry`, it is only set up by option; + // checking it in the remote config case would always return false + return true +} + +func (p *FailurePolicy) DeepCopy() *FailurePolicy { + if p == nil { + return nil + } + return &FailurePolicy{ + StopPolicy: p.StopPolicy, + BackOffPolicy: p.BackOffPolicy.DeepCopy(), + RetrySameNode: p.RetrySameNode, + ShouldResultRetry: p.ShouldResultRetry, // don't need DeepCopy + Extra: p.Extra, + } +} + +// isRespRetryWithCtxNonNil is used to check if RespRetryWithCtx is not nil. +func (p *FailurePolicy) isRespRetryWithCtxNonNil() bool { + return p.ShouldResultRetry != nil && p.ShouldResultRetry.RespRetryWithCtx != nil +} + +// isErrorRetryWithCtxNonNil is used to check if ErrorRetryWithCtx is not nil +func (p *FailurePolicy) isErrorRetryWithCtxNonNil() bool { + return p.ShouldResultRetry != nil && p.ShouldResultRetry.ErrorRetryWithCtx != nil +} + +// isRespRetryNonNil is used to check if RespRetry is not nil. +// Deprecated: please use isRespRetryWithCtxNonNil instead of isRespRetryNonNil. +func (p *FailurePolicy) isRespRetryNonNil() bool { + return p.ShouldResultRetry != nil && p.ShouldResultRetry.RespRetry != nil +} + +// isErrorRetryNonNil is used to check if ErrorRetry is not nil. +// Deprecated: please use isErrorRetryWithCtxNonNil instead of isErrorRetryNonNil. +func (p *FailurePolicy) isErrorRetryNonNil() bool { + return p.ShouldResultRetry != nil && p.ShouldResultRetry.ErrorRetry != nil +} + +// isRetryForTimeout is used to check if a timeout error needs to be retried +func (p *FailurePolicy) isRetryForTimeout() bool { + return p.ShouldResultRetry == nil || !p.ShouldResultRetry.NotRetryForTimeout +} + +// isRespRetry is used to check if the resp needs to be retried. +func (p *FailurePolicy) isRespRetry(ctx context.Context, resp interface{}, ri rpcinfo.RPCInfo) bool { + // note: actually, it is better to check IsRespRetry to ignore the bad cases, + // but IsRespRetry is a deprecated definition and here will be executed for every call, depends on ConvertResultRetry to ensure the compatibility + return p.isRespRetryWithCtxNonNil() && p.ShouldResultRetry.RespRetryWithCtx(ctx, resp, ri) +} + +// isErrorRetry is used to check if the error needs to be retried.
+func (p *FailurePolicy) isErrorRetry(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { + // note: actually, it is better to check IsErrorRetry to ignore the bad cases, + // but IsErrorRetry is a deprecated definition and here will be executed for every call, depends on ConvertResultRetry to ensure the compatibility + return p.isErrorRetryWithCtxNonNil() && p.ShouldResultRetry.ErrorRetryWithCtx(ctx, err, ri) +} + +// convertResultRetry is used to convert 'ErrorRetry and RespRetry' to 'ErrorRetryWithCtx and RespRetryWithCtx' +func (p *FailurePolicy) convertResultRetry() { + if p == nil || p.ShouldResultRetry == nil { + return + } + rr := p.ShouldResultRetry + if rr.ErrorRetry != nil && rr.ErrorRetryWithCtx == nil { + rr.ErrorRetryWithCtx = func(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { + return rr.ErrorRetry(err, ri) + } + } + if rr.RespRetry != nil && rr.RespRetryWithCtx == nil { + rr.RespRetryWithCtx = func(ctx context.Context, resp interface{}, ri rpcinfo.RPCInfo) bool { + return rr.RespRetry(resp, ri) + } + } } // BackOff is the interface of back off implements diff --git a/pkg/retry/failure_retryer.go b/pkg/retry/failure_retryer.go index 11686eb6c4..694855f0d0 100644 --- a/pkg/retry/failure_retryer.go +++ b/pkg/retry/failure_retryer.go @@ -33,7 +33,7 @@ import ( ) func newFailureRetryer(policy Policy, r *ShouldResultRetry, cbC *cbContainer) (Retryer, error) { - fr := &failureRetryer{specifiedResultRetry: r, cbContainer: cbC} + fr := &failureRetryer{failureCommon: &failureCommon{specifiedResultRetry: r, cbContainer: cbC}} if err := fr.UpdatePolicy(policy); err != nil { return nil, fmt.Errorf("newfailureRetryer failed, err=%w", err) } @@ -41,30 +41,22 @@ func newFailureRetryer(policy Policy, r *ShouldResultRetry, cbC *cbContainer) (R } type failureRetryer struct { - enable bool - policy *FailurePolicy - backOff BackOff - cbContainer *cbContainer - specifiedResultRetry *ShouldResultRetry + enable bool + *failureCommon + policy *FailurePolicy sync.RWMutex errMsg string } -// ShouldRetry implements the Retryer interface. +// ShouldRetry to check if retry request can be called, it is checked in retryer.Do. +// If not satisfy will return the reason message func (r *failureRetryer) ShouldRetry(ctx context.Context, err error, callTimes int, req interface{}, cbKey string) (string, bool) { r.RLock() defer r.RUnlock() if !r.enable { return "", false } - if stop, msg := circuitBreakerStop(ctx, r.policy.StopPolicy, r.cbContainer, req, cbKey); stop { - return msg, false - } - if stop, msg := ddlStop(ctx, r.policy.StopPolicy); stop { - return msg, false - } - r.backOff.Wait(callTimes) - return "", true + return r.shouldRetry(ctx, callTimes, req, cbKey, r.policy) } // AllowRetry implements the Retryer interface. 
@@ -109,8 +101,8 @@ func (r *failureRetryer) Do(ctx context.Context, rpcCall RPCCallFunc, firstRI rp if i == 0 { callStart = startTime } else if i > 0 { - if maxDuration > 0 && time.Since(startTime) > maxDuration { - err = makeRetryErr(ctx, "exceed max duration", callTimes) + if ret, e := isExceedMaxDuration(ctx, startTime, maxDuration, callTimes); ret { + err = e break } if msg, ok := r.ShouldRetry(ctx, err, i, req, cbKey); !ok { @@ -137,20 +129,8 @@ func (r *failureRetryer) Do(ctx context.Context, rpcCall RPCCallFunc, firstRI rp if !r.cbContainer.enablePercentageLimit && r.cbContainer.cbStat { circuitbreak.RecordStat(ctx, req, nil, err, cbKey, r.cbContainer.cbCtl, r.cbContainer.cbPanel) } - if err == nil { - if r.policy.IsRespRetry(ctx, resp, cRI) { - // user specified resp to do retry - continue - } + if !r.isRetryResult(ctx, cRI, resp, err, r.policy) { break - } else { - if i == retryTimes { - // stop retry then wrap error - err = kerrors.ErrRetry.WithCause(err) - } else if !r.isRetryErr(ctx, err, cRI) { - // not timeout or user specified error won't do retry - break - } } } recordRetryInfo(cRI, callTimes, callCosts.String()) @@ -168,32 +148,21 @@ func (r *failureRetryer) UpdatePolicy(rp Policy) (err error) { r.Unlock() return nil } - var errMsg string if rp.FailurePolicy == nil || rp.Type != FailureType { - errMsg = "FailurePolicy is nil or retry type not match, cannot do update in failureRetryer" - err = errors.New(errMsg) + err = errors.New("FailurePolicy is nil or retry type not match, cannot do update in failureRetryer") } - rt := rp.FailurePolicy.StopPolicy.MaxRetryTimes - if errMsg == "" && (rt < 0 || rt > maxFailureRetryTimes) { - errMsg = fmt.Sprintf("invalid failure MaxRetryTimes[%d]", rt) - err = errors.New(errMsg) - } - if errMsg == "" { - if e := checkCBErrorRate(&rp.FailurePolicy.StopPolicy.CBPolicy); e != nil { - rp.FailurePolicy.StopPolicy.CBPolicy.ErrorRate = defaultCBErrRate - errMsg = fmt.Sprintf("failureRetryer %s, use default %0.2f", e.Error(), defaultCBErrRate) - klog.Warnf(errMsg) - } + if err == nil { + err = checkStopPolicy(&rp.FailurePolicy.StopPolicy, maxFailureRetryTimes, r) } r.Lock() defer r.Unlock() r.enable = rp.Enable if err != nil { - r.errMsg = errMsg + r.errMsg = err.Error() return err } r.policy = rp.FailurePolicy - r.setSpecifiedResultRetryIfNeeded(r.specifiedResultRetry) + r.setSpecifiedResultRetryIfNeeded(r.specifiedResultRetry, r.policy) if bo, e := initBackOff(rp.FailurePolicy.BackOffPolicy); e != nil { r.errMsg = fmt.Sprintf("failureRetryer update BackOffPolicy failed, err=%s", e.Error()) klog.Warnf(r.errMsg) @@ -205,7 +174,7 @@ func (r *failureRetryer) UpdatePolicy(rp Policy) (err error) { // AppendErrMsgIfNeeded implements the Retryer interface. func (r *failureRetryer) AppendErrMsgIfNeeded(ctx context.Context, err error, ri rpcinfo.RPCInfo, msg string) { - if r.isRetryErr(ctx, err, ri) { + if r.isRetryErr(ctx, err, ri, r.policy) { // Add additional reason when retry is not applied. appendErrMsg(err, msg) } @@ -216,7 +185,53 @@ func (r *failureRetryer) Prepare(ctx context.Context, prevRI, retryRI rpcinfo.RP handleRetryInstance(r.policy.RetrySameNode, prevRI, retryRI) } -func (r *failureRetryer) isRetryErr(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { +// Type implements the Retryer interface. +func (r *failureRetryer) Type() Type { + return FailureType +} + +// Dump implements the Retryer interface. 
+func (r *failureRetryer) Dump() map[string]interface{} { + r.RLock() + defer r.RUnlock() + dm := make(map[string]interface{}) + dm["enable"] = r.enable + dm["failure_retry"] = r.policy + if r.policy != nil { + dm["specified_result_retry"] = r.dumpSpecifiedResultRetry(*r.policy) + } + if r.errMsg != "" { + dm["err_msg"] = r.errMsg + } + return dm +} + +type failureCommon struct { + backOff BackOff + specifiedResultRetry *ShouldResultRetry + cbContainer *cbContainer +} + +func (f *failureCommon) setSpecifiedResultRetryIfNeeded(rr *ShouldResultRetry, fp *FailurePolicy) { + if rr != nil { + // save the object specified by client.WithSpecifiedResultRetry(..) + f.specifiedResultRetry = rr + } + if fp != nil { + if f.specifiedResultRetry != nil { + // The priority of client.WithSpecifiedResultRetry(..) is higher, so always update it + // NOTE: client.WithSpecifiedResultRetry(..) will always reject a nil object + fp.ShouldResultRetry = f.specifiedResultRetry + } + + // even though rr passed from this func is nil, + // the Policy may also have ShouldResultRetry from client.WithFailureRetry or callopt.WithRetryPolicy. + // convertResultRetry is used to convert 'ErrorRetry and RespRetry' to 'ErrorRetryWithCtx and RespRetryWithCtx' + fp.convertResultRetry() + } +} + +func (r *failureCommon) isRetryErr(ctx context.Context, err error, ri rpcinfo.RPCInfo, fp *FailurePolicy) bool { if err == nil { return false } @@ -225,15 +240,53 @@ func (r *failureRetryer) isRetryErr(ctx context.Context, err error, ri rpcinfo.R // But CircuitBreak has been checked in ShouldRetry, it doesn't need to filter ServiceCircuitBreak. // If there are some other specified errors that cannot be retried, it should be filtered here. - if r.policy.IsRetryForTimeout() && kerrors.IsTimeoutError(err) { + if fp.isRetryForTimeout() && kerrors.IsTimeoutError(err) { + return true + } + if fp.isErrorRetry(ctx, err, ri) { return true } - if r.policy.IsErrorRetry(ctx, err, ri) { + return false +} + +func (r *failureCommon) shouldRetry(ctx context.Context, callTimes int, req interface{}, cbKey string, fp *FailurePolicy) (string, bool) { + if stop, msg := circuitBreakerStop(ctx, fp.StopPolicy, r.cbContainer, req, cbKey); stop { + return msg, false + } + if stop, msg := ddlStop(ctx, fp.StopPolicy); stop { + return msg, false + } + r.backOff.Wait(callTimes) + return "", true +} + +// isRetryResult to check if the result need to do retry +// Version Change Note: +// < v0.11.0 if the last result still failed, then wrap the error as RetryErr +// >= v0.11.0 don't wrap RetryErr. 
+// Consideration: Wrap as RetryErr will be reflected as a retry error from monitoring, which is not friendly for troubleshooting +func (r *failureCommon) isRetryResult(ctx context.Context, cRI rpcinfo.RPCInfo, resp interface{}, err error, fp *FailurePolicy) bool { + if err == nil { + if fp.isRespRetry(ctx, resp, cRI) { + // user specified resp to do retry + return true + } + } else if r.isRetryErr(ctx, err, cRI, fp) { return true } return false } +func (r *failureCommon) dumpSpecifiedResultRetry(fp FailurePolicy) map[string]bool { + return map[string]bool{ + "error_retry": fp.isErrorRetryWithCtxNonNil(), + "resp_retry": fp.isRespRetryWithCtxNonNil(), + // keep it for some versions to confirm the correctness when troubleshooting + "old_error_retry": fp.isErrorRetryNonNil(), + "old_resp_retry": fp.isRespRetryNonNil(), + } +} + func initBackOff(policy *BackOffPolicy) (bo BackOff, err error) { bo = NoneBackOff if policy == nil { @@ -269,48 +322,9 @@ func initBackOff(policy *BackOffPolicy) (bo BackOff, err error) { return } -// Type implements the Retryer interface. -func (r *failureRetryer) Type() Type { - return FailureType -} - -// Dump implements the Retryer interface. -func (r *failureRetryer) Dump() map[string]interface{} { - r.RLock() - defer r.RUnlock() - dm := make(map[string]interface{}) - dm["enable"] = r.enable - dm["failure_retry"] = r.policy - if r.policy != nil { - dm["specified_result_retry"] = map[string]bool{ - "error_retry": r.policy.IsErrorRetryWithCtxNonNil(), - "resp_retry": r.policy.IsRespRetryWithCtxNonNil(), - // keep it for some versions to confirm the correctness when troubleshooting - "old_error_retry": r.policy.IsErrorRetryNonNil(), - "old_resp_retry": r.policy.IsRespRetryNonNil(), - } - } - if r.errMsg != "" { - dm["errMsg"] = r.errMsg - } - return dm -} - -func (r *failureRetryer) setSpecifiedResultRetryIfNeeded(rr *ShouldResultRetry) { - if rr != nil { - // save the object specified by client.WithSpecifiedResultRetry(..) - r.specifiedResultRetry = rr - } - if r.policy != nil { - if r.specifiedResultRetry != nil { - // The priority of client.WithSpecifiedResultRetry(..) is higher, so always update it - // NOTE: client.WithSpecifiedResultRetry(..) will always reject a nil object - r.policy.ShouldResultRetry = r.specifiedResultRetry - } - - // even though rr passed from this func is nil, - // the Policy may also have ShouldResultRetry from client.WithFailureRetry or callopt.WithRetryPolicy. 
- // convertResultRetry is used to convert 'ErrorRetry and RespRetry' to 'ErrorRetryWithCtx and RespRetryWithCtx' - r.policy.ConvertResultRetry() +func isExceedMaxDuration(ctx context.Context, start time.Time, maxDuration time.Duration, callTimes int32) (bool, error) { + if maxDuration > 0 && time.Since(start) > maxDuration { + return true, makeRetryErr(ctx, fmt.Sprintf("exceed max duration[%v]", maxDuration), callTimes) } + return false, nil } diff --git a/pkg/retry/failure_test.go b/pkg/retry/failure_test.go index fe1bf87bd1..376342442e 100644 --- a/pkg/retry/failure_test.go +++ b/pkg/retry/failure_test.go @@ -17,10 +17,16 @@ package retry import ( + "context" + "sync/atomic" "testing" "time" "github.com/cloudwego/kitex/internal/test" + "github.com/cloudwego/kitex/pkg/kerrors" + "github.com/cloudwego/kitex/pkg/remote" + "github.com/cloudwego/kitex/pkg/rpcinfo" + "github.com/cloudwego/kitex/pkg/stats" ) func BenchmarkRandomBackOff_Wait(b *testing.B) { @@ -91,3 +97,597 @@ func TestNoneBackOff_String(t *testing.T) { msg := "NoneBackOff" test.Assert(t, bk.String() == msg) } + +// test FailurePolicy call +func TestFailurePolicyCall(t *testing.T) { + // call while rpc timeout + ctx := context.Background() + rc := NewRetryContainer() + failurePolicy := NewFailurePolicy() + failurePolicy.BackOffPolicy.BackOffType = FixedBackOffType + failurePolicy.BackOffPolicy.CfgItems = map[BackOffCfgKey]float64{ + FixMSBackOffCfgKey: 100.0, + } + failurePolicy.StopPolicy.MaxDurationMS = 100 + err := rc.Init(map[string]Policy{Wildcard: { + Enable: true, + Type: 0, + FailurePolicy: failurePolicy, + }}, nil) + test.Assert(t, err == nil, err) + ri := genRPCInfo() + ctx = rpcinfo.NewCtxWithRPCInfo(ctx, ri) + _, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, func(ctx context.Context, r Retryer) (rpcinfo.RPCInfo, interface{}, error) { + return ri, nil, kerrors.ErrRPCTimeout + }, ri, nil) + test.Assert(t, err != nil, err) + test.Assert(t, !ok) + + // call normal + failurePolicy.StopPolicy.MaxDurationMS = 0 + err = rc.Init(map[string]Policy{Wildcard: { + Enable: true, + Type: 0, + FailurePolicy: failurePolicy, + }}, nil) + test.Assert(t, err == nil, err) + _, ok, err = rc.WithRetryIfNeeded(ctx, &Policy{}, func(ctx context.Context, r Retryer) (rpcinfo.RPCInfo, interface{}, error) { + return ri, nil, nil + }, ri, nil) + test.Assert(t, err == nil, err) + test.Assert(t, ok) +} + +// test retry with one time policy +func TestRetryWithOneTimePolicy(t *testing.T) { + // call while rpc timeout and exceed MaxDurationMS cause BackOffPolicy is wait fix 100ms, it is invalid config + failurePolicy := NewFailurePolicy() + failurePolicy.BackOffPolicy.BackOffType = FixedBackOffType + failurePolicy.BackOffPolicy.CfgItems = map[BackOffCfgKey]float64{ + FixMSBackOffCfgKey: 100.0, + } + failurePolicy.StopPolicy.MaxDurationMS = 100 + p := Policy{ + Enable: true, + Type: 0, + FailurePolicy: failurePolicy, + } + ri := genRPCInfo() + ctx := rpcinfo.NewCtxWithRPCInfo(context.Background(), ri) + _, ok, err := NewRetryContainer().WithRetryIfNeeded(ctx, &p, func(ctx context.Context, r Retryer) (rpcinfo.RPCInfo, interface{}, error) { + return ri, nil, kerrors.ErrRPCTimeout + }, ri, nil) + test.Assert(t, err != nil, err) + test.Assert(t, !ok) + + // call no MaxDurationMS limit, the retry will success + failurePolicy.StopPolicy.MaxDurationMS = 0 + var callTimes int32 + ctx = rpcinfo.NewCtxWithRPCInfo(context.Background(), genRPCInfo()) + _, ok, err = NewRetryContainer().WithRetryIfNeeded(ctx, &p, func(ctx context.Context, r Retryer) 
(rpcinfo.RPCInfo, interface{}, error) { + if atomic.LoadInt32(&callTimes) == 0 { + atomic.AddInt32(&callTimes, 1) + return ri, nil, kerrors.ErrRPCTimeout + } + return ri, nil, nil + }, ri, nil) + test.Assert(t, err == nil, err) + test.Assert(t, !ok) + + // call backup request + p = BuildBackupRequest(NewBackupPolicy(10)) + ctx = rpcinfo.NewCtxWithRPCInfo(context.Background(), genRPCInfo()) + callTimes = 0 + _, ok, err = NewRetryContainer().WithRetryIfNeeded(ctx, &p, func(ctx context.Context, r Retryer) (rpcinfo.RPCInfo, interface{}, error) { + if atomic.LoadInt32(&callTimes) == 0 || atomic.LoadInt32(&callTimes) == 1 { + atomic.AddInt32(&callTimes, 1) + time.Sleep(time.Millisecond * 100) + } + return ri, nil, nil + }, ri, nil) + test.Assert(t, err == nil, err) + test.Assert(t, !ok) + test.Assert(t, atomic.LoadInt32(&callTimes) == 2) +} + +// test specified error to retry +func TestSpecifiedErrorRetry(t *testing.T) { + ri := genRPCInfo() + ctx := rpcinfo.NewCtxWithRPCInfo(context.Background(), ri) + var transErrCode int32 = 1000 + + // case1: specified method retry with error + t.Run("case1", func(t *testing.T) { + rc := NewRetryContainer() + checkResultRetry := false + shouldResultRetry := &ShouldResultRetry{ErrorRetry: func(err error, ri rpcinfo.RPCInfo) bool { + if ri.To().Method() == method { + if te, ok := err.(*remote.TransError); ok && te.TypeID() == transErrCode { + checkResultRetry = true + return true + } + } + return false + }} + err := rc.Init(map[string]Policy{Wildcard: BuildFailurePolicy(NewFailurePolicy())}, shouldResultRetry) + test.Assert(t, err == nil, err) + ri, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithTransError(0, transErrCode), ri, nil) + test.Assert(t, err == nil, err) + test.Assert(t, checkResultRetry) + test.Assert(t, !ok) + v, ok := ri.To().Tag(remoteTagKey) + test.Assert(t, ok) + test.Assert(t, v == remoteTagValue) + }) + + // case2: specified method retry with error, but use backup request config cannot be effective + t.Run("case2", func(t *testing.T) { + shouldResultRetry := &ShouldResultRetry{ErrorRetry: func(err error, ri rpcinfo.RPCInfo) bool { + if ri.To().Method() == method { + if te, ok := err.(*remote.TransError); ok && te.TypeID() == transErrCode { + return true + } + } + return false + }} + rc := NewRetryContainer() + err := rc.Init(map[string]Policy{Wildcard: BuildBackupRequest(NewBackupPolicy(10))}, shouldResultRetry) + test.Assert(t, err == nil, err) + ri = genRPCInfo() + _, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithTransError(0, transErrCode), ri, nil) + test.Assert(t, err != nil, err) + test.Assert(t, !ok) + }) + + // case3: specified method retry with error, but method not match + t.Run("case3", func(t *testing.T) { + shouldResultRetry := &ShouldResultRetry{ErrorRetry: func(err error, ri rpcinfo.RPCInfo) bool { + if ri.To().Method() != method { + if te, ok := err.(*remote.TransError); ok && te.TypeID() == transErrCode { + return true + } + } + return false + }} + rc := NewRetryContainer() + err := rc.Init(map[string]Policy{method: BuildFailurePolicy(NewFailurePolicy())}, shouldResultRetry) + test.Assert(t, err == nil, err) + ri = genRPCInfo() + ri, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithTransError(0, transErrCode), ri, nil) + test.Assert(t, err != nil) + test.Assert(t, !ok) + _, ok = ri.To().Tag(remoteTagKey) + test.Assert(t, !ok) + }) + + // case4: all error retry + t.Run("case4", func(t *testing.T) { + rc := NewRetryContainer() + p := 
BuildFailurePolicy(NewFailurePolicyWithResultRetry(AllErrorRetry())) + ri = genRPCInfo() + ri, ok, err := rc.WithRetryIfNeeded(ctx, &p, retryWithTransError(0, transErrCode), ri, nil) + test.Assert(t, err == nil, err) + test.Assert(t, !ok) + v, ok := ri.To().Tag(remoteTagKey) + test.Assert(t, ok) + test.Assert(t, v == remoteTagValue) + }) +} + +// test specified resp to retry +func TestSpecifiedRespRetry(t *testing.T) { + retryResult := &mockResult{} + retryResp := mockResp{ + code: 500, + msg: "retry", + } + noRetryResp := mockResp{ + code: 0, + msg: "noretry", + } + var callTimes int32 + retryWithResp := func(ctx context.Context, r Retryer) (rpcinfo.RPCInfo, interface{}, error) { + newVal := atomic.AddInt32(&callTimes, 1) + if newVal == 1 { + retryResult.setResult(retryResp) + return genRPCInfo(), retryResult, nil + } else { + retryResult.setResult(noRetryResp) + return genRPCInfoWithRemoteTag(remoteTags), retryResult, nil + } + } + ctx := context.Background() + ri := genRPCInfo() + ctx = rpcinfo.NewCtxWithRPCInfo(ctx, ri) + rc := NewRetryContainer() + // case1: specified method retry with resp + shouldResultRetry := &ShouldResultRetry{RespRetry: func(resp interface{}, ri rpcinfo.RPCInfo) bool { + if ri.To().Method() == method { + if r, ok := resp.(*mockResult); ok && r.getResult() == retryResp { + return true + } + } + return false + }} + err := rc.Init(map[string]Policy{Wildcard: BuildFailurePolicy(NewFailurePolicy())}, shouldResultRetry) + test.Assert(t, err == nil, err) + ri, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithResp, ri, nil) + test.Assert(t, err == nil, err) + test.Assert(t, retryResult.getResult() == noRetryResp, retryResult) + test.Assert(t, !ok) + v, ok := ri.To().Tag(remoteTagKey) + test.Assert(t, ok) + test.Assert(t, v == remoteTagValue) + + // case2 specified method retry with resp, but use backup request config cannot be effective + atomic.StoreInt32(&callTimes, 0) + rc = NewRetryContainer() + err = rc.Init(map[string]Policy{Wildcard: BuildBackupRequest(NewBackupPolicy(100))}, shouldResultRetry) + test.Assert(t, err == nil, err) + ri = genRPCInfo() + ctx = rpcinfo.NewCtxWithRPCInfo(context.Background(), ri) + _, ok, err = rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithResp, ri, nil) + test.Assert(t, err == nil, err) + test.Assert(t, retryResult.getResult() == retryResp, retryResp) + test.Assert(t, !ok) + + // case3: specified method retry with resp, but method not match + atomic.StoreInt32(&callTimes, 0) + shouldResultRetry = &ShouldResultRetry{RespRetry: func(resp interface{}, ri rpcinfo.RPCInfo) bool { + if ri.To().Method() != method { + if r, ok := resp.(*mockResult); ok && r.getResult() == retryResp { + return true + } + } + return false + }} + rc = NewRetryContainer() + err = rc.Init(map[string]Policy{method: BuildFailurePolicy(NewFailurePolicy())}, shouldResultRetry) + test.Assert(t, err == nil, err) + ri = genRPCInfo() + ctx = rpcinfo.NewCtxWithRPCInfo(context.Background(), ri) + ri, ok, err = rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithResp, ri, nil) + test.Assert(t, err == nil, err) + test.Assert(t, retryResult.getResult() == retryResp, retryResult) + test.Assert(t, ok) + _, ok = ri.To().Tag(remoteTagKey) + test.Assert(t, !ok) +} + +// test specified error to retry with ErrorRetryWithCtx +func TestSpecifiedErrorRetryWithCtx(t *testing.T) { + ri := genRPCInfo() + ctx := rpcinfo.NewCtxWithRPCInfo(context.Background(), ri) + var transErrCode int32 = 1000 + + // case1: specified method retry with error + t.Run("case1", func(t *testing.T) { + rc := 
NewRetryContainer() + shouldResultRetry := &ShouldResultRetry{ErrorRetryWithCtx: func(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { + if ri.To().Method() == method { + if te, ok := err.(*remote.TransError); ok && te.TypeID() == transErrCode { + return true + } + } + return false + }} + err := rc.Init(map[string]Policy{Wildcard: BuildFailurePolicy(NewFailurePolicy())}, shouldResultRetry) + test.Assert(t, err == nil, err) + ri, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithTransError(0, transErrCode), ri, nil) + test.Assert(t, err == nil, err) + test.Assert(t, !ok) + v, ok := ri.To().Tag(remoteTagKey) + test.Assert(t, ok) + test.Assert(t, v == remoteTagValue) + }) + + // case2: specified method retry with error, but use backup request config cannot be effective + t.Run("case2", func(t *testing.T) { + shouldResultRetry := &ShouldResultRetry{ErrorRetryWithCtx: func(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { + if ri.To().Method() == method { + if te, ok := err.(*remote.TransError); ok && te.TypeID() == transErrCode { + return true + } + } + return false + }} + rc := NewRetryContainer() + err := rc.Init(map[string]Policy{Wildcard: BuildBackupRequest(NewBackupPolicy(10))}, shouldResultRetry) + test.Assert(t, err == nil, err) + ri = genRPCInfo() + _, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithTransError(0, transErrCode), ri, nil) + test.Assert(t, err != nil, err) + test.Assert(t, !ok) + }) + + // case3: specified method retry with error, but method not match + t.Run("case3", func(t *testing.T) { + shouldResultRetry := &ShouldResultRetry{ErrorRetryWithCtx: func(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { + return ri.To().Method() != method + }} + rc := NewRetryContainer() + err := rc.Init(map[string]Policy{method: BuildFailurePolicy(NewFailurePolicy())}, shouldResultRetry) + test.Assert(t, err == nil, err) + ri = genRPCInfo() + ri, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithTransError(0, transErrCode), ri, nil) + test.Assert(t, err != nil) + test.Assert(t, !ok) + _, ok = ri.To().Tag(remoteTagKey) + test.Assert(t, !ok) + }) + + // case4: all error retry + t.Run("case4", func(t *testing.T) { + rc := NewRetryContainer() + p := BuildFailurePolicy(NewFailurePolicyWithResultRetry(AllErrorRetry())) + ri = genRPCInfo() + ri, ok, err := rc.WithRetryIfNeeded(ctx, &p, retryWithTransError(0, transErrCode), ri, nil) + test.Assert(t, err == nil, err) + test.Assert(t, !ok) + v, ok := ri.To().Tag(remoteTagKey) + test.Assert(t, ok) + test.Assert(t, v == remoteTagValue) + }) + + // case5: specified method retry with error, only ctx has some info then retry + ctxKeyVal := "ctxKeyVal" + t.Run("case5", func(t *testing.T) { + shouldResultRetry := &ShouldResultRetry{ErrorRetryWithCtx: func(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { + if ri.To().Method() == method && ctx.Value(ctxKeyVal) == ctxKeyVal { + if te, ok := err.(*remote.TransError); ok && te.TypeID() == 1000 { + return true + } + } + return false + }} + rc := NewRetryContainer() + err := rc.Init(map[string]Policy{method: BuildFailurePolicy(NewFailurePolicy())}, shouldResultRetry) + test.Assert(t, err == nil, err) + ri = genRPCInfo() + ctx = context.WithValue(ctx, ctxKeyVal, ctxKeyVal) + ri, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithTransError(0, transErrCode), ri, nil) + test.Assert(t, err == nil, err) + test.Assert(t, !ok) + v, ok := ri.To().Tag(remoteTagKey) + test.Assert(t, ok) + test.Assert(t, v == remoteTagValue) + }) +} + +// test specified 
error to retry, but has both old and new policy, the new one will be effective +func TestSpecifiedErrorRetryHasOldAndNew(t *testing.T) { + ri := genRPCInfo() + ctx := rpcinfo.NewCtxWithRPCInfo(context.Background(), ri) + + // case1: ErrorRetryWithCtx will retry, but ErrorRetry not retry, the expect result is do retry + t.Run("case1", func(t *testing.T) { + rc := NewRetryContainer() + shouldResultRetry := &ShouldResultRetry{ + ErrorRetryWithCtx: func(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { + return true + }, + ErrorRetry: func(err error, ri rpcinfo.RPCInfo) bool { + return false + }, + } + err := rc.Init(map[string]Policy{Wildcard: BuildFailurePolicy(NewFailurePolicy())}, shouldResultRetry) + test.Assert(t, err == nil, err) + ri, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithTransError(0, 1000), ri, nil) + test.Assert(t, err == nil, err) + test.Assert(t, !ok) + v, ok := ri.To().Tag(remoteTagKey) + test.Assert(t, ok) + test.Assert(t, v == remoteTagValue) + }) + + // case2: ErrorRetryWithCtx not retry, but ErrorRetry retry, the expect result is that not do retry + t.Run("case2", func(t *testing.T) { + shouldResultRetry := &ShouldResultRetry{ + ErrorRetryWithCtx: func(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { + return false + }, + ErrorRetry: func(err error, ri rpcinfo.RPCInfo) bool { + return true + }, + } + rc := NewRetryContainer() + err := rc.Init(map[string]Policy{Wildcard: BuildFailurePolicy(NewFailurePolicy())}, shouldResultRetry) + test.Assert(t, err == nil, err) + ri, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithTransError(0, 1000), ri, nil) + test.Assert(t, err != nil) + test.Assert(t, !ok) + _, ok = ri.To().Tag(remoteTagKey) + test.Assert(t, !ok) + }) +} + +// test specified resp to retry with ErrorRetryWithCtx +func TestSpecifiedRespRetryWithCtx(t *testing.T) { + retryResult := &mockResult{} + retryResp := mockResp{ + code: 500, + msg: "retry", + } + noRetryResp := mockResp{ + code: 0, + msg: "noretry", + } + var callTimes int32 + retryWithResp := func(ctx context.Context, r Retryer) (rpcinfo.RPCInfo, interface{}, error) { + newVal := atomic.AddInt32(&callTimes, 1) + if newVal == 1 { + retryResult.setResult(retryResp) + return genRPCInfo(), retryResult, nil + } else { + retryResult.setResult(noRetryResp) + return genRPCInfoWithRemoteTag(remoteTags), retryResult, nil + } + } + ctx := context.Background() + ri := genRPCInfo() + ctx = rpcinfo.NewCtxWithRPCInfo(ctx, ri) + rc := NewRetryContainer() + // case1: specified method retry with resp + shouldResultRetry := &ShouldResultRetry{RespRetryWithCtx: func(ctx context.Context, resp interface{}, ri rpcinfo.RPCInfo) bool { + if ri.To().Method() == method { + if r, ok := resp.(*mockResult); ok && r.getResult() == retryResp { + return true + } + } + return false + }} + err := rc.Init(map[string]Policy{Wildcard: BuildFailurePolicy(NewFailurePolicy())}, shouldResultRetry) + test.Assert(t, err == nil, err) + ri, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithResp, ri, nil) + test.Assert(t, err == nil, err) + test.Assert(t, retryResult.getResult() == noRetryResp, retryResult) + test.Assert(t, !ok) + v, ok := ri.To().Tag(remoteTagKey) + test.Assert(t, ok) + test.Assert(t, v == remoteTagValue) + + // case2 specified method retry with resp, but use backup request config cannot be effective + atomic.StoreInt32(&callTimes, 0) + rc = NewRetryContainer() + err = rc.Init(map[string]Policy{Wildcard: BuildBackupRequest(NewBackupPolicy(100))}, shouldResultRetry) + test.Assert(t, err == 
nil, err) + ri = genRPCInfo() + ctx = rpcinfo.NewCtxWithRPCInfo(context.Background(), ri) + _, ok, err = rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithResp, ri, nil) + test.Assert(t, err == nil, err) + test.Assert(t, retryResult.getResult() == retryResp, retryResp) + test.Assert(t, !ok) + + // case3: specified method retry with resp, but method not match + atomic.StoreInt32(&callTimes, 0) + shouldResultRetry = &ShouldResultRetry{RespRetryWithCtx: func(ctx context.Context, resp interface{}, ri rpcinfo.RPCInfo) bool { + if ri.To().Method() != method { + if r, ok := resp.(*mockResult); ok && r.getResult() == retryResp { + return true + } + } + return false + }} + rc = NewRetryContainer() + err = rc.Init(map[string]Policy{method: BuildFailurePolicy(NewFailurePolicy())}, shouldResultRetry) + test.Assert(t, err == nil, err) + ri = genRPCInfo() + ctx = rpcinfo.NewCtxWithRPCInfo(context.Background(), ri) + ri, ok, err = rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithResp, ri, nil) + test.Assert(t, err == nil, err) + test.Assert(t, retryResult.getResult() == retryResp, retryResult) + test.Assert(t, ok) + _, ok = ri.To().Tag(remoteTagKey) + test.Assert(t, !ok) + + // case4: specified method retry with resp, only ctx has some info then retry + ctxKeyVal := "ctxKeyVal" + atomic.StoreInt32(&callTimes, 0) + shouldResultRetry2 := &ShouldResultRetry{RespRetryWithCtx: func(ctx context.Context, resp interface{}, ri rpcinfo.RPCInfo) bool { + if ri.To().Method() == method && ctx.Value(ctxKeyVal) == ctxKeyVal { + if r, ok := resp.(*mockResult); ok && r.getResult() == retryResp { + return true + } + } + return false + }} + err = rc.Init(map[string]Policy{Wildcard: BuildFailurePolicy(NewFailurePolicy())}, shouldResultRetry2) + test.Assert(t, err == nil, err) + ctx = context.WithValue(ctx, ctxKeyVal, ctxKeyVal) + ri, ok, err = rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithResp, ri, nil) + test.Assert(t, err == nil, err) + test.Assert(t, retryResult.getResult() == noRetryResp, retryResult) + test.Assert(t, !ok) + v, ok = ri.To().Tag(remoteTagKey) + test.Assert(t, ok) + test.Assert(t, v == remoteTagValue) +} + +func TestResultRetryWithPolicyChange(t *testing.T) { + rc := NewRetryContainer() + shouldResultRetry := &ShouldResultRetry{ErrorRetry: func(err error, ri rpcinfo.RPCInfo) bool { + if ri.To().Method() == method { + if te, ok := err.(*remote.TransError); ok && te.TypeID() == 1000 { + return true + } + } + return false + }} + err := rc.Init(nil, shouldResultRetry) + test.Assert(t, err == nil, err) + + // case 1: first time trigger NotifyPolicyChange, the `initRetryer` will be executed, check if the ShouldResultRetry is not nil + rc.NotifyPolicyChange(Wildcard, BuildFailurePolicy(NewFailurePolicy())) + r := rc.getRetryer(context.Background(), genRPCInfo()) + fr, ok := r.(*failureRetryer) + test.Assert(t, ok) + test.Assert(t, fr.policy.ShouldResultRetry == shouldResultRetry) + + // case 2: second time trigger NotifyPolicyChange, the `UpdatePolicy` will be executed, check if the ShouldResultRetry is not nil + rc.NotifyPolicyChange(Wildcard, BuildFailurePolicy(NewFailurePolicy())) + r = rc.getRetryer(context.Background(), genRPCInfo()) + fr, ok = r.(*failureRetryer) + test.Assert(t, ok) + test.Assert(t, fr.policy.ShouldResultRetry == shouldResultRetry) +} + +func TestResultRetryWithCtxWhenPolicyChange(t *testing.T) { + rc := NewRetryContainer() + shouldResultRetry := &ShouldResultRetry{ErrorRetryWithCtx: func(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { + if ri.To().Method() == method { + if te, ok := 
err.(*remote.TransError); ok && te.TypeID() == 1000 { + return true + } + } + return false + }} + err := rc.Init(nil, shouldResultRetry) + test.Assert(t, err == nil, err) + + // case 1: first time trigger NotifyPolicyChange, the `initRetryer` will be executed, check if the ShouldResultRetry is not nil + rc.NotifyPolicyChange(Wildcard, BuildFailurePolicy(NewFailurePolicy())) + r := rc.getRetryer(context.Background(), genRPCInfo()) + fr, ok := r.(*failureRetryer) + test.Assert(t, ok) + test.Assert(t, fr.policy.ShouldResultRetry == shouldResultRetry) + + // case 2: second time trigger NotifyPolicyChange, the `UpdatePolicy` will be executed, check if the ShouldResultRetry is not nil + rc.NotifyPolicyChange(Wildcard, BuildFailurePolicy(NewFailurePolicy())) + r = rc.getRetryer(context.Background(), genRPCInfo()) + fr, ok = r.(*failureRetryer) + test.Assert(t, ok) + test.Assert(t, fr.policy.ShouldResultRetry == shouldResultRetry) +} + +func TestFailureRetryWithRPCInfo(t *testing.T) { + // failure retry + ctx := context.Background() + rc := NewRetryContainer() + + ri := genRPCInfo() + ctx = rpcinfo.NewCtxWithRPCInfo(ctx, ri) + rpcinfo.Record(ctx, ri, stats.RPCStart, nil) + + // call with retry policy + var callTimes int32 + policy := BuildFailurePolicy(NewFailurePolicy()) + ri, ok, err := rc.WithRetryIfNeeded(ctx, &policy, retryCall(&callTimes, ri, false), ri, nil) + test.Assert(t, err == nil, err) + test.Assert(t, !ok) + test.Assert(t, ri.Stats().GetEvent(stats.RPCStart).Status() == stats.StatusInfo) + test.Assert(t, ri.Stats().GetEvent(stats.RPCFinish).Status() == stats.StatusInfo) + test.Assert(t, ri.To().Address().String() == "10.20.30.40:8888") + test.Assert(t, atomic.LoadInt32(&callTimes) == 2) +} + +var retryWithTransError = func(callTimes, transErrCode int32) RPCCallFunc { + // fails for the first call if callTimes is initialized to 0 + return func(ctx context.Context, r Retryer) (rpcinfo.RPCInfo, interface{}, error) { + if atomic.AddInt32(&callTimes, 1) == 1 { + // first call retry TransErr with specified errCode + return genRPCInfo(), nil, remote.NewTransErrorWithMsg(transErrCode, "mock") + } else { + return genRPCInfoWithRemoteTag(remoteTags), nil, nil + } + } +} diff --git a/pkg/retry/mixed.go b/pkg/retry/mixed.go new file mode 100644 index 0000000000..103c7df0b4 --- /dev/null +++ b/pkg/retry/mixed.go @@ -0,0 +1,82 @@ +/* + * Copyright 2024 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package retry + +import "fmt" + +const maxMixRetryTimes = 3 + +// NewMixedPolicy init default mixed retry policy +func NewMixedPolicy(delayMS uint32) *MixedPolicy { + if delayMS == 0 { + panic("invalid backup request delay duration in MixedPolicy") + } + p := &MixedPolicy{ + RetryDelayMS: delayMS, + FailurePolicy: FailurePolicy{ + StopPolicy: StopPolicy{ + MaxRetryTimes: defaultBackupRetryTimes, + DisableChainStop: false, + CBPolicy: CBPolicy{ + ErrorRate: defaultCBErrRate, + }, + }, + BackOffPolicy: &BackOffPolicy{BackOffType: NoneBackOffType}, + }, + } + return p +} + +// NewMixedPolicyWithResultRetry init failure retry policy with ShouldResultRetry +func NewMixedPolicyWithResultRetry(delayMS uint32, rr *ShouldResultRetry) *MixedPolicy { + fp := NewMixedPolicy(delayMS) + fp.ShouldResultRetry = rr + return fp +} + +// String is used to print human readable debug info. +func (p *MixedPolicy) String() string { + return fmt.Sprintf("{RetryDelayMS:%+v StopPolicy:%+v BackOffPolicy:%+v RetrySameNode:%+v "+ + "ShouldResultRetry:{ErrorRetry:%t, RespRetry:%t}}", p.RetryDelayMS, p.StopPolicy, p.BackOffPolicy, p.RetrySameNode, p.isErrorRetryWithCtxNonNil(), p.isRespRetryWithCtxNonNil()) +} + +// Equals to check if MixedPolicy is equal +func (p *MixedPolicy) Equals(np *MixedPolicy) bool { + if p == nil { + return np == nil + } + if np == nil { + return false + } + if p.RetryDelayMS != np.RetryDelayMS { + return false + } + if !p.FailurePolicy.Equals(&np.FailurePolicy) { + return false + } + return true +} + +func (p *MixedPolicy) DeepCopy() *MixedPolicy { + if p == nil { + return nil + } + return &MixedPolicy{ + RetryDelayMS: p.RetryDelayMS, + FailurePolicy: *p.FailurePolicy.DeepCopy(), + } +} diff --git a/pkg/retry/mixed_retryer.go b/pkg/retry/mixed_retryer.go new file mode 100644 index 0000000000..2ef8425ac4 --- /dev/null +++ b/pkg/retry/mixed_retryer.go @@ -0,0 +1,258 @@ +/* + * Copyright 2024 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package retry + +import ( + "context" + "errors" + "fmt" + "sync" + "sync/atomic" + "time" + + "github.com/cloudwego/kitex/pkg/circuitbreak" + "github.com/cloudwego/kitex/pkg/gofunc" + "github.com/cloudwego/kitex/pkg/kerrors" + "github.com/cloudwego/kitex/pkg/klog" + "github.com/cloudwego/kitex/pkg/rpcinfo" + "github.com/cloudwego/kitex/pkg/utils" +) + +func newMixedRetryer(policy Policy, r *ShouldResultRetry, cbC *cbContainer) (Retryer, error) { + mr := &mixedRetryer{failureCommon: &failureCommon{specifiedResultRetry: r, cbContainer: cbC}} + if err := mr.UpdatePolicy(policy); err != nil { + return nil, fmt.Errorf("newMixedRetryer failed, err=%w", err) + } + return mr, nil +} + +type mixedRetryer struct { + enable bool + *failureCommon + policy *MixedPolicy + retryDelay time.Duration + sync.RWMutex + errMsg string +} + +// ShouldRetry to check if retry request can be called, it is checked in retryer.Do. 
+// If not satisfy will return the reason message +// Actually, the ShouldRetry logic is same with failureRetryer, because +func (r *mixedRetryer) ShouldRetry(ctx context.Context, err error, callTimes int, req interface{}, cbKey string) (string, bool) { + r.RLock() + defer r.RUnlock() + if !r.enable { + return "", false + } + return r.shouldRetry(ctx, callTimes, req, cbKey, &r.policy.FailurePolicy) +} + +// AllowRetry implements the Retryer interface. +func (r *mixedRetryer) AllowRetry(ctx context.Context) (string, bool) { + r.RLock() + defer r.RUnlock() + if !r.enable || r.policy.StopPolicy.MaxRetryTimes == 0 { + return "", false + } + if stop, msg := chainStop(ctx, r.policy.StopPolicy); stop { + return msg, false + } + if stop, msg := ddlStop(ctx, r.policy.StopPolicy); stop { + return msg, false + } + return "", true +} + +// Do implement the Retryer interface. +func (r *mixedRetryer) Do(ctx context.Context, rpcCall RPCCallFunc, firstRI rpcinfo.RPCInfo, req interface{}) (lastRI rpcinfo.RPCInfo, recycleRI bool, err error) { + r.RLock() + var maxDuration time.Duration + if r.policy.StopPolicy.MaxDurationMS > 0 { + maxDuration = time.Duration(r.policy.StopPolicy.MaxDurationMS) * time.Millisecond + } + retryTimes := r.policy.StopPolicy.MaxRetryTimes + retryDelay := r.retryDelay + r.RUnlock() + + var callTimes int32 + var callCosts utils.StringBuilder + callCosts.RawStringBuilder().Grow(32) + + var recordCostDoing int32 = 0 + var abort int32 = 0 + doneCount := 0 + finishedErrCount := 0 + + // notice: buff num of chan is very important here, it cannot less than call times, or the below chan receive will block + callDone := make(chan *resultWrapper, retryTimes+1) + timer := time.NewTimer(retryDelay) + maxDurationTimer := time.NewTimer(maxDuration) + cbKey, _ := r.cbContainer.cbCtl.GetKey(ctx, req) + defer func() { + if panicInfo := recover(); panicInfo != nil { + err = panicToErr(ctx, panicInfo, firstRI) + } + timer.Stop() + maxDurationTimer.Stop() + }() + startTime := time.Now() + // include first call, max loop is retryTimes + 1 + doCall := true + for i := 0; ; { + if doCall { + doCall = false + if i > 0 { + if ret, e := isExceedMaxDuration(ctx, startTime, maxDuration, atomic.LoadInt32(&callTimes)); ret { + return firstRI, false, e + } + } + i++ + gofunc.GoFunc(ctx, func() { + if atomic.LoadInt32(&abort) == 1 { + return + } + var ( + e error + cRI rpcinfo.RPCInfo + resp interface{} + ) + defer func() { + if panicInfo := recover(); panicInfo != nil { + e = panicToErr(ctx, panicInfo, firstRI) + } + callDone <- &resultWrapper{cRI, resp, e} + }() + ct := atomic.AddInt32(&callTimes, 1) + callStart := time.Now() + if r.cbContainer.enablePercentageLimit { + // record stat before call since requests may be slow, making the limiter more accurate + recordRetryStat(cbKey, r.cbContainer.cbPanel, ct) + } + cRI, resp, e = rpcCall(ctx, r) + recordCost(ct, callStart, &recordCostDoing, &callCosts, &abort, e) + if !r.cbContainer.enablePercentageLimit && r.cbContainer.cbStat { + circuitbreak.RecordStat(ctx, req, nil, e, cbKey, r.cbContainer.cbCtl, r.cbContainer.cbPanel) + } + }) + } + select { + case <-timer.C: + // backup retry + if _, ok := r.ShouldRetry(ctx, nil, i, req, cbKey); ok && i <= retryTimes { + doCall = true + timer.Reset(retryDelay) + } + case res := <-callDone: + // result retry + doneCount++ + if res.err != nil && errors.Is(res.err, kerrors.ErrRPCFinish) { + // There will be only one request (goroutine) pass the `checkRPCState`, others will skip decoding + // and return `ErrRPCFinish`, to avoid 
concurrent write to response and save the cost of decoding. + // We can safely ignore this error and wait for the response of the passed goroutine. + if finishedErrCount++; finishedErrCount >= retryTimes+1 { + // But if all requests return this error, it must be a bug, preventive panic to avoid dead loop + panic(errUnexpectedFinish) + } + continue + } + if i <= retryTimes { + if _, ok := r.ShouldRetry(ctx, nil, i, req, cbKey); ok { + if r.isRetryResult(ctx, res.ri, res.resp, res.err, &r.policy.FailurePolicy) { + doCall = true + timer.Reset(retryDelay) + continue + } + } + } else if doneCount <= retryTimes && r.isRetryResult(ctx, res.ri, res.resp, res.err, &r.policy.FailurePolicy) { + continue + } + atomic.StoreInt32(&abort, 1) + recordRetryInfo(res.ri, atomic.LoadInt32(&callTimes), callCosts.String()) + return res.ri, false, res.err + } + } +} + +// UpdatePolicy implements the Retryer interface. +func (r *mixedRetryer) UpdatePolicy(rp Policy) (err error) { + if !rp.Enable { + r.Lock() + r.enable = rp.Enable + r.Unlock() + return nil + } + if rp.MixedPolicy == nil || rp.Type != MixedType { + err = errors.New("MixedPolicy is nil or retry type not match, cannot do update in mixedRetryer") + } + if err == nil && rp.MixedPolicy.RetryDelayMS == 0 { + err = errors.New("invalid retry delay duration in mixedRetryer") + } + if err == nil { + err = checkStopPolicy(&rp.MixedPolicy.StopPolicy, maxMixRetryTimes, r) + } + r.Lock() + defer r.Unlock() + r.enable = rp.Enable + if err != nil { + r.errMsg = err.Error() + return err + } + r.policy = rp.MixedPolicy + r.retryDelay = time.Duration(rp.MixedPolicy.RetryDelayMS) * time.Millisecond + r.setSpecifiedResultRetryIfNeeded(r.specifiedResultRetry, &r.policy.FailurePolicy) + if bo, e := initBackOff(rp.MixedPolicy.BackOffPolicy); e != nil { + r.errMsg = fmt.Sprintf("mixedRetryer update BackOffPolicy failed, err=%s", e.Error()) + klog.Warnf("KITEX: %s", r.errMsg) + } else { + r.backOff = bo + } + return nil +} + +// AppendErrMsgIfNeeded implements the Retryer interface. +func (r *mixedRetryer) AppendErrMsgIfNeeded(ctx context.Context, err error, ri rpcinfo.RPCInfo, msg string) { + if r.isRetryErr(ctx, err, ri, &r.policy.FailurePolicy) { + // Add additional reason when retry is not applied. + appendErrMsg(err, msg) + } +} + +// Prepare implements the Retryer interface. +func (r *mixedRetryer) Prepare(ctx context.Context, prevRI, retryRI rpcinfo.RPCInfo) { + handleRetryInstance(r.policy.RetrySameNode, prevRI, retryRI) +} + +// Type implements the Retryer interface. +func (r *mixedRetryer) Type() Type { + return MixedType +} + +// Dump implements the Retryer interface. +func (r *mixedRetryer) Dump() map[string]interface{} { + r.RLock() + defer r.RUnlock() + dm := make(map[string]interface{}) + dm["enable"] = r.enable + dm["mixed_retry"] = r.policy + if r.policy != nil { + dm["specified_result_retry"] = r.dumpSpecifiedResultRetry(r.policy.FailurePolicy) + } + if r.errMsg != "" { + dm["err_msg"] = r.errMsg + } + return dm +} diff --git a/pkg/retry/mixed_test.go b/pkg/retry/mixed_test.go new file mode 100644 index 0000000000..1760ec9dfb --- /dev/null +++ b/pkg/retry/mixed_test.go @@ -0,0 +1,550 @@ +/* + * Copyright 2024 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package retry + +import ( + "context" + "errors" + "math" + "sync/atomic" + "testing" + "time" + + "github.com/bytedance/sonic" + + "github.com/cloudwego/kitex/internal/test" + "github.com/cloudwego/kitex/pkg/kerrors" + "github.com/cloudwego/kitex/pkg/remote" + "github.com/cloudwego/kitex/pkg/rpcinfo" +) + +// test new MixedPolicy +func TestMixedRetryPolicy(t *testing.T) { + mp := NewMixedPolicy(100) + + // case 1 + mp.WithMaxRetryTimes(3) + jsonRet, err := sonic.MarshalString(mp) + test.Assert(t, err == nil, err) + var mp2 MixedPolicy + err = sonic.UnmarshalString(jsonRet, &mp2) + test.Assert(t, err == nil, err) + test.Assert(t, mp.Equals(&mp2)) + test.Assert(t, mp2.FailurePolicy.StopPolicy.MaxRetryTimes == 3) + + // case 2 + mp.WithMaxRetryTimes(2) + mp.WithRetrySameNode() + mp.WithFixedBackOff(10) + jsonRet, err = sonic.MarshalString(mp) + test.Assert(t, err == nil, err) + var mp3 MixedPolicy + err = sonic.UnmarshalString(jsonRet, &mp3) + test.Assert(t, err == nil, err) + test.Assert(t, mp.Equals(&mp3), mp3) + test.Assert(t, mp3.FailurePolicy.StopPolicy.MaxRetryTimes == 2) + test.Assert(t, mp3.FailurePolicy.BackOffPolicy.BackOffType == FixedBackOffType) + + // case 3 + mp.WithRandomBackOff(10, 20) + jsonRet, err = sonic.MarshalString(mp) + test.Assert(t, err == nil, err) + var mp4 MixedPolicy + err = sonic.UnmarshalString(jsonRet, &mp4) + test.Assert(t, err == nil, err) + test.Assert(t, mp.Equals(&mp4), mp4) + test.Assert(t, mp4.FailurePolicy.StopPolicy.MaxRetryTimes == 2) + test.Assert(t, mp4.FailurePolicy.BackOffPolicy.BackOffType == RandomBackOffType) + + // case 4 + mp.WithRetryBreaker(0.2) + mp.WithDDLStop() + mp.WithMaxDurationMS(100) + jsonRet, err = sonic.MarshalString(mp) + test.Assert(t, err == nil, err) + var mp5 MixedPolicy + err = sonic.UnmarshalString(jsonRet, &mp5) + test.Assert(t, err == nil, err) + test.Assert(t, mp.Equals(&mp5), mp5) + test.Assert(t, mp5.FailurePolicy.StopPolicy.DDLStop) + test.Assert(t, mp5.FailurePolicy.StopPolicy.MaxDurationMS == 100) + test.Assert(t, mp5.FailurePolicy.StopPolicy.CBPolicy.ErrorRate == 0.2) + + // case 5 + mp = &MixedPolicy{ + RetryDelayMS: 20, + FailurePolicy: FailurePolicy{ + StopPolicy: StopPolicy{ + MaxRetryTimes: 2, + DisableChainStop: false, + CBPolicy: CBPolicy{ + ErrorRate: defaultCBErrRate, + }, + }, + Extra: "{}", + }, + } + jsonRet, err = sonic.MarshalString(mp) + test.Assert(t, err == nil, err) + var mp6 MixedPolicy + err = sonic.UnmarshalString(jsonRet, &mp6) + test.Assert(t, err == nil, err) + test.Assert(t, mp6.BackOffPolicy == nil) + test.Assert(t, mp.Equals(&mp6), mp6) + test.Assert(t, mp6.FailurePolicy.StopPolicy.MaxRetryTimes == 2) + test.Assert(t, !mp6.FailurePolicy.StopPolicy.DisableChainStop) + test.Assert(t, mp6.FailurePolicy.StopPolicy.CBPolicy.ErrorRate == defaultCBErrRate) + + // case 6 + mp.DisableChainRetryStop() + jsonRet, err = sonic.MarshalString(mp) + test.Assert(t, err == nil, err) + var mp7 MixedPolicy + err = sonic.UnmarshalString(jsonRet, &mp7) + test.Assert(t, err == nil, err) + test.Assert(t, mp7.BackOffPolicy == nil) + test.Assert(t, mp.Equals(&mp7), mp7) + 
test.Assert(t, mp7.FailurePolicy.StopPolicy.DisableChainStop) + test.Assert(t, mp.String() == "{RetryDelayMS:20 StopPolicy:{MaxRetryTimes:2 MaxDurationMS:0 DisableChainStop:true "+ + "DDLStop:false CBPolicy:{ErrorRate:0.1}} BackOffPolicy: RetrySameNode:false ShouldResultRetry:{ErrorRetry:false, RespRetry:false}}", mp) + + // case 7 + mp.WithSpecifiedResultRetry(&ShouldResultRetry{ErrorRetryWithCtx: func(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { + return false + }}) + test.Assert(t, mp.String() == "{RetryDelayMS:20 StopPolicy:{MaxRetryTimes:2 MaxDurationMS:0 DisableChainStop:true "+ + "DDLStop:false CBPolicy:{ErrorRate:0.1}} BackOffPolicy: RetrySameNode:false ShouldResultRetry:{ErrorRetry:true, RespRetry:false}}", mp) + jsonRet, err = sonic.MarshalString(mp) + test.Assert(t, err == nil, err) + var fp9 MixedPolicy + err = sonic.UnmarshalString(jsonRet, &fp9) + test.Assert(t, err == nil, err) + test.Assert(t, mp.Equals(&fp9), fp9) + test.Assert(t, fp9.ShouldResultRetry == nil) +} + +func TestNewMixedPolicy(t *testing.T) { + mp0 := NewMixedPolicy(100) + mp1 := NewMixedPolicy(100) + test.Assert(t, mp0.Equals(mp1)) + + mp1 = NewMixedPolicy(20) + test.Assert(t, !mp0.Equals(mp1)) + + mp1 = mp0.DeepCopy() + test.Assert(t, mp0.Equals(mp1)) + + mp1 = mp0.DeepCopy() + mp1.WithMaxRetryTimes(3) + test.Assert(t, !mp0.Equals(mp1)) + + mp1 = mp0.DeepCopy() + mp1.WithFixedBackOff(10) + test.Assert(t, !mp0.Equals(mp1)) + + mp1 = mp0.DeepCopy() + mp1.WithRetryBreaker(0.2) + test.Assert(t, !mp0.Equals(mp1)) + + mp1 = nil + test.Assert(t, !mp0.Equals(mp1)) + + mp0 = nil + test.Assert(t, mp0.Equals(mp1)) + + test.Panic(t, func() { NewMixedPolicy(0) }) +} + +// test MixedRetry call +func TestMixedRetry(t *testing.T) { + ri := genRPCInfo() + ctx := rpcinfo.NewCtxWithRPCInfo(context.Background(), ri) + var transErrCode int32 = 1001 + shouldResultRetry := &ShouldResultRetry{ErrorRetryWithCtx: func(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { + if ri.To().Method() == method { + if te, ok := err.(*remote.TransError); ok && te.TypeID() == transErrCode { + return true + } + } + return false + }} + + // case1: specified method retry with error + t.Run("specified method retry with error", func(t *testing.T) { + rc := NewRetryContainer() + err := rc.Init(map[string]Policy{Wildcard: BuildMixedPolicy(NewMixedPolicy(100))}, shouldResultRetry) + test.Assert(t, err == nil, err) + ri, ok, err := rc.WithRetryIfNeeded(ctx, nil, retryWithTransError(0, transErrCode), ri, nil) + test.Assert(t, err == nil, err) + test.Assert(t, !ok) + v, ok := ri.To().Tag(remoteTagKey) + test.Assert(t, ok) + test.Assert(t, v == remoteTagValue) + + ri, ok, err = rc.WithRetryIfNeeded(ctx, nil, retryWithTransError(0, 1002), ri, nil) + test.Assert(t, err != nil) + test.Assert(t, !ok) + _, ok = ri.To().Tag(remoteTagKey) + test.Assert(t, !ok) + }) + + // case2: specified method retry with error, but method not match + t.Run("specified method retry with error, but method not match", func(t *testing.T) { + rr := &ShouldResultRetry{ErrorRetryWithCtx: func(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { + if ri.To().Method() != method { + if te, ok := err.(*remote.TransError); ok && te.TypeID() == 1000 { + return true + } + } + return false + }} + rc := NewRetryContainer() + err := rc.Init(map[string]Policy{method: BuildMixedPolicy(NewMixedPolicy(100))}, rr) + test.Assert(t, err == nil, err) + ri = genRPCInfo() + ri, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithTransError(0, transErrCode), ri, nil) + 
test.Assert(t, err != nil) + test.Assert(t, !ok) + _, ok = ri.To().Tag(remoteTagKey) + test.Assert(t, !ok) + }) + + // case3: all error retry + t.Run("all error retry", func(t *testing.T) { + rc := NewRetryContainer() + p := BuildMixedPolicy(NewMixedPolicyWithResultRetry(100, AllErrorRetry())) + ri = genRPCInfo() + ri, ok, err := rc.WithRetryIfNeeded(ctx, &p, retryWithTransError(0, transErrCode), ri, nil) + test.Assert(t, err == nil, err) + test.Assert(t, !ok) + v, ok := ri.To().Tag(remoteTagKey) + test.Assert(t, ok) + test.Assert(t, v == remoteTagValue) + }) +} + +// Assuming the first request returns at 300ms, the second request costs 150ms +// Configuration: Timeout=200ms、MaxRetryTimes=2 BackupDelay=100ms +// - Mixed Retry: Success, cost 250ms +// - Failure Retry: Success, cost 350ms +// - Backup Retry: Failure, cost 200ms +func TestMockCase1WithDiffRetry(t *testing.T) { + ri := genRPCInfo() + ctx := rpcinfo.NewCtxWithRPCInfo(context.Background(), ri) + retryWithTimeout := func(ri rpcinfo.RPCInfo, callTimes int32, resp *mockResult) RPCCallFunc { + return func(ctx context.Context, r Retryer) (rpcinfo.RPCInfo, interface{}, error) { + ct := atomic.AddInt32(&callTimes, 1) + resp.setCallTimes(ct) + if ct == 1 { + // first call retry timeout + time.Sleep(200 * time.Millisecond) + return ri, nil, kerrors.ErrRPCTimeout.WithCause(errors.New("mock")) + } else { + time.Sleep(150 * time.Millisecond) + return ri, resp, nil + } + } + } + // mixed retry will success, latency is lowest + t.Run("mixed retry", func(t *testing.T) { + rc := NewRetryContainer() + mp := NewMixedPolicy(100) + mp.WithMaxRetryTimes(2) // max call times is 3 + p := BuildMixedPolicy(mp) + ri = genRPCInfo() + ret := &mockResult{} + start := time.Now() + _, ok, err := rc.WithRetryIfNeeded(ctx, &p, retryWithTimeout(ri, 0, ret), ri, nil) + cost := time.Since(start) // 100+150 = 250 + test.Assert(t, err == nil, err) + test.Assert(t, ret.getCallTimes() == 3, ret.callTimes) + test.Assert(t, !ok) + test.Assert(t, math.Abs(float64(cost.Milliseconds())-250.0) < 50.0, cost.Milliseconds()) + }) + + // failure retry will success, but latency is more than mixed retry + t.Run("failure retry", func(t *testing.T) { + rc := NewRetryContainer() + fp := NewFailurePolicy() + fp.WithMaxRetryTimes(2) // max call times is 3 + p := BuildFailurePolicy(fp) + ri = genRPCInfo() + ret := &mockResult{} + start := time.Now() + _, ok, err := rc.WithRetryIfNeeded(ctx, &p, retryWithTimeout(ri, 0, ret), ri, nil) + cost := time.Since(start) + test.Assert(t, err == nil, err) + test.Assert(t, ret.callTimes == 2, ret.callTimes) + test.Assert(t, !ok) + test.Assert(t, math.Abs(float64(cost.Milliseconds())-350.0) < 50.0, cost.Milliseconds()) + }) + + // backup request will failure + t.Run("backup request", func(t *testing.T) { + rc := NewRetryContainer() + bp := NewBackupPolicy(100) + bp.WithMaxRetryTimes(2) // max call times is 3 + p := BuildBackupRequest(bp) + ri = genRPCInfo() + ret := &mockResult{} + start := time.Now() + _, ok, err := rc.WithRetryIfNeeded(ctx, &p, retryWithTimeout(ri, 0, ret), ri, nil) + cost := time.Since(start) + test.Assert(t, err != nil, err) + test.Assert(t, errors.Is(err, kerrors.ErrRPCTimeout)) + test.Assert(t, !ok) + test.Assert(t, math.Abs(float64(cost.Milliseconds())-200.0) < 50.0, cost.Milliseconds()) + }) +} + +// Assuming the first request returns at 300ms, the second request cost 150ms +// Configuration: Timeout=300ms、MaxRetryTimes=2 BackupDelay=100ms +// - Mixed Retry: Success, cost 250ms (>timeout, same with Backup Retry) +// - 
Failure Retry: Success, cost 350ms +// - Backup Retry: Failure, cost 200ms (same with Mixed Retry) +func TestMockCase2WithDiffRetry(t *testing.T) { + ri := genRPCInfo() + ctx := rpcinfo.NewCtxWithRPCInfo(context.Background(), ri) + retryWithTimeout := func(ri rpcinfo.RPCInfo, callTimes int32, resp *mockResult) RPCCallFunc { + return func(ctx context.Context, r Retryer) (rpcinfo.RPCInfo, interface{}, error) { + ct := atomic.AddInt32(&callTimes, 1) + resp.setCallTimes(ct) + if ct == 1 { + // first call retry timeout + time.Sleep(300 * time.Millisecond) + return ri, nil, kerrors.ErrRPCTimeout.WithCause(errors.New("mock")) + } else { + time.Sleep(150 * time.Millisecond) + return ri, resp, nil + } + } + } + // mixed retry will success, latency is lowest + t.Run("mixed retry", func(t *testing.T) { + rc := NewRetryContainer() + mp := NewMixedPolicy(100) + mp.WithMaxRetryTimes(2) // max call times is 3 + p := BuildMixedPolicy(mp) + ri = genRPCInfo() + ret := &mockResult{} + start := time.Now() + _, ok, err := rc.WithRetryIfNeeded(ctx, &p, retryWithTimeout(ri, 0, ret), ri, nil) + cost := time.Since(start) // 100+150 = 250 + test.Assert(t, err == nil, err) + test.Assert(t, ret.getCallTimes() == 3, ret.callTimes) + test.Assert(t, !ok) + test.Assert(t, math.Abs(float64(cost.Milliseconds())-250.0) < 50.0, cost.Milliseconds()) + }) + + // failure retry will success, but latency is more than mixed retry + t.Run("failure retry", func(t *testing.T) { + rc := NewRetryContainer() + fp := NewFailurePolicy() + fp.WithMaxRetryTimes(2) // max call times is 3 + p := BuildFailurePolicy(fp) + ri = genRPCInfo() + ret := &mockResult{} + start := time.Now() + _, ok, err := rc.WithRetryIfNeeded(ctx, &p, retryWithTimeout(ri, 0, ret), ri, nil) + cost := time.Since(start) + test.Assert(t, err == nil, err) + test.Assert(t, ret.getCallTimes() == 2, ret.callTimes) + test.Assert(t, !ok) + test.Assert(t, math.Abs(float64(cost.Milliseconds())-450.0) < 50.0, cost.Milliseconds()) + }) + + // backup request will failure + t.Run("backup request", func(t *testing.T) { + rc := NewRetryContainer() + bp := NewBackupPolicy(100) + bp.WithMaxRetryTimes(2) // max call times is 3 + p := BuildBackupRequest(bp) + ri = genRPCInfo() + ret := &mockResult{} + start := time.Now() + _, ok, err := rc.WithRetryIfNeeded(ctx, &p, retryWithTimeout(ri, 0, ret), ri, nil) + cost := time.Since(start) + test.Assert(t, err == nil, err) + test.Assert(t, ret.getCallTimes() == 3, ret.callTimes) + test.Assert(t, !ok) + test.Assert(t, math.Abs(float64(cost.Milliseconds())-250.0) < 50.0, cost.Milliseconds()) + }) +} + +// Assuming all request timeout +// Configuration: Timeout=100ms、MaxRetryTimes=2 BackupDelay=100ms +// - Mixed Retry: Failure, cost 200ms +// - Failure Retry: Failure, cost 300ms +// - Backup Retry: Failure, cost 100ms (max cost is timeout) +func TestMockCase3WithDiffRetry(t *testing.T) { + ri := genRPCInfo() + ctx := rpcinfo.NewCtxWithRPCInfo(context.Background(), ri) + retryWithTimeout := func(ri rpcinfo.RPCInfo, callTimes int32, resp *mockResult) RPCCallFunc { + return func(ctx context.Context, r Retryer) (rpcinfo.RPCInfo, interface{}, error) { + ct := atomic.AddInt32(&callTimes, 1) + resp.setCallTimes(ct) + time.Sleep(100 * time.Millisecond) + return ri, nil, kerrors.ErrRPCTimeout.WithCause(errors.New("mock")) + } + } + // mixed retry will success, cost is least + t.Run("mixed retry", func(t *testing.T) { + rc := NewRetryContainer() + mp := NewMixedPolicy(100) + mp.WithMaxRetryTimes(2) // max call times is 3 + p := BuildMixedPolicy(mp) + ri = 
genRPCInfo() + ret := &mockResult{} + start := time.Now() + _, ok, err := rc.WithRetryIfNeeded(ctx, &p, retryWithTimeout(ri, 0, ret), ri, nil) + cost := time.Since(start) // 100+(100,100) = 200 + test.Assert(t, err != nil, err) + test.Assert(t, errors.Is(err, kerrors.ErrRPCTimeout)) + test.Assert(t, ret.getCallTimes() == 3, ret.callTimes) + test.Assert(t, !ok) + test.Assert(t, math.Abs(float64(cost.Milliseconds())-200.0) < 50.0, cost.Milliseconds()) + }) + + // failure retry will success, but cost is more than mixed retry + t.Run("failure retry", func(t *testing.T) { + rc := NewRetryContainer() + fp := NewFailurePolicy() + fp.WithMaxRetryTimes(2) // max call times is 3 + p := BuildFailurePolicy(fp) + ri = genRPCInfo() + ret := &mockResult{} + start := time.Now() + _, ok, err := rc.WithRetryIfNeeded(ctx, &p, retryWithTimeout(ri, 0, ret), ri, nil) + cost := time.Since(start) + test.Assert(t, err != nil, err) + test.Assert(t, errors.Is(err, kerrors.ErrRPCTimeout)) + test.Assert(t, ret.getCallTimes() == 3, ret.callTimes) + test.Assert(t, !ok) + test.Assert(t, math.Abs(float64(cost.Milliseconds())-300.0) < 50.0, cost.Milliseconds()) + }) + + // backup request will failure + t.Run("backup request", func(t *testing.T) { + rc := NewRetryContainer() + bp := NewBackupPolicy(100) + bp.WithMaxRetryTimes(2) // max call times is 3 + p := BuildBackupRequest(bp) + ri = genRPCInfo() + ret := &mockResult{} + start := time.Now() + _, ok, err := rc.WithRetryIfNeeded(ctx, &p, retryWithTimeout(ri, 0, ret), ri, nil) + cost := time.Since(start) + test.Assert(t, err != nil, err) + test.Assert(t, errors.Is(err, kerrors.ErrRPCTimeout)) + test.Assert(t, !ok) + test.Assert(t, math.Abs(float64(cost.Milliseconds())-100.0) < 50.0, cost.Milliseconds()) + }) +} + +// Assuming BizStatus=11111/11112 needs to be retried, +// +// the first reply is BizStatus=11111, it costs 250ms, +// the second reply is BizStatus=11112, it costs 250ms, +// the third reply is BizStatus=0, it costs 250ms, +// +// Configuration: MaxRetryTimes=3 BackupDelay=100ms +// - Mixed Retry: Success, cost 450ms, two backup retry, and one failure retry +// - Failure Retry: Success, cost 750ms +// - Backup Retry: Biz Error, cost 250ms +func TestMockCase4WithDiffRetry(t *testing.T) { + bizStatusCode0, bizStatusCode1, bizStatusCode2 := 0, 11111, 11112 + ri := genRPCInfo() + ctx := rpcinfo.NewCtxWithRPCInfo(context.Background(), ri) + retryWithResp := func(ri rpcinfo.RPCInfo, callTimes int32, resp *mockResult) RPCCallFunc { + return func(ctx context.Context, r Retryer) (rpcinfo.RPCInfo, interface{}, error) { + ct := atomic.AddInt32(&callTimes, 1) + resp.setCallTimes(ct) + time.Sleep(250 * time.Millisecond) + switch ct { + case 1: + resp.setResult(mockResp{code: bizStatusCode1}) + return ri, resp, nil + case 2: + resp.setResult(mockResp{code: bizStatusCode2}) + return ri, resp, nil + case 3: + resp.setResult(mockResp{code: bizStatusCode0}) + return ri, resp, nil + } + return ri, nil, errors.New("mock error") + } + } + resultRetry := &ShouldResultRetry{RespRetryWithCtx: func(ctx context.Context, resp interface{}, ri rpcinfo.RPCInfo) bool { + bizCode := resp.(*mockResult).getResult().(mockResp).code + if bizCode == bizStatusCode1 || bizCode == bizStatusCode2 { + return true + } + return false + }} + // mixed retry will success, cost is least + t.Run("mixed retry", func(t *testing.T) { + rc := NewRetryContainer() + mp := NewMixedPolicy(100) + mp.WithMaxRetryTimes(3) // max call times is 4 + mp.WithSpecifiedResultRetry(resultRetry) + p := BuildMixedPolicy(mp) + ri = 
genRPCInfo() + ret := &mockResult{} + start := time.Now() + _, ok, err := rc.WithRetryIfNeeded(ctx, &p, retryWithResp(ri, 0, ret), ri, nil) + cost := time.Since(start) + test.Assert(t, err == nil, err) + test.Assert(t, ret.getCallTimes() == 4, ret.callTimes) + test.Assert(t, !ok) + test.Assert(t, math.Abs(float64(cost.Milliseconds())-450.0) < 50.0, cost.Milliseconds()) + }) + + // failure retry will success, but cost is more than mixed retry + t.Run("failure retry", func(t *testing.T) { + rc := NewRetryContainer() + fp := NewFailurePolicy() + fp.WithMaxRetryTimes(3) // max call times is 4 + fp.WithSpecifiedResultRetry(resultRetry) + p := BuildFailurePolicy(fp) + ri = genRPCInfo() + ret := &mockResult{} + start := time.Now() + _, ok, err := rc.WithRetryIfNeeded(ctx, &p, retryWithResp(ri, 0, ret), ri, nil) + cost := time.Since(start) + test.Assert(t, err == nil, err) + test.Assert(t, ret.getCallTimes() == 3, ret.callTimes) + test.Assert(t, !ok) + test.Assert(t, math.Abs(float64(cost.Milliseconds())-750.0) < 50.0, cost.Milliseconds()) + }) + + // backup request will failure + t.Run("backup request", func(t *testing.T) { + rc := NewRetryContainer() + bp := NewBackupPolicy(100) + bp.WithMaxRetryTimes(2) // backup max retry times is 2 + p := BuildBackupRequest(bp) + ri = genRPCInfo() + ret := &mockResult{} + start := time.Now() + _, ok, err := rc.WithRetryIfNeeded(ctx, &p, retryWithResp(ri, 0, ret), ri, nil) + cost := time.Since(start) + test.Assert(t, err == nil, err) + test.Assert(t, ret.getResult().(mockResp).code == bizStatusCode1) + test.Assert(t, !ok) + test.Assert(t, math.Abs(float64(cost.Milliseconds())-250.0) < 50.0, cost.Milliseconds()) + }) +} diff --git a/pkg/retry/policy.go b/pkg/retry/policy.go index c3d2a06e77..2161739583 100644 --- a/pkg/retry/policy.go +++ b/pkg/retry/policy.go @@ -20,6 +20,7 @@ import ( "context" "fmt" + "github.com/cloudwego/kitex/pkg/klog" "github.com/cloudwego/kitex/pkg/rpcinfo" ) @@ -30,6 +31,7 @@ type Type int const ( FailureType Type = iota BackupType + MixedType ) // String prints human readable information. 
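As a reading aid for the mock cases above: the following is a minimal, hypothetical sketch (not part of this patch) of wiring the new mixed policy into a retry container, using only APIs introduced or exercised in this change. The helper name `buildMixedRetryContainer` and the concrete numbers are illustrative assumptions.

```go
package retryexample

import "github.com/cloudwego/kitex/pkg/retry"

// buildMixedRetryContainer is an illustrative helper (hypothetical name):
// it combines a 100ms backup delay with failure-style result retry,
// which is the behaviour the TestMockCase* tests above compare against
// pure failure retry and pure backup request.
func buildMixedRetryContainer() *retry.Container {
	mp := retry.NewMixedPolicy(100) // send a duplicate request after 100ms
	mp.WithMaxRetryTimes(2)         // at most 2 retries, i.e. 3 calls in total
	mp.WithSpecifiedResultRetry(retry.AllErrorRetry())

	rc := retry.NewRetryContainer()
	// Wildcard applies the policy to every method; a method name would scope it instead.
	if err := rc.Init(map[string]retry.Policy{retry.Wildcard: retry.BuildMixedPolicy(mp)}, nil); err != nil {
		panic(err)
	}
	return rc
}
```

With that configuration, in the Case 1 scenario above (the first call answers after 300ms and a retry takes 150ms), the duplicate sent at 100ms finishes at roughly 100ms + 150ms = 250ms, which matches the cost asserted in the mixed-retry subtests.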
@@ -39,6 +41,8 @@ func (t Type) String() string { return "Failure" case BackupType: return "Backup" + case MixedType: + return "Mixed" } return "" } @@ -59,6 +63,14 @@ func BuildBackupRequest(p *BackupPolicy) Policy { return Policy{Enable: true, Type: BackupType, BackupPolicy: p} } +// BuildMixedPolicy is used to build Policy with *MixedPolicy +func BuildMixedPolicy(p *MixedPolicy) Policy { + if p == nil { + return Policy{} + } + return Policy{Enable: true, Type: MixedType, MixedPolicy: p} +} + // Policy contains all retry policies // DON'T FORGET to update Equals() and DeepCopy() if you add new fields type Policy struct { @@ -68,6 +80,7 @@ type Policy struct { // notice: only one retry policy can be enabled, which one depend on Policy.Type FailurePolicy *FailurePolicy `json:"failure_policy,omitempty"` BackupPolicy *BackupPolicy `json:"backup_policy,omitempty"` + MixedPolicy *MixedPolicy `json:"mixed_policy,omitempty"` } func (p *Policy) DeepCopy() *Policy { @@ -79,6 +92,7 @@ func (p *Policy) DeepCopy() *Policy { Type: p.Type, FailurePolicy: p.FailurePolicy.DeepCopy(), BackupPolicy: p.BackupPolicy.DeepCopy(), + MixedPolicy: p.MixedPolicy.DeepCopy(), } } @@ -104,6 +118,13 @@ type BackupPolicy struct { RetrySameNode bool `json:"retry_same_node"` } +// MixedPolicy for failure retry +// DON'T FORGET to update Equals() and DeepCopy() if you add new fields +type MixedPolicy struct { + RetryDelayMS uint32 `json:"retry_delay_ms"` + FailurePolicy +} + // StopPolicy is a group policies to decide when stop retry type StopPolicy struct { MaxRetryTimes int `json:"max_retry_times"` @@ -185,135 +206,6 @@ func (p Policy) Equals(np Policy) bool { return true } -// Equals to check if FailurePolicy is equal -func (p *FailurePolicy) Equals(np *FailurePolicy) bool { - if p == nil { - return np == nil - } - if np == nil { - return false - } - if p.StopPolicy != np.StopPolicy { - return false - } - if !p.BackOffPolicy.Equals(np.BackOffPolicy) { - return false - } - if p.RetrySameNode != np.RetrySameNode { - return false - } - if p.Extra != np.Extra { - return false - } - // don't need to check `ShouldResultRetry`, ShouldResultRetry is only setup by option - // in remote config case will always return false if check it - return true -} - -func (p *FailurePolicy) DeepCopy() *FailurePolicy { - if p == nil { - return nil - } - return &FailurePolicy{ - StopPolicy: p.StopPolicy, - BackOffPolicy: p.BackOffPolicy.DeepCopy(), - RetrySameNode: p.RetrySameNode, - ShouldResultRetry: p.ShouldResultRetry, // don't need DeepCopy - Extra: p.Extra, - } -} - -// IsRespRetryWithCtxNonNil is used to check if RespRetryWithCtx is nil. -func (p *FailurePolicy) IsRespRetryWithCtxNonNil() bool { - return p.ShouldResultRetry != nil && p.ShouldResultRetry.RespRetryWithCtx != nil -} - -// IsErrorRetryWithCtxNonNil is used to check if ErrorRetryWithCtx is nil -func (p *FailurePolicy) IsErrorRetryWithCtxNonNil() bool { - return p.ShouldResultRetry != nil && p.ShouldResultRetry.ErrorRetryWithCtx != nil -} - -// IsRespRetryNonNil is used to check if RespRetry is nil. -// Deprecated: please use IsRespRetryWithCtxNonNil instead of IsRespRetryNonNil. -func (p *FailurePolicy) IsRespRetryNonNil() bool { - return p.ShouldResultRetry != nil && p.ShouldResultRetry.RespRetry != nil -} - -// IsErrorRetryNonNil is used to check if ErrorRetry is nil. -// Deprecated: please use IsErrorRetryWithCtxNonNil instead of IsErrorRetryNonNil. 
-func (p *FailurePolicy) IsErrorRetryNonNil() bool { - return p.ShouldResultRetry != nil && p.ShouldResultRetry.ErrorRetry != nil -} - -// IsRetryForTimeout is used to check if timeout error need to retry -func (p *FailurePolicy) IsRetryForTimeout() bool { - return p.ShouldResultRetry == nil || !p.ShouldResultRetry.NotRetryForTimeout -} - -// IsRespRetry is used to check if the resp need to do retry. -func (p *FailurePolicy) IsRespRetry(ctx context.Context, resp interface{}, ri rpcinfo.RPCInfo) bool { - // note: actually, it is better to check IsRespRetry to ignore the bad cases, - // but IsRespRetry is a deprecated definition and here will be executed for every call, depends on ConvertResultRetry to ensure the compatibility - return p.IsRespRetryWithCtxNonNil() && p.ShouldResultRetry.RespRetryWithCtx(ctx, resp, ri) -} - -// IsErrorRetry is used to check if the error need to do retry. -func (p *FailurePolicy) IsErrorRetry(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { - // note: actually, it is better to check IsErrorRetry to ignore the bad cases, - // but IsErrorRetry is a deprecated definition and here will be executed for every call, depends on ConvertResultRetry to ensure the compatibility - return p.IsErrorRetryWithCtxNonNil() && p.ShouldResultRetry.ErrorRetryWithCtx(ctx, err, ri) -} - -// ConvertResultRetry is used to convert 'ErrorRetry and RespRetry' to 'ErrorRetryWithCtx and RespRetryWithCtx' -func (p *FailurePolicy) ConvertResultRetry() { - if p == nil || p.ShouldResultRetry == nil { - return - } - rr := p.ShouldResultRetry - if rr.ErrorRetry != nil && rr.ErrorRetryWithCtx == nil { - rr.ErrorRetryWithCtx = func(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { - return rr.ErrorRetry(err, ri) - } - } - if rr.RespRetry != nil && rr.RespRetryWithCtx == nil { - rr.RespRetryWithCtx = func(ctx context.Context, resp interface{}, ri rpcinfo.RPCInfo) bool { - return rr.RespRetry(resp, ri) - } - } -} - -// Equals to check if BackupPolicy is equal -func (p *BackupPolicy) Equals(np *BackupPolicy) bool { - if p == nil { - return np == nil - } - if np == nil { - return false - } - if p.RetryDelayMS != np.RetryDelayMS { - return false - } - if p.StopPolicy != np.StopPolicy { - return false - } - if p.RetrySameNode != np.RetrySameNode { - return false - } - - return true -} - -func (p *BackupPolicy) DeepCopy() *BackupPolicy { - if p == nil { - return nil - } - return &BackupPolicy{ - RetryDelayMS: p.RetryDelayMS, - StopPolicy: p.StopPolicy, // not a pointer, will copy the value here - RetrySameNode: p.RetrySameNode, - } -} - // Equals to check if BackOffPolicy is equal. 
func (p *BackOffPolicy) Equals(np *BackOffPolicy) bool { if p == nil { @@ -368,3 +260,16 @@ func checkCBErrorRate(p *CBPolicy) error { } return nil } + +func checkStopPolicy(sp *StopPolicy, maxRetryTimes int, retryer Retryer) error { + rt := sp.MaxRetryTimes + // 0 is valid, it means stop retry + if rt < 0 || rt > maxRetryTimes { + return fmt.Errorf("invalid MaxRetryTimes[%d]", rt) + } + if e := checkCBErrorRate(&sp.CBPolicy); e != nil { + sp.CBPolicy.ErrorRate = defaultCBErrRate + klog.Warnf("KITEX: %s retry - %s, use default %0.2f", retryer.Type(), e.Error(), defaultCBErrRate) + } + return nil +} diff --git a/pkg/retry/policy_test.go b/pkg/retry/policy_test.go index 156f877b29..fd9211ec10 100644 --- a/pkg/retry/policy_test.go +++ b/pkg/retry/policy_test.go @@ -21,7 +21,7 @@ import ( "reflect" "testing" - jsoniter "github.com/json-iterator/go" + "github.com/bytedance/sonic" "github.com/cloudwego/kitex/internal/test" "github.com/cloudwego/kitex/pkg/rpcinfo" @@ -29,8 +29,7 @@ import ( "github.com/cloudwego/kitex/pkg/stats" ) -var ( - jsoni = jsoniter.ConfigCompatibleWithStandardLibrary +const ( method = "test" ) @@ -40,11 +39,11 @@ func TestFailureRetryPolicy(t *testing.T) { // case 1 fp.WithMaxRetryTimes(3) - jsonRet, err := jsoni.MarshalToString(fp) + jsonRet, err := sonic.MarshalString(fp) test.Assert(t, err == nil, err) var fp2 FailurePolicy - err = jsoni.UnmarshalFromString(jsonRet, &fp2) + err = sonic.UnmarshalString(jsonRet, &fp2) test.Assert(t, err == nil, err) test.Assert(t, fp.Equals(&fp2)) @@ -52,23 +51,23 @@ func TestFailureRetryPolicy(t *testing.T) { fp.WithMaxRetryTimes(2) fp.WithRetrySameNode() fp.WithFixedBackOff(10) - jsonRet, err = jsoni.MarshalToString(fp) + jsonRet, err = sonic.MarshalString(fp) test.Assert(t, err == nil, err) // case 3 var fp3 FailurePolicy - err = jsoni.UnmarshalFromString(jsonRet, &fp3) + err = sonic.UnmarshalString(jsonRet, &fp3) test.Assert(t, err == nil, err) test.Assert(t, fp.Equals(&fp3), fp3) // case 4 fp.WithRetrySameNode() fp.WithRandomBackOff(10, 20) - jsonRet, err = jsoni.MarshalToString(fp) + jsonRet, err = sonic.MarshalString(fp) test.Assert(t, err == nil, err) var fp4 FailurePolicy - err = jsoni.UnmarshalFromString(jsonRet, &fp4) + err = sonic.UnmarshalString(jsonRet, &fp4) test.Assert(t, err == nil, err) test.Assert(t, fp.Equals(&fp4), fp4) @@ -76,11 +75,11 @@ func TestFailureRetryPolicy(t *testing.T) { fp.WithRetryBreaker(0.1) fp.WithDDLStop() fp.WithMaxDurationMS(100) - jsonRet, err = jsoni.MarshalToString(fp) + jsonRet, err = sonic.MarshalString(fp) test.Assert(t, err == nil, err) var fp5 FailurePolicy - err = jsoni.UnmarshalFromString(jsonRet, &fp5) + err = sonic.UnmarshalString(jsonRet, &fp5) test.Assert(t, err == nil, err) test.Assert(t, fp.Equals(&fp5), fp5) @@ -95,20 +94,20 @@ func TestFailureRetryPolicy(t *testing.T) { }, Extra: "{}", } - jsonRet, err = jsoni.MarshalToString(fp) + jsonRet, err = sonic.MarshalString(fp) test.Assert(t, err == nil, err) var fp6 FailurePolicy - err = jsoni.UnmarshalFromString(jsonRet, &fp6) + err = sonic.UnmarshalString(jsonRet, &fp6) test.Assert(t, err == nil, err) test.Assert(t, fp6.BackOffPolicy == nil) test.Assert(t, fp.Equals(&fp6), fp6) // case 7 fp.DisableChainRetryStop() - jsonRet, err = jsoni.MarshalToString(fp) + jsonRet, err = sonic.MarshalString(fp) test.Assert(t, err == nil, err) var fp7 FailurePolicy - err = jsoni.UnmarshalFromString(jsonRet, &fp7) + err = sonic.UnmarshalString(jsonRet, &fp7) test.Assert(t, err == nil, err) test.Assert(t, fp7.BackOffPolicy == nil) test.Assert(t, 
fp.Equals(&fp7), fp7) @@ -121,12 +120,13 @@ func TestFailureRetryPolicy(t *testing.T) { fp.WithSpecifiedResultRetry(&ShouldResultRetry{ErrorRetry: func(err error, ri rpcinfo.RPCInfo) bool { return false }}) + fp.convertResultRetry() test.Assert(t, fp.String() == "{StopPolicy:{MaxRetryTimes:2 MaxDurationMS:0 DisableChainStop:true "+ "DDLStop:false CBPolicy:{ErrorRate:0.1}} BackOffPolicy: RetrySameNode:false ShouldResultRetry:{ErrorRetry:true, RespRetry:false}}", fp) - jsonRet, err = jsoni.MarshalToString(fp) + jsonRet, err = sonic.MarshalString(fp) test.Assert(t, err == nil, err) var fp9 FailurePolicy - err = jsoni.UnmarshalFromString(jsonRet, &fp9) + err = sonic.UnmarshalString(jsonRet, &fp9) test.Assert(t, err == nil, err) test.Assert(t, fp.Equals(&fp9), fp9) test.Assert(t, fp9.ShouldResultRetry == nil) @@ -139,13 +139,14 @@ func TestFailureRetryPolicyWithResultRetry(t *testing.T) { }, ErrorRetry: func(err error, ri rpcinfo.RPCInfo) bool { return false }}) + fp.convertResultRetry() test.Assert(t, fp.String() == "{StopPolicy:{MaxRetryTimes:2 MaxDurationMS:0 DisableChainStop:false DDLStop:false "+ "CBPolicy:{ErrorRate:0.1}} BackOffPolicy:&{BackOffType:none CfgItems:map[]} RetrySameNode:false ShouldResultRetry:{ErrorRetry:true, RespRetry:true}}", fp) - jsonRet, err := jsoni.MarshalToString(fp) + jsonRet, err := sonic.MarshalString(fp) test.Assert(t, err == nil, err) var fp10 FailurePolicy - err = jsoni.UnmarshalFromString(jsonRet, &fp10) + err = sonic.UnmarshalString(jsonRet, &fp10) test.Assert(t, err == nil, err) test.Assert(t, fp.Equals(&fp10), fp10) test.Assert(t, fp10.ShouldResultRetry == nil) @@ -167,21 +168,21 @@ func TestBackupRequest(t *testing.T) { // case 1 bp.WithMaxRetryTimes(2) - jsonRet, err := jsoni.MarshalToString(bp) + jsonRet, err := sonic.MarshalString(bp) test.Assert(t, err == nil, err) var bp2 BackupPolicy - err = jsoni.UnmarshalFromString(jsonRet, &bp2) + err = sonic.UnmarshalString(jsonRet, &bp2) test.Assert(t, err == nil, err) test.Assert(t, bp.Equals(&bp2)) // case 2 bp.DisableChainRetryStop() - jsonRet, err = jsoni.MarshalToString(bp) + jsonRet, err = sonic.MarshalString(bp) test.Assert(t, err == nil, err) var bp3 BackupPolicy - err = jsoni.UnmarshalFromString(jsonRet, &bp3) + err = sonic.UnmarshalString(jsonRet, &bp3) test.Assert(t, err == nil, err) test.Assert(t, bp.Equals(&bp3)) } @@ -194,11 +195,11 @@ func TestRetryPolicyBothNotNil(t *testing.T) { BackupPolicy: NewBackupPolicy(20), } ctx := context.Background() - jsonRet, err := jsoni.MarshalToString(p) + jsonRet, err := sonic.MarshalString(p) test.Assert(t, err == nil, err) var p2 Policy - err = jsoni.UnmarshalFromString(jsonRet, &p2) + err = sonic.UnmarshalString(jsonRet, &p2) test.Assert(t, err == nil, err) test.Assert(t, p2.Enable == true) test.Assert(t, p.Equals(p2)) @@ -221,11 +222,11 @@ func TestRetryPolicyBothNotNil(t *testing.T) { // test new policy both nil func TestRetryPolicyBothNil(t *testing.T) { p := Policy{} - jsonRet, err := jsoni.MarshalToString(p) + jsonRet, err := sonic.MarshalString(p) test.Assert(t, err == nil, err) var p2 Policy - err = jsoni.UnmarshalFromString(jsonRet, &p2) + err = sonic.UnmarshalString(jsonRet, &p2) test.Assert(t, err == nil, err) test.Assert(t, p.Equals(p2)) @@ -245,7 +246,7 @@ func TestRetryPolicyFailure(t *testing.T) { } jsonRet := 
`{"enable":true,"type":0,"failure_policy":{"stop_policy":{"max_retry_times":2,"max_duration_ms":0,"disable_chain_stop":false,"ddl_stop":false,"cb_policy":{"error_rate":0.1,"min_sample":200}},"backoff_policy":{"backoff_type":"none"},"retry_same_node":false}}` var p2 Policy - err := jsoni.UnmarshalFromString(jsonRet, &p2) + err := sonic.UnmarshalString(jsonRet, &p2) test.Assert(t, err == nil, err) test.Assert(t, p2.Enable) test.Assert(t, p.Equals(p2)) @@ -269,11 +270,11 @@ func TestRetryPolicyFailure(t *testing.T) { Enable: true, FailurePolicy: fp, } - jsonRet, err = jsoni.MarshalToString(p) + jsonRet, err = sonic.MarshalString(p) test.Assert(t, err == nil, err) var p3 Policy - err = jsoni.UnmarshalFromString(jsonRet, &p3) + err = sonic.UnmarshalString(jsonRet, &p3) test.Assert(t, err == nil, err) test.Assert(t, p.Equals(p3)) @@ -312,62 +313,62 @@ func TestPolicyNotEqual(t *testing.T) { RetrySameNode: false, }, } - jsonRet, err := jsoni.MarshalToString(policy) + jsonRet, err := sonic.MarshalString(policy) test.Assert(t, err == nil, err) // case1 enable not equal - err = jsoni.UnmarshalFromString(jsonRet, &p) + err = sonic.UnmarshalString(jsonRet, &p) test.Assert(t, err == nil, err) p.Enable = false test.Assert(t, !p.Equals(policy)) // case2 type not equal - err = jsoni.UnmarshalFromString(jsonRet, &p) + err = sonic.UnmarshalString(jsonRet, &p) test.Assert(t, err == nil, err) p.Type = BackupType test.Assert(t, !p.Equals(policy)) // case3 failurePolicy not equal - err = jsoni.UnmarshalFromString(jsonRet, &p) + err = sonic.UnmarshalString(jsonRet, &p) test.Assert(t, err == nil, err) p.FailurePolicy = nil test.Assert(t, !p.Equals(policy)) test.Assert(t, !policy.Equals(p)) // case4 failurePolicy stopPolicy not equal - err = jsoni.UnmarshalFromString(jsonRet, &p) + err = sonic.UnmarshalString(jsonRet, &p) test.Assert(t, err == nil, err) p.FailurePolicy.StopPolicy.MaxRetryTimes = 2 test.Assert(t, !p.Equals(policy)) // case5 failurePolicy backOffPolicy not equal - err = jsoni.UnmarshalFromString(jsonRet, &p) + err = sonic.UnmarshalString(jsonRet, &p) test.Assert(t, err == nil, err) p.FailurePolicy.BackOffPolicy = nil test.Assert(t, !p.Equals(policy)) test.Assert(t, !policy.Equals(p)) // case6 failurePolicy backOffPolicy backOffType not equal - err = jsoni.UnmarshalFromString(jsonRet, &p) + err = sonic.UnmarshalString(jsonRet, &p) test.Assert(t, err == nil, err) p.FailurePolicy.BackOffPolicy.BackOffType = RandomBackOffType test.Assert(t, !p.Equals(policy)) // case7 failurePolicy backOffPolicy len(cfgItems) not equal - err = jsoni.UnmarshalFromString(jsonRet, &p) + err = sonic.UnmarshalString(jsonRet, &p) test.Assert(t, err == nil, err) p.FailurePolicy.BackOffPolicy.CfgItems[MinMSBackOffCfgKey] = 100 test.Assert(t, !p.Equals(policy)) // case8 failurePolicy backOffPolicy cfgItems not equal p = Policy{} - err = jsoni.UnmarshalFromString(jsonRet, &p) + err = sonic.UnmarshalString(jsonRet, &p) test.Assert(t, err == nil, err) p.FailurePolicy.BackOffPolicy.CfgItems[FixMSBackOffCfgKey] = 101 test.Assert(t, !p.Equals(policy)) // case9 failurePolicy retrySameNode not equal - err = jsoni.UnmarshalFromString(jsonRet, &p) + err = sonic.UnmarshalString(jsonRet, &p) test.Assert(t, err == nil, err) p.FailurePolicy.RetrySameNode = true test.Assert(t, !p.Equals(policy)) @@ -390,31 +391,31 @@ func TestPolicyNotEqual(t *testing.T) { RetrySameNode: false, }, } - jsonRet, err = jsoni.MarshalToString(policy) + jsonRet, err = sonic.MarshalString(policy) test.Assert(t, err == nil, err) // case10 backupPolicy not equal p = 
Policy{} - err = jsoni.UnmarshalFromString(jsonRet, &p) + err = sonic.UnmarshalString(jsonRet, &p) test.Assert(t, err == nil, err) p.BackupPolicy = nil test.Assert(t, !p.Equals(policy)) test.Assert(t, !policy.Equals(p)) // case11 backupPolicy retryDelayMS not equal - err = jsoni.UnmarshalFromString(jsonRet, &p) + err = sonic.UnmarshalString(jsonRet, &p) test.Assert(t, err == nil, err) p.BackupPolicy.RetryDelayMS = 2 test.Assert(t, !p.Equals(policy)) // case12 backupPolicy stopPolicy not equal - err = jsoni.UnmarshalFromString(jsonRet, &p) + err = sonic.UnmarshalString(jsonRet, &p) test.Assert(t, err == nil, err) p.BackupPolicy.StopPolicy.MaxRetryTimes = 3 test.Assert(t, !p.Equals(policy)) // case13 backupPolicy retrySameNode not equal - err = jsoni.UnmarshalFromString(jsonRet, &p) + err = sonic.UnmarshalString(jsonRet, &p) test.Assert(t, err == nil, err) p.BackupPolicy.RetrySameNode = true test.Assert(t, !p.Equals(policy)) @@ -432,7 +433,7 @@ func TestPolicyNotRetryForTimeout(t *testing.T) { }, }} // case 1: ShouldResultRetry is nil, retry for timeout - test.Assert(t, fp.IsRetryForTimeout()) + test.Assert(t, fp.isRetryForTimeout()) // case 2: ShouldResultRetry is not nil, NotRetryForTimeout is false, retry for timeout fp.ShouldResultRetry = &ShouldResultRetry{ @@ -442,7 +443,7 @@ func TestPolicyNotRetryForTimeout(t *testing.T) { // case 3: ShouldResultRetry is not nil, NotRetryForTimeout is true, not retry for timeout fp.ShouldResultRetry.NotRetryForTimeout = true - test.Assert(t, !fp.IsRetryForTimeout()) + test.Assert(t, !fp.isRetryForTimeout()) } func genRPCInfo() rpcinfo.RPCInfo { @@ -747,3 +748,30 @@ func TestPolicy_DeepCopy(t *testing.T) { }) } } + +func TestCheckStopPolicy(t *testing.T) { + mp := NewMixedPolicy(100) + err := checkStopPolicy(&mp.StopPolicy, maxMixRetryTimes, &mixedRetryer{}) + test.Assert(t, err == nil, err) + + mp.StopPolicy.MaxRetryTimes = -1 + err = checkStopPolicy(&mp.StopPolicy, maxMixRetryTimes, &mixedRetryer{}) + test.Assert(t, err != nil, err) + test.Assert(t, err.Error() == "invalid MaxRetryTimes[-1]") + + mp.StopPolicy.MaxRetryTimes = 5 + err = checkStopPolicy(&mp.StopPolicy, maxMixRetryTimes, &mixedRetryer{}) + test.Assert(t, err != nil, err) + test.Assert(t, err.Error() == "invalid MaxRetryTimes[5]") + mp.StopPolicy.MaxRetryTimes = maxMixRetryTimes + + mp.StopPolicy.CBPolicy.ErrorRate = 0.5 + err = checkStopPolicy(&mp.StopPolicy, maxMixRetryTimes, &mixedRetryer{}) + test.Assert(t, err == nil, err) + test.Assert(t, mp.StopPolicy.CBPolicy.ErrorRate == defaultCBErrRate) + + mp.StopPolicy.CBPolicy.ErrorRate = -0.1 + err = checkStopPolicy(&mp.StopPolicy, maxMixRetryTimes, &mixedRetryer{}) + test.Assert(t, err == nil, err) + test.Assert(t, mp.StopPolicy.CBPolicy.ErrorRate == defaultCBErrRate) +} diff --git a/pkg/retry/retryer.go b/pkg/retry/retryer.go index 1f0b8b1507..6b485a0ffd 100644 --- a/pkg/retry/retryer.go +++ b/pkg/retry/retryer.go @@ -43,10 +43,6 @@ type Retryer interface { // If not satisfy won't execute Retryer.Do and return the reason message // Execute anyway for the first time regardless of able to retry. AllowRetry(ctx context.Context) (msg string, ok bool) - - // ShouldRetry to check if retry request can be called, it is checked in retryer.Do. - // If not satisfy will return the reason message - ShouldRetry(ctx context.Context, err error, callTimes int, req interface{}, cbKey string) (msg string, ok bool) UpdatePolicy(policy Policy) error // Retry policy execute func. recycleRI is to decide if the firstRI can be recycled. 
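The hunk below switches retryer construction on Policy.Type. As a quick orientation (an illustrative sketch, not code from this patch), the Policy returned by BuildMixedPolicy is what drives the new MixedType branch:

```go
package retryexample

import (
	"fmt"

	"github.com/cloudwego/kitex/pkg/retry"
)

// Sketch only: shows the Policy shape the type switch below dispatches on.
func describeMixedPolicy() {
	p := retry.BuildMixedPolicy(retry.NewMixedPolicy(100))
	// Enable=true plus Type=Mixed routes the container to the new mixedRetryer;
	// BackupType and the default branch keep using the existing retryers.
	fmt.Println(p.Enable, p.Type) // true Mixed
	// MixedPolicy embeds FailurePolicy, so the failure-retry knobs (StopPolicy,
	// BackOffPolicy, ShouldResultRetry) are configured the same way as before.
	fmt.Println(p.MixedPolicy.StopPolicy.MaxRetryTimes)
}
```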
@@ -369,9 +365,12 @@ func (rc *Container) WithRetryIfNeeded(ctx context.Context, callOptRetry *Policy // NewRetryer build a retryer with policy func NewRetryer(p Policy, r *ShouldResultRetry, cbC *cbContainer) (retryer Retryer, err error) { // just one retry policy can be enabled at same time - if p.Type == BackupType { + switch p.Type { + case MixedType: + retryer, err = newMixedRetryer(p, r, cbC) + case BackupType: retryer, err = newBackupRetryer(p, cbC) - } else { + default: retryer, err = newFailureRetryer(p, r, cbC) } return @@ -437,8 +436,11 @@ func (rc *Container) updateRetryer(rr *ShouldResultRetry) { rc.shouldResultRetry = rr if rc.shouldResultRetry != nil { rc.retryerMap.Range(func(key, value interface{}) bool { - if fr, ok := value.(*failureRetryer); ok { - fr.setSpecifiedResultRetryIfNeeded(rc.shouldResultRetry) + switch r := value.(type) { + case *failureRetryer: + r.setSpecifiedResultRetryIfNeeded(rc.shouldResultRetry, r.policy) + case *mixedRetryer: + r.setSpecifiedResultRetryIfNeeded(rc.shouldResultRetry, &r.policy.FailurePolicy) } return true }) diff --git a/pkg/retry/retryer_test.go b/pkg/retry/retryer_test.go index 0a13991b68..3d028c12d8 100644 --- a/pkg/retry/retryer_test.go +++ b/pkg/retry/retryer_test.go @@ -24,6 +24,8 @@ import ( "testing" "time" + "github.com/bytedance/sonic" + "github.com/cloudwego/kitex/internal/test" "github.com/cloudwego/kitex/pkg/circuitbreak" "github.com/cloudwego/kitex/pkg/discovery" @@ -38,6 +40,7 @@ var ( remoteTagKey = "k" remoteTagValue = "v" remoteTags = map[string]string{remoteTagKey: remoteTagValue} + sonici = sonic.Config{SortMapKeys: true}.Froze() ) // test new retry container @@ -112,8 +115,8 @@ func TestNewRetryContainer(t *testing.T) { RetryDelayMS: 0, }, }) - msg = "new retryer[test-Backup] failed, err=newBackupRetryer failed, err=invalid backup request delay duration or retryTimes, at " - test.Assert(t, rc.msg[:len(msg)] == msg) + msg = "new retryer[test-Backup] failed, err=newBackupRetryer failed, err=invalid retry delay duration in backupRetryer, at " + test.Assert(t, rc.msg[:len(msg)] == msg, rc.msg) // backupPolicy cBPolicy config invalid rc.NotifyPolicyChange(method, Policy{ @@ -142,8 +145,8 @@ func TestNewRetryContainer(t *testing.T) { }, }, }) - msg = "new retryer[test-Failure] failed, err=newfailureRetryer failed, err=invalid failure MaxRetryTimes[6], at " - test.Assert(t, rc.msg[:len(msg)] == msg) + msg = "new retryer[test-Failure] failed, err=newfailureRetryer failed, err=invalid MaxRetryTimes[6], at " + test.Assert(t, rc.msg[:len(msg)] == msg, rc.msg) // failurePolicy cBPolicy config invalid rc = NewRetryContainer() @@ -278,591 +281,121 @@ func TestNewRetryContainer(t *testing.T) { // test container dump func TestContainer_Dump(t *testing.T) { // test backupPolicy dump - rc := NewRetryContainerWithCB(nil, nil) - methodPolicies := map[string]Policy{ - method: { - Enable: true, - Type: 1, - BackupPolicy: NewBackupPolicy(20), - }, - } - rc.InitWithPolicies(methodPolicies) - err := rc.Init(map[string]Policy{Wildcard: { - Enable: true, - Type: 1, - BackupPolicy: NewBackupPolicy(20), - }}, nil) - test.Assert(t, err == nil, err) - rcDump, ok := rc.Dump().(map[string]interface{}) - test.Assert(t, ok) - hasCodeCfg, err := jsoni.MarshalToString(rcDump["has_code_cfg"]) - test.Assert(t, err == nil, err) - test.Assert(t, hasCodeCfg == "true", hasCodeCfg) - testStr, err := jsoni.MarshalToString(rcDump["test"]) - msg := 
`{"backupRequest":{"retry_delay_ms":20,"stop_policy":{"max_retry_times":1,"max_duration_ms":0,"disable_chain_stop":false,"ddl_stop":false,"cb_policy":{"error_rate":0.1}},"retry_same_node":false},"enable":true}` - test.Assert(t, err == nil, err) - test.Assert(t, testStr == msg) - - // test failurePolicy dump - rc = NewRetryContainerWithCB(nil, nil) - methodPolicies = map[string]Policy{ - method: { - Enable: true, - Type: FailureType, - FailurePolicy: NewFailurePolicy(), - }, - } - rc.InitWithPolicies(methodPolicies) - err = rc.Init(map[string]Policy{Wildcard: { - Enable: true, - Type: FailureType, - FailurePolicy: NewFailurePolicy(), - }}, nil) - test.Assert(t, err == nil, err) - rcDump, ok = rc.Dump().(map[string]interface{}) - test.Assert(t, ok) - hasCodeCfg, err = jsoni.MarshalToString(rcDump["has_code_cfg"]) - test.Assert(t, err == nil, err) - test.Assert(t, hasCodeCfg == "true") - testStr, err = jsoni.MarshalToString(rcDump["test"]) - msg = `{"enable":true,"failure_retry":{"stop_policy":{"max_retry_times":2,"max_duration_ms":0,"disable_chain_stop":false,"ddl_stop":false,"cb_policy":{"error_rate":0.1}},"backoff_policy":{"backoff_type":"none"},"retry_same_node":false,"extra":""},"specified_result_retry":{"error_retry":false,"old_error_retry":false,"old_resp_retry":false,"resp_retry":false}}` - test.Assert(t, err == nil, err) - test.Assert(t, testStr == msg, testStr) -} - -// test FailurePolicy call -func TestFailurePolicyCall(t *testing.T) { - // call while rpc timeout - ctx := context.Background() - rc := NewRetryContainer() - failurePolicy := NewFailurePolicy() - failurePolicy.BackOffPolicy.BackOffType = FixedBackOffType - failurePolicy.BackOffPolicy.CfgItems = map[BackOffCfgKey]float64{ - FixMSBackOffCfgKey: 100.0, - } - failurePolicy.StopPolicy.MaxDurationMS = 100 - err := rc.Init(map[string]Policy{Wildcard: { - Enable: true, - Type: 0, - FailurePolicy: failurePolicy, - }}, nil) - test.Assert(t, err == nil, err) - ri := genRPCInfo() - ctx = rpcinfo.NewCtxWithRPCInfo(ctx, ri) - _, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, func(ctx context.Context, r Retryer) (rpcinfo.RPCInfo, interface{}, error) { - return ri, nil, kerrors.ErrRPCTimeout - }, ri, nil) - test.Assert(t, err != nil, err) - test.Assert(t, !ok) - - // call normal - failurePolicy.StopPolicy.MaxDurationMS = 0 - err = rc.Init(map[string]Policy{Wildcard: { - Enable: true, - Type: 0, - FailurePolicy: failurePolicy, - }}, nil) - test.Assert(t, err == nil, err) - _, ok, err = rc.WithRetryIfNeeded(ctx, &Policy{}, func(ctx context.Context, r Retryer) (rpcinfo.RPCInfo, interface{}, error) { - return ri, nil, nil - }, ri, nil) - test.Assert(t, err == nil, err) - test.Assert(t, ok) -} - -// test retry with one time policy -func TestRetryWithOneTimePolicy(t *testing.T) { - // call while rpc timeout and exceed MaxDurationMS cause BackOffPolicy is wait fix 100ms, it is invalid config - failurePolicy := NewFailurePolicy() - failurePolicy.BackOffPolicy.BackOffType = FixedBackOffType - failurePolicy.BackOffPolicy.CfgItems = map[BackOffCfgKey]float64{ - FixMSBackOffCfgKey: 100.0, - } - failurePolicy.StopPolicy.MaxDurationMS = 100 - p := Policy{ - Enable: true, - Type: 0, - FailurePolicy: failurePolicy, - } - ri := genRPCInfo() - ctx := rpcinfo.NewCtxWithRPCInfo(context.Background(), ri) - _, ok, err := NewRetryContainer().WithRetryIfNeeded(ctx, &p, func(ctx context.Context, r Retryer) (rpcinfo.RPCInfo, interface{}, error) { - return ri, nil, kerrors.ErrRPCTimeout - }, ri, nil) - test.Assert(t, err != nil, err) - test.Assert(t, !ok) - - 
// call no MaxDurationMS limit, the retry will success - failurePolicy.StopPolicy.MaxDurationMS = 0 - var callTimes int32 - ctx = rpcinfo.NewCtxWithRPCInfo(context.Background(), genRPCInfo()) - _, ok, err = NewRetryContainer().WithRetryIfNeeded(ctx, &p, func(ctx context.Context, r Retryer) (rpcinfo.RPCInfo, interface{}, error) { - if atomic.LoadInt32(&callTimes) == 0 { - atomic.AddInt32(&callTimes, 1) - return ri, nil, kerrors.ErrRPCTimeout - } - return ri, nil, nil - }, ri, nil) - test.Assert(t, err == nil, err) - test.Assert(t, !ok) - - // call backup request - p = BuildBackupRequest(NewBackupPolicy(10)) - ctx = rpcinfo.NewCtxWithRPCInfo(context.Background(), genRPCInfo()) - callTimes = 0 - _, ok, err = NewRetryContainer().WithRetryIfNeeded(ctx, &p, func(ctx context.Context, r Retryer) (rpcinfo.RPCInfo, interface{}, error) { - if atomic.LoadInt32(&callTimes) == 0 || atomic.LoadInt32(&callTimes) == 1 { - atomic.AddInt32(&callTimes, 1) - time.Sleep(time.Millisecond * 100) - } - return ri, nil, nil - }, ri, nil) - test.Assert(t, err == nil, err) - test.Assert(t, !ok) - test.Assert(t, atomic.LoadInt32(&callTimes) == 2) -} - -// test specified error to retry -func TestSpecifiedErrorRetry(t *testing.T) { - retryWithTransError := func(callTimes int32) RPCCallFunc { - // fails for the first call if callTimes is initialized to 0 - return func(ctx context.Context, r Retryer) (rpcinfo.RPCInfo, interface{}, error) { - newVal := atomic.AddInt32(&callTimes, 1) - if newVal == 1 { - return genRPCInfo(), nil, remote.NewTransErrorWithMsg(1000, "mock") - } else { - return genRPCInfoWithRemoteTag(remoteTags), nil, nil - } + t.Run("backupPolicy dump", func(t *testing.T) { + rc := NewRetryContainerWithCB(nil, nil) + methodPolicies := map[string]Policy{ + method: { + Enable: true, + Type: BackupType, + BackupPolicy: NewBackupPolicy(20), + }, } - } - ri := genRPCInfo() - ctx := rpcinfo.NewCtxWithRPCInfo(context.Background(), ri) - - // case1: specified method retry with error - t.Run("case1", func(t *testing.T) { - rc := NewRetryContainer() - shouldResultRetry := &ShouldResultRetry{ErrorRetry: func(err error, ri rpcinfo.RPCInfo) bool { - if ri.To().Method() == method { - if te, ok := err.(*remote.TransError); ok && te.TypeID() == 1000 { - return true - } - } - return false - }} - err := rc.Init(map[string]Policy{Wildcard: BuildFailurePolicy(NewFailurePolicy())}, shouldResultRetry) - test.Assert(t, err == nil, err) - ri, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithTransError(0), ri, nil) + err := rc.Init(methodPolicies, nil) test.Assert(t, err == nil, err) - test.Assert(t, !ok) - v, ok := ri.To().Tag(remoteTagKey) + rcDump, ok := rc.Dump().(map[string]interface{}) test.Assert(t, ok) - test.Assert(t, v == remoteTagValue) - }) - - // case2: specified method retry with error, but use backup request config cannot be effective - t.Run("case2", func(t *testing.T) { - shouldResultRetry := &ShouldResultRetry{ErrorRetry: func(err error, ri rpcinfo.RPCInfo) bool { - if ri.To().Method() == method { - if te, ok := err.(*remote.TransError); ok && te.TypeID() == 1000 { - return true - } - } - return false - }} - rc := NewRetryContainer() - err := rc.Init(map[string]Policy{Wildcard: BuildBackupRequest(NewBackupPolicy(10))}, shouldResultRetry) - test.Assert(t, err == nil, err) - ri = genRPCInfo() - _, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithTransError(0), ri, nil) - test.Assert(t, err != nil, err) - test.Assert(t, !ok) - }) - - // case3: specified method retry with error, but method not match - 
t.Run("case3", func(t *testing.T) { - shouldResultRetry := &ShouldResultRetry{ErrorRetry: func(err error, ri rpcinfo.RPCInfo) bool { - if ri.To().Method() != method { - if te, ok := err.(*remote.TransError); ok && te.TypeID() == 1000 { - return true - } - } - return false - }} - rc := NewRetryContainer() - err := rc.Init(map[string]Policy{method: BuildFailurePolicy(NewFailurePolicy())}, shouldResultRetry) + hasCodeCfg := rcDump["has_code_cfg"].(bool) + test.Assert(t, hasCodeCfg) + testStr, err := sonici.MarshalToString(rcDump[method]) + msg := `{"backup_request":{"retry_delay_ms":20,"stop_policy":{"max_retry_times":1,"max_duration_ms":0,"disable_chain_stop":false,"ddl_stop":false,"cb_policy":{"error_rate":0.1}},"retry_same_node":false},"enable":true}` test.Assert(t, err == nil, err) - ri = genRPCInfo() - ri, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithTransError(0), ri, nil) - test.Assert(t, err != nil) - test.Assert(t, !ok) - _, ok = ri.To().Tag(remoteTagKey) - test.Assert(t, !ok) + test.Assert(t, testStr == msg) }) - // case4: all error retry - t.Run("case4", func(t *testing.T) { - rc := NewRetryContainer() - p := BuildFailurePolicy(NewFailurePolicyWithResultRetry(AllErrorRetry())) - ri = genRPCInfo() - ri, ok, err := rc.WithRetryIfNeeded(ctx, &p, retryWithTransError(0), ri, nil) + // test backupPolicy dump + t.Run("backupPolicy dump without code_cfg", func(t *testing.T) { + rc := NewRetryContainerWithCB(nil, nil) + policy := Policy{ + Enable: true, + Type: 1, + BackupPolicy: NewBackupPolicy(20), + } + err := rc.Init(nil, nil) + rc.NotifyPolicyChange(method, policy) test.Assert(t, err == nil, err) - test.Assert(t, !ok) - v, ok := ri.To().Tag(remoteTagKey) + rcDump, ok := rc.Dump().(map[string]interface{}) test.Assert(t, ok) - test.Assert(t, v == remoteTagValue) + hasCodeCfg := rcDump["has_code_cfg"].(bool) + test.Assert(t, !hasCodeCfg) + testStr, err := sonici.MarshalToString(rcDump[method]) + msg := `{"backup_request":{"retry_delay_ms":20,"stop_policy":{"max_retry_times":1,"max_duration_ms":0,"disable_chain_stop":false,"ddl_stop":false,"cb_policy":{"error_rate":0.1}},"retry_same_node":false},"enable":true}` + test.Assert(t, err == nil, err) + test.Assert(t, testStr == msg) }) -} - -// test specified resp to retry -func TestSpecifiedRespRetry(t *testing.T) { - retryResult := &mockResult{} - retryResp := mockResp{ - code: 500, - msg: "retry", - } - noRetryResp := mockResp{ - code: 0, - msg: "noretry", - } - var callTimes int32 - retryWithResp := func(ctx context.Context, r Retryer) (rpcinfo.RPCInfo, interface{}, error) { - newVal := atomic.AddInt32(&callTimes, 1) - if newVal == 1 { - retryResult.SetResult(retryResp) - return genRPCInfo(), retryResult, nil - } else { - retryResult.SetResult(noRetryResp) - return genRPCInfoWithRemoteTag(remoteTags), retryResult, nil - } - } - ctx := context.Background() - ri := genRPCInfo() - ctx = rpcinfo.NewCtxWithRPCInfo(ctx, ri) - rc := NewRetryContainer() - // case1: specified method retry with resp - shouldResultRetry := &ShouldResultRetry{RespRetry: func(resp interface{}, ri rpcinfo.RPCInfo) bool { - if ri.To().Method() == method { - if r, ok := resp.(*mockResult); ok && r.GetResult() == retryResp { - return true - } - } - return false - }} - err := rc.Init(map[string]Policy{Wildcard: BuildFailurePolicy(NewFailurePolicy())}, shouldResultRetry) - test.Assert(t, err == nil, err) - ri, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithResp, ri, nil) - test.Assert(t, err == nil, err) - test.Assert(t, retryResult.GetResult() == 
noRetryResp, retryResult) - test.Assert(t, !ok) - v, ok := ri.To().Tag(remoteTagKey) - test.Assert(t, ok) - test.Assert(t, v == remoteTagValue) - - // case2 specified method retry with resp, but use backup request config cannot be effective - atomic.StoreInt32(&callTimes, 0) - rc = NewRetryContainer() - err = rc.Init(map[string]Policy{Wildcard: BuildBackupRequest(NewBackupPolicy(100))}, shouldResultRetry) - test.Assert(t, err == nil, err) - ri = genRPCInfo() - ctx = rpcinfo.NewCtxWithRPCInfo(context.Background(), ri) - _, ok, err = rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithResp, ri, nil) - test.Assert(t, err == nil, err) - test.Assert(t, retryResult.GetResult() == retryResp, retryResp) - test.Assert(t, !ok) - - // case3: specified method retry with resp, but method not match - atomic.StoreInt32(&callTimes, 0) - shouldResultRetry = &ShouldResultRetry{RespRetry: func(resp interface{}, ri rpcinfo.RPCInfo) bool { - if ri.To().Method() != method { - if r, ok := resp.(*mockResult); ok && r.GetResult() == retryResp { - return true - } - } - return false - }} - rc = NewRetryContainer() - err = rc.Init(map[string]Policy{method: BuildFailurePolicy(NewFailurePolicy())}, shouldResultRetry) - test.Assert(t, err == nil, err) - ri = genRPCInfo() - ctx = rpcinfo.NewCtxWithRPCInfo(context.Background(), ri) - ri, ok, err = rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithResp, ri, nil) - test.Assert(t, err == nil, err) - test.Assert(t, retryResult.GetResult() == retryResp, retryResult) - test.Assert(t, ok) - _, ok = ri.To().Tag(remoteTagKey) - test.Assert(t, !ok) -} -// test specified error to retry with ErrorRetryWithCtx -func TestSpecifiedErrorRetryWithCtx(t *testing.T) { - retryWithTransError := func(callTimes int32) RPCCallFunc { - // fails for the first call if callTimes is initialized to 0 - return func(ctx context.Context, r Retryer) (rpcinfo.RPCInfo, interface{}, error) { - newVal := atomic.AddInt32(&callTimes, 1) - if newVal == 1 { - return genRPCInfo(), nil, remote.NewTransErrorWithMsg(1000, "mock") - } else { - return genRPCInfoWithRemoteTag(remoteTags), nil, nil - } + // test failurePolicy dump + t.Run("failurePolicy dump", func(t *testing.T) { + rc := NewRetryContainerWithCB(nil, nil) + methodPolicies := map[string]Policy{ + method: { + Enable: true, + Type: FailureType, + FailurePolicy: NewFailurePolicy(), + }, } - } - ri := genRPCInfo() - ctx := rpcinfo.NewCtxWithRPCInfo(context.Background(), ri) - - // case1: specified method retry with error - t.Run("case1", func(t *testing.T) { - rc := NewRetryContainer() - shouldResultRetry := &ShouldResultRetry{ErrorRetryWithCtx: func(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { - if ri.To().Method() == method { - if te, ok := err.(*remote.TransError); ok && te.TypeID() == 1000 { - return true - } - } - return false - }} - err := rc.Init(map[string]Policy{Wildcard: BuildFailurePolicy(NewFailurePolicy())}, shouldResultRetry) - test.Assert(t, err == nil, err) - ri, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithTransError(0), ri, nil) + err := rc.Init(methodPolicies, nil) test.Assert(t, err == nil, err) - test.Assert(t, !ok) - v, ok := ri.To().Tag(remoteTagKey) + rcDump, ok := rc.Dump().(map[string]interface{}) test.Assert(t, ok) - test.Assert(t, v == remoteTagValue) - }) - - // case2: specified method retry with error, but use backup request config cannot be effective - t.Run("case2", func(t *testing.T) { - shouldResultRetry := &ShouldResultRetry{ErrorRetryWithCtx: func(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { - 
if ri.To().Method() == method { - if te, ok := err.(*remote.TransError); ok && te.TypeID() == 1000 { - return true - } - } - return false - }} - rc := NewRetryContainer() - err := rc.Init(map[string]Policy{Wildcard: BuildBackupRequest(NewBackupPolicy(10))}, shouldResultRetry) - test.Assert(t, err == nil, err) - ri = genRPCInfo() - _, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithTransError(0), ri, nil) - test.Assert(t, err != nil, err) - test.Assert(t, !ok) - }) - - // case3: specified method retry with error, but method not match - t.Run("case3", func(t *testing.T) { - shouldResultRetry := &ShouldResultRetry{ErrorRetryWithCtx: func(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { - return ri.To().Method() != method - }} - rc := NewRetryContainer() - err := rc.Init(map[string]Policy{method: BuildFailurePolicy(NewFailurePolicy())}, shouldResultRetry) + hasCodeCfg := rcDump["has_code_cfg"].(bool) + test.Assert(t, hasCodeCfg) + testStr, err := sonici.MarshalToString(rcDump[method]) + msg := `{"enable":true,"failure_retry":{"stop_policy":{"max_retry_times":2,"max_duration_ms":0,"disable_chain_stop":false,"ddl_stop":false,"cb_policy":{"error_rate":0.1}},"backoff_policy":{"backoff_type":"none"},"retry_same_node":false,"extra":""},"specified_result_retry":{"error_retry":false,"old_error_retry":false,"old_resp_retry":false,"resp_retry":false}}` test.Assert(t, err == nil, err) - ri = genRPCInfo() - ri, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithTransError(0), ri, nil) - test.Assert(t, err != nil) - test.Assert(t, !ok) - _, ok = ri.To().Tag(remoteTagKey) - test.Assert(t, !ok) + test.Assert(t, testStr == msg, testStr) }) - // case4: all error retry - t.Run("case4", func(t *testing.T) { - rc := NewRetryContainer() - p := BuildFailurePolicy(NewFailurePolicyWithResultRetry(AllErrorRetry())) - ri = genRPCInfo() - ri, ok, err := rc.WithRetryIfNeeded(ctx, &p, retryWithTransError(0), ri, nil) + // test mixedPolicy dump + t.Run("mixedPolicy dump", func(t *testing.T) { + rc := NewRetryContainerWithCB(nil, nil) + policy := Policy{ + Enable: true, + Type: MixedType, + MixedPolicy: NewMixedPolicy(20), + } + err := rc.Init(nil, nil) test.Assert(t, err == nil, err) - test.Assert(t, !ok) - v, ok := ri.To().Tag(remoteTagKey) + rc.NotifyPolicyChange(method, policy) + rcDump, ok := rc.Dump().(map[string]interface{}) test.Assert(t, ok) - test.Assert(t, v == remoteTagValue) + hasCodeCfg := rcDump["has_code_cfg"].(bool) + test.Assert(t, !hasCodeCfg) + testStr, err := sonici.MarshalToString(rcDump[method]) + msg := `{"enable":true,"mixed_retry":{"retry_delay_ms":20,"stop_policy":{"max_retry_times":1,"max_duration_ms":0,"disable_chain_stop":false,"ddl_stop":false,"cb_policy":{"error_rate":0.1}},"backoff_policy":{"backoff_type":"none"},"retry_same_node":false,"extra":""},"specified_result_retry":{"error_retry":false,"old_error_retry":false,"old_resp_retry":false,"resp_retry":false}}` + test.Assert(t, err == nil, err) + test.Assert(t, testStr == msg, testStr) }) - // case5: specified method retry with error, only ctx has some info then retry - ctxKeyVal := "ctxKeyVal" - t.Run("case5", func(t *testing.T) { - shouldResultRetry := &ShouldResultRetry{ErrorRetryWithCtx: func(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { - if ri.To().Method() == method && ctx.Value(ctxKeyVal) == ctxKeyVal { + // test mixedPolicy dump + t.Run("mixedPolicy dump with customized retry", func(t *testing.T) { + rc := NewRetryContainerWithCB(nil, nil) + policy := Policy{ + Enable: true, + Type: MixedType, + 
MixedPolicy: NewMixedPolicy(20), + } + rr := &ShouldResultRetry{ErrorRetryWithCtx: func(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { + if ri.To().Method() == method { if te, ok := err.(*remote.TransError); ok && te.TypeID() == 1000 { return true } } return false }} - rc := NewRetryContainer() - err := rc.Init(map[string]Policy{method: BuildFailurePolicy(NewFailurePolicy())}, shouldResultRetry) + err := rc.Init(nil, rr) test.Assert(t, err == nil, err) - ri = genRPCInfo() - ctx = context.WithValue(ctx, ctxKeyVal, ctxKeyVal) - ri, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithTransError(0), ri, nil) - test.Assert(t, err == nil, err) - test.Assert(t, !ok) - v, ok := ri.To().Tag(remoteTagKey) - test.Assert(t, ok) - test.Assert(t, v == remoteTagValue) - }) -} - -// test specified error to retry, but has both old and new policy, the new one will be effective -func TestSpecifiedErrorRetryHasOldAndNew(t *testing.T) { - retryWithTransError := func(callTimes int32) RPCCallFunc { - // fails for the first call if callTimes is initialized to 0 - return func(ctx context.Context, r Retryer) (rpcinfo.RPCInfo, interface{}, error) { - newVal := atomic.AddInt32(&callTimes, 1) - if newVal == 1 { - return genRPCInfo(), nil, remote.NewTransErrorWithMsg(1000, "mock") - } else { - return genRPCInfoWithRemoteTag(remoteTags), nil, nil - } - } - } - ri := genRPCInfo() - ctx := rpcinfo.NewCtxWithRPCInfo(context.Background(), ri) - - // case1: ErrorRetryWithCtx will retry, but ErrorRetry not retry, the expect result is do retry - t.Run("case1", func(t *testing.T) { - rc := NewRetryContainer() - shouldResultRetry := &ShouldResultRetry{ - ErrorRetryWithCtx: func(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { - return true - }, - ErrorRetry: func(err error, ri rpcinfo.RPCInfo) bool { - return false - }, - } - err := rc.Init(map[string]Policy{Wildcard: BuildFailurePolicy(NewFailurePolicy())}, shouldResultRetry) - test.Assert(t, err == nil, err) - ri, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithTransError(0), ri, nil) - test.Assert(t, err == nil, err) - test.Assert(t, !ok) - v, ok := ri.To().Tag(remoteTagKey) + rc.NotifyPolicyChange(method, policy) + rcDump, ok := rc.Dump().(map[string]interface{}) test.Assert(t, ok) - test.Assert(t, v == remoteTagValue) - }) - - // case2: ErrorRetryWithCtx not retry, but ErrorRetry retry, the expect result is that not do retry - t.Run("case2", func(t *testing.T) { - shouldResultRetry := &ShouldResultRetry{ - ErrorRetryWithCtx: func(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { - return false - }, - ErrorRetry: func(err error, ri rpcinfo.RPCInfo) bool { - return true - }, - } - rc := NewRetryContainer() - err := rc.Init(map[string]Policy{Wildcard: BuildFailurePolicy(NewFailurePolicy())}, shouldResultRetry) + hasCodeCfg := rcDump["has_code_cfg"].(bool) + test.Assert(t, !hasCodeCfg) + testStr, err := sonici.MarshalToString(rcDump[method]) + msg := `{"enable":true,"mixed_retry":{"retry_delay_ms":20,"stop_policy":{"max_retry_times":1,"max_duration_ms":0,"disable_chain_stop":false,"ddl_stop":false,"cb_policy":{"error_rate":0.1}},"backoff_policy":{"backoff_type":"none"},"retry_same_node":false,"extra":""},"specified_result_retry":{"error_retry":true,"old_error_retry":false,"old_resp_retry":false,"resp_retry":false}}` test.Assert(t, err == nil, err) - ri, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithTransError(0), ri, nil) - test.Assert(t, err != nil) - test.Assert(t, !ok) - _, ok = ri.To().Tag(remoteTagKey) - 
test.Assert(t, !ok) + test.Assert(t, testStr == msg, testStr) }) } -// test specified resp to retry with ErrorRetryWithCtx -func TestSpecifiedRespRetryWithCtx(t *testing.T) { - retryResult := &mockResult{} - retryResp := mockResp{ - code: 500, - msg: "retry", - } - noRetryResp := mockResp{ - code: 0, - msg: "noretry", - } - var callTimes int32 - retryWithResp := func(ctx context.Context, r Retryer) (rpcinfo.RPCInfo, interface{}, error) { - newVal := atomic.AddInt32(&callTimes, 1) - if newVal == 1 { - retryResult.SetResult(retryResp) - return genRPCInfo(), retryResult, nil - } else { - retryResult.SetResult(noRetryResp) - return genRPCInfoWithRemoteTag(remoteTags), retryResult, nil - } - } - ctx := context.Background() - ri := genRPCInfo() - ctx = rpcinfo.NewCtxWithRPCInfo(ctx, ri) - rc := NewRetryContainer() - // case1: specified method retry with resp - shouldResultRetry := &ShouldResultRetry{RespRetryWithCtx: func(ctx context.Context, resp interface{}, ri rpcinfo.RPCInfo) bool { - if ri.To().Method() == method { - if r, ok := resp.(*mockResult); ok && r.GetResult() == retryResp { - return true - } - } - return false - }} - err := rc.Init(map[string]Policy{Wildcard: BuildFailurePolicy(NewFailurePolicy())}, shouldResultRetry) - test.Assert(t, err == nil, err) - ri, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithResp, ri, nil) - test.Assert(t, err == nil, err) - test.Assert(t, retryResult.GetResult() == noRetryResp, retryResult) - test.Assert(t, !ok) - v, ok := ri.To().Tag(remoteTagKey) - test.Assert(t, ok) - test.Assert(t, v == remoteTagValue) - - // case2 specified method retry with resp, but use backup request config cannot be effective - atomic.StoreInt32(&callTimes, 0) - rc = NewRetryContainer() - err = rc.Init(map[string]Policy{Wildcard: BuildBackupRequest(NewBackupPolicy(100))}, shouldResultRetry) - test.Assert(t, err == nil, err) - ri = genRPCInfo() - ctx = rpcinfo.NewCtxWithRPCInfo(context.Background(), ri) - _, ok, err = rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithResp, ri, nil) - test.Assert(t, err == nil, err) - test.Assert(t, retryResult.GetResult() == retryResp, retryResp) - test.Assert(t, !ok) - - // case3: specified method retry with resp, but method not match - atomic.StoreInt32(&callTimes, 0) - shouldResultRetry = &ShouldResultRetry{RespRetryWithCtx: func(ctx context.Context, resp interface{}, ri rpcinfo.RPCInfo) bool { - if ri.To().Method() != method { - if r, ok := resp.(*mockResult); ok && r.GetResult() == retryResp { - return true - } - } - return false - }} - rc = NewRetryContainer() - err = rc.Init(map[string]Policy{method: BuildFailurePolicy(NewFailurePolicy())}, shouldResultRetry) - test.Assert(t, err == nil, err) - ri = genRPCInfo() - ctx = rpcinfo.NewCtxWithRPCInfo(context.Background(), ri) - ri, ok, err = rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithResp, ri, nil) - test.Assert(t, err == nil, err) - test.Assert(t, retryResult.GetResult() == retryResp, retryResult) - test.Assert(t, ok) - _, ok = ri.To().Tag(remoteTagKey) - test.Assert(t, !ok) - - // case4: specified method retry with resp, only ctx has some info then retry - ctxKeyVal := "ctxKeyVal" - atomic.StoreInt32(&callTimes, 0) - shouldResultRetry2 := &ShouldResultRetry{RespRetryWithCtx: func(ctx context.Context, resp interface{}, ri rpcinfo.RPCInfo) bool { - if ri.To().Method() == method && ctx.Value(ctxKeyVal) == ctxKeyVal { - if r, ok := resp.(*mockResult); ok && r.GetResult() == retryResp { - return true - } - } - return false - }} - err = rc.Init(map[string]Policy{Wildcard: 
BuildFailurePolicy(NewFailurePolicy())}, shouldResultRetry2) - test.Assert(t, err == nil, err) - ctx = context.WithValue(ctx, ctxKeyVal, ctxKeyVal) - ri, ok, err = rc.WithRetryIfNeeded(ctx, &Policy{}, retryWithResp, ri, nil) - test.Assert(t, err == nil, err) - test.Assert(t, retryResult.GetResult() == noRetryResp, retryResult) - test.Assert(t, !ok) - v, ok = ri.To().Tag(remoteTagKey) - test.Assert(t, ok) - test.Assert(t, v == remoteTagValue) -} - // test different method use different retry policy func TestDifferentMethodConfig(t *testing.T) { var callTimes int32 @@ -931,103 +464,6 @@ func TestDifferentMethodConfig(t *testing.T) { test.Assert(t, ok) } -func TestResultRetryWithPolicyChange(t *testing.T) { - rc := NewRetryContainer() - shouldResultRetry := &ShouldResultRetry{ErrorRetry: func(err error, ri rpcinfo.RPCInfo) bool { - if ri.To().Method() == method { - if te, ok := err.(*remote.TransError); ok && te.TypeID() == 1000 { - return true - } - } - return false - }} - err := rc.Init(nil, shouldResultRetry) - test.Assert(t, err == nil, err) - - // case 1: first time trigger NotifyPolicyChange, the `initRetryer` will be executed, check if the ShouldResultRetry is not nil - rc.NotifyPolicyChange(Wildcard, BuildFailurePolicy(NewFailurePolicy())) - r := rc.getRetryer(context.Background(), genRPCInfo()) - fr, ok := r.(*failureRetryer) - test.Assert(t, ok) - test.Assert(t, fr.policy.ShouldResultRetry == shouldResultRetry) - - // case 2: second time trigger NotifyPolicyChange, the `UpdatePolicy` will be executed, check if the ShouldResultRetry is not nil - rc.NotifyPolicyChange(Wildcard, BuildFailurePolicy(NewFailurePolicy())) - r = rc.getRetryer(context.Background(), genRPCInfo()) - fr, ok = r.(*failureRetryer) - test.Assert(t, ok) - test.Assert(t, fr.policy.ShouldResultRetry == shouldResultRetry) -} - -func TestResultRetryWithCtxWhenPolicyChange(t *testing.T) { - rc := NewRetryContainer() - shouldResultRetry := &ShouldResultRetry{ErrorRetryWithCtx: func(ctx context.Context, err error, ri rpcinfo.RPCInfo) bool { - if ri.To().Method() == method { - if te, ok := err.(*remote.TransError); ok && te.TypeID() == 1000 { - return true - } - } - return false - }} - err := rc.Init(nil, shouldResultRetry) - test.Assert(t, err == nil, err) - - // case 1: first time trigger NotifyPolicyChange, the `initRetryer` will be executed, check if the ShouldResultRetry is not nil - rc.NotifyPolicyChange(Wildcard, BuildFailurePolicy(NewFailurePolicy())) - r := rc.getRetryer(context.Background(), genRPCInfo()) - fr, ok := r.(*failureRetryer) - test.Assert(t, ok) - test.Assert(t, fr.policy.ShouldResultRetry == shouldResultRetry) - - // case 2: second time trigger NotifyPolicyChange, the `UpdatePolicy` will be executed, check if the ShouldResultRetry is not nil - rc.NotifyPolicyChange(Wildcard, BuildFailurePolicy(NewFailurePolicy())) - r = rc.getRetryer(context.Background(), genRPCInfo()) - fr, ok = r.(*failureRetryer) - test.Assert(t, ok) - test.Assert(t, fr.policy.ShouldResultRetry == shouldResultRetry) -} - -// test BackupPolicy call while rpcTime > delayTime -func TestBackupPolicyCall(t *testing.T) { - ctx := context.Background() - rc := NewRetryContainer() - err := rc.Init(map[string]Policy{Wildcard: { - Enable: true, - Type: 1, - BackupPolicy: &BackupPolicy{ - RetryDelayMS: 30, - StopPolicy: StopPolicy{ - MaxRetryTimes: 2, - DisableChainStop: false, - CBPolicy: CBPolicy{ - ErrorRate: defaultCBErrRate, - }, - }, - }, - }}, nil) - test.Assert(t, err == nil, err) - - callTimes := int32(0) - firstRI := genRPCInfo() - 
secondRI := genRPCInfoWithRemoteTag(remoteTags) - ctx = rpcinfo.NewCtxWithRPCInfo(ctx, firstRI) - ri, ok, err := rc.WithRetryIfNeeded(ctx, &Policy{}, func(ctx context.Context, r Retryer) (rpcinfo.RPCInfo, interface{}, error) { - atomic.AddInt32(&callTimes, 1) - if atomic.LoadInt32(&callTimes) == 1 { - // mock timeout for the first request and get the response of the backup request. - time.Sleep(time.Millisecond * 50) - return firstRI, nil, nil - } - return secondRI, nil, nil - }, firstRI, nil) - test.Assert(t, err == nil, err) - test.Assert(t, atomic.LoadInt32(&callTimes) == 2) - test.Assert(t, !ok) - v, ok := ri.To().Tag(remoteTagKey) - test.Assert(t, ok) - test.Assert(t, v == remoteTagValue) -} - // test policy noRetry call func TestPolicyNoRetryCall(t *testing.T) { ctx := context.Background() @@ -1138,50 +574,9 @@ func retryCall(callTimes *int32, firstRI rpcinfo.RPCInfo, backup bool) RPCCallFu } } -func TestFailureRetryWithRPCInfo(t *testing.T) { - // failure retry - ctx := context.Background() - rc := NewRetryContainer() - - ri := genRPCInfo() - ctx = rpcinfo.NewCtxWithRPCInfo(ctx, ri) - rpcinfo.Record(ctx, ri, stats.RPCStart, nil) - - // call with retry policy - var callTimes int32 - policy := BuildFailurePolicy(NewFailurePolicy()) - ri, ok, err := rc.WithRetryIfNeeded(ctx, &policy, retryCall(&callTimes, ri, false), ri, nil) - test.Assert(t, err == nil, err) - test.Assert(t, !ok) - test.Assert(t, ri.Stats().GetEvent(stats.RPCStart).Status() == stats.StatusInfo) - test.Assert(t, ri.Stats().GetEvent(stats.RPCFinish).Status() == stats.StatusInfo) - test.Assert(t, ri.To().Address().String() == "10.20.30.40:8888") - test.Assert(t, atomic.LoadInt32(&callTimes) == 2) -} - -func TestBackupRetryWithRPCInfo(t *testing.T) { - // backup retry - ctx := context.Background() - rc := NewRetryContainer() - - ri := genRPCInfo() - ctx = rpcinfo.NewCtxWithRPCInfo(ctx, ri) - rpcinfo.Record(ctx, ri, stats.RPCStart, nil) - - // call with retry policy - var callTimes int32 - policy := BuildBackupRequest(NewBackupPolicy(10)) - ri, ok, err := rc.WithRetryIfNeeded(ctx, &policy, retryCall(&callTimes, ri, true), ri, nil) - test.Assert(t, err == nil, err) - test.Assert(t, !ok) - test.Assert(t, ri.Stats().GetEvent(stats.RPCStart).Status() == stats.StatusInfo) - test.Assert(t, ri.Stats().GetEvent(stats.RPCFinish).Status() == stats.StatusInfo) - test.Assert(t, ri.To().Address().String() == "10.20.30.40:8888") - test.Assert(t, atomic.LoadInt32(&callTimes) == 2) -} - type mockResult struct { - result mockResp + result mockResp + callTimes int32 sync.RWMutex } @@ -1190,18 +585,30 @@ type mockResp struct { msg string } -func (r *mockResult) GetResult() interface{} { +func (r *mockResult) getResult() interface{} { r.RLock() defer r.RUnlock() return r.result } -func (r *mockResult) SetResult(ret mockResp) { +func (r *mockResult) setResult(ret mockResp) { r.Lock() defer r.Unlock() r.result = ret } +func (r *mockResult) setCallTimes(ct int32) { + r.Lock() + defer r.Unlock() + r.callTimes = ct +} + +func (r *mockResult) getCallTimes() int32 { + r.RLock() + defer r.RUnlock() + return r.callTimes +} + func TestNewRetryContainerWithOptions(t *testing.T) { t.Run("no_option", func(t *testing.T) { rc := NewRetryContainer() diff --git a/pkg/retry/util.go b/pkg/retry/util.go index 3d00717153..8c4731e312 100644 --- a/pkg/retry/util.go +++ b/pkg/retry/util.go @@ -127,7 +127,7 @@ func makeRetryErr(ctx context.Context, msg string, callTimes int32) error { ri := rpcinfo.GetRPCInfo(ctx) to := ri.To() - errMsg := fmt.Sprintf("retry[%d] 
failed, %s, to=%s, method=%s", callTimes-1, msg, to.ServiceName(), to.Method())
+	errMsg := fmt.Sprintf("retry[%d] failed, %s, toService=%s, method=%s", callTimes-1, msg, to.ServiceName(), to.Method())
 	target := to.Address()
 	if target != nil {
 		errMsg = fmt.Sprintf("%s, remote=%s", errMsg, target.String())