From f9dd5e958b759dce68f8cdd1baaaa067c373b161 Mon Sep 17 00:00:00 2001 From: Gregory Newman-Smith <109068393+gregns1@users.noreply.github.com> Date: Tue, 9 May 2023 15:03:40 +0100 Subject: [PATCH 01/42] CBG-2894: Reject user auth when channel threshold is over 500 (#6214) * CBG-2894: Reject user auth when channel threshold is over 500 in serverless mode * fix panic where authenticator was needed and it wasn't available * linter issue * linter issue again * remove extra methods off interface * pass user into function * rebase * ensure 500 code is returned for http error added * updates based off comments * fix panic * updates based off comments * updates based off discussion yesterday * lint error * updates based off comments --- auth/auth.go | 91 ++++++++++++++++++++++++++++++++++--- auth/auth_test.go | 111 ++++++++++++++++++++++++++++++++++++++++++++++ auth/principal.go | 3 ++ auth/user.go | 4 ++ base/constants.go | 3 ++ base/error.go | 5 +++ db/database.go | 19 +++++--- 7 files changed, 224 insertions(+), 12 deletions(-) diff --git a/auth/auth.go b/auth/auth.go index 2572a2ddfa..118d9fbc1f 100644 --- a/auth/auth.go +++ b/auth/auth.go @@ -32,11 +32,12 @@ type Authenticator struct { } type AuthenticatorOptions struct { - ClientPartitionWindow time.Duration - ChannelsWarningThreshold *uint32 - SessionCookieName string - BcryptCost int - LogCtx context.Context + ClientPartitionWindow time.Duration + ChannelsWarningThreshold *uint32 + ServerlessChannelThreshold uint32 + SessionCookieName string + BcryptCost int + LogCtx context.Context // Collections defines the set of collections used by the authenticator when rebuilding channels. // Channels are only recomputed for collections included in this set. @@ -196,6 +197,17 @@ func (auth *Authenticator) getPrincipal(docID string, factory func() Principal) } changed = true } + // If the channel threshold has been set, we need to check the inherited channels across all scopes and collections against the limit + if auth.ServerlessChannelThreshold != 0 { + channelsLength, err := auth.getInheritedChannelsLength(user) + if err != nil { + return nil, nil, false, err + } + err = auth.checkChannelLimits(channelsLength, user) + if err != nil { + return nil, nil, false, err + } + } } if changed { @@ -223,6 +235,73 @@ func (auth *Authenticator) getPrincipal(docID string, factory func() Principal) return princ, nil } +// inheritedCollectionChannels returns channels for a given scope + collection +func (auth *Authenticator) inheritedCollectionChannels(user User, scope, collection string) (ch.TimedSet, error) { + roles, err := auth.getUserRoles(user) + if err != nil { + return nil, err + } + + channels := user.CollectionChannels(scope, collection) + for _, role := range roles { + roleSince := user.RoleNames()[role.Name()] + channels.AddAtSequence(role.CollectionChannels(scope, collection), roleSince.Sequence) + } + return channels, nil +} + +// getInheritedChannelsLength returns the number of channels a user has access to across all collections +func (auth *Authenticator) getInheritedChannelsLength(user User) (int, error) { + var cumulativeChannels int + for scope, collections := range auth.Collections { + for collection := range collections { + channels, err := auth.inheritedCollectionChannels(user, scope, collection) + if err != nil { + return 0, err + } + cumulativeChannels += len(channels) + } + } + return cumulativeChannels, nil +} + +// checkChannelLimits logs a warning when the warning threshold is met and returns an error when the channel limit is met +func
(auth *Authenticator) checkChannelLimits(channels int, user User) error { + // Error if ServerlessChannelThreshold is set and is >= than the threshold + if uint32(channels) >= auth.ServerlessChannelThreshold { + base.ErrorfCtx(auth.LogCtx, "User ID: %v channel count: %d exceeds %d for channels per user threshold. Auth will be rejected until rectified", + base.UD(user.Name()), channels, auth.ServerlessChannelThreshold) + return base.ErrMaximumChannelsForUserExceeded + } + + // This function is likely to be called once per session when a channel limit is applied, the sync once + // applied here ensures we don't fill logs with warnings about being over warning threshold. We may want + // to revisit this implementation around the warning threshold in future + user.GetWarnChanSync().Do(func() { + if channelsPerUserThreshold := auth.ChannelsWarningThreshold; channelsPerUserThreshold != nil { + if uint32(channels) >= *channelsPerUserThreshold { + base.WarnfCtx(auth.LogCtx, "User ID: %v channel count: %d exceeds %d for channels per user warning threshold", + base.UD(user.Name()), channels, *channelsPerUserThreshold) + } + } + }) + return nil +} + +// getUserRoles gets all roles a user has been granted +func (auth *Authenticator) getUserRoles(user User) ([]Role, error) { + roles := make([]Role, 0, len(user.RoleNames())) + for name := range user.RoleNames() { + role, err := auth.GetRole(name) + if err != nil { + return nil, err + } else if role != nil { + roles = append(roles, role) + } + } + return roles, nil +} + // Rebuild channels computes the full set of channels for all collections defined for the authenticator. // For each collection in Authenticator.collections: // - if there is no CollectionAccess on the principal for the collection, rebuilds channels for that collection @@ -230,6 +309,7 @@ func (auth *Authenticator) getPrincipal(docID string, factory func() Principal) func (auth *Authenticator) rebuildChannels(princ Principal) (changed bool, err error) { changed = false + for scope, collections := range auth.Collections { for collection, _ := range collections { // If collection channels are nil, they have been invalidated and must be rebuilt @@ -242,6 +322,7 @@ func (auth *Authenticator) rebuildChannels(princ Principal) (changed bool, err e } } } + return changed, nil } diff --git a/auth/auth_test.go b/auth/auth_test.go index d6d7079372..f1378b763d 100644 --- a/auth/auth_test.go +++ b/auth/auth_test.go @@ -2752,6 +2752,117 @@ func TestObtainChannelsForDeletedRole(t *testing.T) { } } +func TestServerlessChannelLimitsRoles(t *testing.T) { + testCases := []struct { + Name string + Collection bool + }{ + { + Name: "Single role", + }, + { + Name: "Muliple roles", + }, + } + for _, testCase := range testCases { + t.Run(testCase.Name, func(t *testing.T) { + testBucket := base.GetTestBucket(t) + defer testBucket.Close() + dataStore := testBucket.GetSingleDataStore() + var role2 Role + + opts := DefaultAuthenticatorOptions() + opts.ServerlessChannelThreshold = 5 + opts.Collections = map[string]map[string]struct{}{ + "scope1": {"collection1": struct{}{}, "collection2": struct{}{}}, + } + auth := NewAuthenticator(dataStore, nil, opts) + user1, err := auth.NewUser("user1", "pass", ch.BaseSetOf(t, "ABC")) + require.NoError(t, err) + err = auth.Save(user1) + require.NoError(t, err) + _, err = auth.AuthenticateUser("user1", "pass") + require.NoError(t, err) + + role1, err := auth.NewRole("role1", nil) + require.NoError(t, err) + if testCase.Name == "Single role" { + 
user1.SetExplicitRoles(ch.TimedSet{"role1": ch.NewVbSimpleSequence(1)}, 1) + require.NoError(t, auth.Save(user1)) + _, err = auth.AuthenticateUser("user1", "pass") + require.NoError(t, err) + + role1.SetCollectionExplicitChannels("scope1", "collection1", ch.AtSequence(ch.BaseSetOf(t, "ABC", "DEF", "GHI", "JKL"), 1), 1) + require.NoError(t, auth.Save(role1)) + } else { + role2, err = auth.NewRole("role2", nil) + require.NoError(t, err) + user1.SetExplicitRoles(ch.TimedSet{"role1": ch.NewVbSimpleSequence(1), "role2": ch.NewVbSimpleSequence(1)}, 1) + require.NoError(t, auth.Save(user1)) + role1.SetCollectionExplicitChannels("scope1", "collection1", ch.AtSequence(ch.BaseSetOf(t, "ABC", "DEF", "GHI", "JKL"), 1), 1) + role2.SetCollectionExplicitChannels("scope1", "collection2", ch.AtSequence(ch.BaseSetOf(t, "MNO", "PQR"), 1), 1) + require.NoError(t, auth.Save(role1)) + require.NoError(t, auth.Save(role2)) + } + _, err = auth.AuthenticateUser("user1", "pass") + require.Error(t, err) + }) + } +} + +func TestServerlessChannelLimits(t *testing.T) { + + testCases := []struct { + Name string + Collection bool + }{ + { + Name: "Collection not enabled", + Collection: false, + }, + { + Name: "Collection is enabled", + Collection: true, + }, + } + for _, testCase := range testCases { + t.Run(testCase.Name, func(t *testing.T) { + testBucket := base.GetTestBucket(t) + defer testBucket.Close() + dataStore := testBucket.GetSingleDataStore() + + opts := DefaultAuthenticatorOptions() + opts.ServerlessChannelThreshold = 5 + if testCase.Collection { + opts.Collections = map[string]map[string]struct{}{ + "scope1": {"collection1": struct{}{}, "collection2": struct{}{}}, + } + } + auth := NewAuthenticator(dataStore, nil, opts) + user1, err := auth.NewUser("user1", "pass", ch.BaseSetOf(t, "ABC")) + require.NoError(t, err) + err = auth.Save(user1) + require.NoError(t, err) + _, err = auth.AuthenticateUser("user1", "pass") + require.NoError(t, err) + + if !testCase.Collection { + user1.SetCollectionExplicitChannels("_default", "_default", ch.AtSequence(ch.BaseSetOf(t, "ABC", "DEF", "GHI", "JKL", "MNO", "PQR"), 1), 1) + err = auth.Save(user1) + require.NoError(t, err) + } else { + user1.SetCollectionExplicitChannels("scope1", "collection1", ch.AtSequence(ch.BaseSetOf(t, "ABC", "DEF", "GHI", "JKL"), 1), 1) + user1.SetCollectionExplicitChannels("scope1", "collection2", ch.AtSequence(ch.BaseSetOf(t, "MNO", "PQR"), 1), 1) + err = auth.Save(user1) + require.NoError(t, err) + } + _, err = auth.AuthenticateUser("user1", "pass") + require.Error(t, err) + assert.Contains(t, err.Error(), base.ErrMaximumChannelsForUserExceeded.Error()) + }) + } +} + func TestInvalidateRoles(t *testing.T) { testBucket := base.GetTestBucket(t) defer testBucket.Close() diff --git a/auth/principal.go b/auth/principal.go index 0f9d83ef7d..39c439c749 100644 --- a/auth/principal.go +++ b/auth/principal.go @@ -9,6 +9,7 @@ package auth import ( + "sync" "time" "github.com/couchbase/sync_gateway/base" @@ -125,6 +126,8 @@ type User interface { InitializeRoles() + GetWarnChanSync() *sync.Once + revokedChannels(since uint64, lowSeq uint64, triggeredBy uint64) RevokedChannels // Obtains the period over which the user had access to the given channel. Either directly or via a role. 
diff --git a/auth/user.go b/auth/user.go index 34582e76d3..54a2ca9090 100644 --- a/auth/user.go +++ b/auth/user.go @@ -183,6 +183,10 @@ func (user *userImpl) SetEmail(email string) error { return nil } +func (user *userImpl) GetWarnChanSync() *sync.Once { + return &user.warnChanThresholdOnce +} + func (user *userImpl) RoleNames() ch.TimedSet { if user.RoleInvalSeq != 0 { return nil diff --git a/base/constants.go b/base/constants.go index e5169a0aa8..ed02018fce 100644 --- a/base/constants.go +++ b/base/constants.go @@ -151,6 +151,9 @@ const ( // DefaultJavascriptTimeoutSecs is number of seconds before Javascript functions (i.e. the sync function or import filter) timeout // If set to zero, timeout is disabled. DefaultJavascriptTimeoutSecs = uint32(0) + + // ServerlessChannelLimit is hard limit on channels allowed per user when running in serverless mode + ServerlessChannelLimit = 500 ) const ( diff --git a/base/error.go b/base/error.go index 45bb127da5..f96a5a2b8d 100644 --- a/base/error.go +++ b/base/error.go @@ -67,6 +67,9 @@ var ( // ErrConfigRegistryReloadRequired is returned when a db config fetch requires a registry reload based on version mismatch (config is newer) ErrConfigRegistryReloadRequired = &sgError{"Config registry reload required"} + + // ErrMaximumChannelsForUserExceeded is returned when running in serverless mode and the user has more than 500 channels granted to them + ErrMaximumChannelsForUserExceeded = &sgError{fmt.Sprintf("User has exceeded maximum of %d channels", ServerlessChannelLimit)} ) func (e *sgError) Error() string { @@ -115,6 +118,8 @@ func ErrorAsHTTPStatus(err error) (int, string) { return http.StatusRequestEntityTooLarge, "Document too large!" case ErrViewTimeoutError: return http.StatusServiceUnavailable, unwrappedErr.Error() + case ErrMaximumChannelsForUserExceeded: + return http.StatusInternalServerError, "Maximum number of channels exceeded for this user" } // gocb V2 errors diff --git a/db/database.go b/db/database.go index e1026af8e2..e4e6b3ec7c 100644 --- a/db/database.go +++ b/db/database.go @@ -1065,16 +1065,21 @@ func (context *DatabaseContext) Authenticator(ctx context.Context) *auth.Authent if context.Options.UnsupportedOptions != nil && context.Options.UnsupportedOptions.WarningThresholds != nil { channelsWarningThreshold = context.Options.UnsupportedOptions.WarningThresholds.ChannelsPerUser } + var channelServerlessThreshold uint32 + if context.IsServerless() { + channelServerlessThreshold = base.ServerlessChannelLimit + } // Authenticators are lightweight & stateless, so it's OK to return a new one every time authenticator := auth.NewAuthenticator(context.MetadataStore, context, auth.AuthenticatorOptions{ - ClientPartitionWindow: context.Options.ClientPartitionWindow, - ChannelsWarningThreshold: channelsWarningThreshold, - SessionCookieName: sessionCookieName, - BcryptCost: context.Options.BcryptCost, - LogCtx: ctx, - Collections: context.CollectionNames, - MetaKeys: context.MetadataKeys, + ClientPartitionWindow: context.Options.ClientPartitionWindow, + ChannelsWarningThreshold: channelsWarningThreshold, + ServerlessChannelThreshold: channelServerlessThreshold, + SessionCookieName: sessionCookieName, + BcryptCost: context.Options.BcryptCost, + LogCtx: ctx, + Collections: context.CollectionNames, + MetaKeys: context.MetadataKeys, }) return authenticator From ae024a247c0ee62a5598121843e9904084ac15e1 Mon Sep 17 00:00:00 2001 From: Tor Colvin Date: Tue, 9 May 2023 16:21:08 -0400 Subject: [PATCH 02/42] Report the correct error (#6232) --- 
rest/config.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rest/config.go b/rest/config.go index 0d6e17feed..43b6d6a9f7 100644 --- a/rest/config.go +++ b/rest/config.go @@ -1372,7 +1372,7 @@ func (sc *ServerContext) migrateV30Configs(ctx context.Context) error { if getErr == base.ErrNotFound { continue } else if getErr != nil { - return fmt.Errorf("Error retrieving 3.0 config for bucket: %s, groupID: %s: %w", bucketName, groupID, err) + return fmt.Errorf("Error retrieving 3.0 config for bucket: %s, groupID: %s: %w", bucketName, groupID, getErr) } base.InfofCtx(ctx, base.KeyConfig, "Found legacy persisted config for database %s - migrating to db registry.", base.MD(dbConfig.Name)) From c776492da51e6ab3db869a25a9dd58f584687619 Mon Sep 17 00:00:00 2001 From: Tor Colvin Date: Wed, 10 May 2023 10:50:24 -0400 Subject: [PATCH 03/42] CBG-2916 Add database examples with scopes (#6233) --- .../collections-with-custom-scope.json | 38 +++++++++++++++++++ .../collections-with-default-collection.json | 38 +++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 examples/database_config/collections-with-custom-scope.json create mode 100644 examples/database_config/collections-with-default-collection.json diff --git a/examples/database_config/collections-with-custom-scope.json b/examples/database_config/collections-with-custom-scope.json new file mode 100644 index 0000000000..5d7a6c4bb6 --- /dev/null +++ b/examples/database_config/collections-with-custom-scope.json @@ -0,0 +1,38 @@ +{ + "name": "db", + "bucket": "bucket", + "scopes" : { + "scope1": { + "collections": { + "collection1" : { + "sync": ` + function(doc, oldDoc, meta) { + if (doc.type != "default") { + throw({forbidden : "Rejected document"}) + } + channel("legacy") + } + `, + "import_filter": ` + function(doc) { + return doc.type == "mobile" + } + ` + }, + "collection2" : { + "sync": ` + function(doc, oldDoc, meta) { + channel("collection1") + } + `, + "import_filter": ` + function(doc) { + return doc.type == "mobile" + } + ` + } + } + } + }, + "num_index_replicas": 0 +} diff --git a/examples/database_config/collections-with-default-collection.json b/examples/database_config/collections-with-default-collection.json new file mode 100644 index 0000000000..907b1f659f --- /dev/null +++ b/examples/database_config/collections-with-default-collection.json @@ -0,0 +1,38 @@ +{ + "name": "db", + "bucket": "bucket", + "scopes" : { + "_default": { + "collections": { + "_default" : { + "sync": ` + function(doc, oldDoc, meta) { + if (doc.collection != "default") { + throw({forbidden : "Rejected document"}) + } + channel("legacy") + } + `, + "import_filter": ` + function(doc) { + return doc.type == "mobile" + } + ` + }, + "collection1" : { + "sync": ` + function(doc, oldDoc, meta) { + channel("collection1") + } + `, + "import_filter": ` + function(doc) { + return doc.type == "mobile" + } + ` + } + } + } + }, + "num_index_replicas": 0 +} From ddb447f1f82092fe24fe1055dfa5f6e44562dabb Mon Sep 17 00:00:00 2001 From: Tor Colvin Date: Wed, 10 May 2023 10:51:04 -0400 Subject: [PATCH 04/42] Update scopes documentation (#6231) --- docs/api/components/schemas.yaml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/api/components/schemas.yaml b/docs/api/components/schemas.yaml index 40a4ac5cd4..8dcdd9ae17 100644 --- a/docs/api/components/schemas.yaml +++ b/docs/api/components/schemas.yaml @@ -1090,7 +1090,7 @@ CollectionConfig: sync: description: The Javascript function that newly created documents 
in this collection are ran through. type: string - example: 'function(doc){channel(doc.channels);}' + example: 'function(doc){channel("collection name");}' import_filter: description: |- This is the function that all imported documents in this collection are ran through in order to filter out what to import and what not to import. This allows you to control what is made available to Couchbase Mobile clients. If it is not set, then no documents are filtered when imported. @@ -1165,7 +1165,9 @@ Database: description: The name of the database. type: string sync: - description: The Javascript function that newly created documents are ran through for the _default scope and collection. + description: The Javascript function that newly created documents are ran through for the default scope and collection. + + If `scopes` parameter is set, this is ignored. type: string default: 'function(doc){channel(doc.channels);}' users: @@ -1203,9 +1205,11 @@ Database: default: 16 import_filter: description: |- - This is the function that all imported documents in the _default scope and collection are ran through in order to filter out what to import and what not to import. This allows you to control what is made available to Couchbase Mobile clients. If it is not set, then no documents are filtered when imported. + This is the function that all imported documents in the default scope and collection are ran through in order to filter out what to import and what not to import. This allows you to control what is made available to Couchbase Mobile clients. If it is not set, then no documents are filtered when imported. `import_docs` must be true to make this field applicable. + + If `scopes` parameter is set, this is ignored. type: string example: 'function(doc) { if (doc.type != ''mobile'') { return false; } return true; }' import_backup_old_rev: From 370a2bd1a14db1ad6b007689ed7902abb997f66b Mon Sep 17 00:00:00 2001 From: Gregory Newman-Smith <109068393+gregns1@users.noreply.github.com> Date: Thu, 11 May 2023 13:20:55 +0100 Subject: [PATCH 05/42] CBG-2895: Add static replication connection limit (#6226) --- base/error.go | 5 + base/stats.go | 7 ++ docs/api/components/schemas.yaml | 43 ++++++++ docs/api/paths/admin/_config.yaml | 2 +- rest/admin_api.go | 11 ++ rest/adminapitest/admin_api_test.go | 56 ++++++++++ rest/blip_sync.go | 45 ++++++++ rest/changes_api.go | 9 ++ rest/config_flags.go | 5 +- rest/config_startup.go | 5 +- rest/replicatortest/replicator_test.go | 141 +++++++++++++++++++++++++ rest/server_context.go | 10 ++ rest/utilities_testing_resttester.go | 15 +++ 13 files changed, 349 insertions(+), 5 deletions(-) diff --git a/base/error.go b/base/error.go index f96a5a2b8d..aaa7d0cf82 100644 --- a/base/error.go +++ b/base/error.go @@ -70,6 +70,9 @@ var ( // ErrMaximumChannelsForUserExceeded is returned when running in serverless mode and the user has more than 500 channels granted to them ErrMaximumChannelsForUserExceeded = &sgError{fmt.Sprintf("User has exceeded maximum of %d channels", ServerlessChannelLimit)} + + // ErrReplicationLimitExceeded is returned when then replication connection threshold is exceeded + ErrReplicationLimitExceeded = &sgError{"Replication limit exceeded. 
Try again later."} ) func (e *sgError) Error() string { @@ -115,6 +118,8 @@ func ErrorAsHTTPStatus(err error) (int, string) { return http.StatusRequestEntityTooLarge, "Document too large!" case ErrViewTimeoutError: return http.StatusServiceUnavailable, unwrappedErr.Error() + case ErrMaximumChannelsForUserExceeded: + return http.StatusInternalServerError, "Maximum number of channels exceeded for this user" + case ErrReplicationLimitExceeded: + return http.StatusServiceUnavailable, unwrappedErr.Error() } // gocb V2 errors diff --git a/base/stats.go b/base/stats.go index fd5560a78b..0bb2518d38 100644 --- a/base/stats.go +++ b/base/stats.go @@ -493,6 +493,8 @@ type DatabaseStats struct { SyncFunctionTime *SgwIntStat `json:"sync_function_time"` // The total number of times that a sync function encountered an exception (across all collections). SyncFunctionExceptionCount *SgwIntStat `json:"sync_function_exception_count"` + // The total number of times a replication connection is rejected due to it being over the threshold + NumReplicationsRejectedLimit *SgwIntStat `json:"num_replications_rejected_limit"` // These can be cleaned up in future versions of SGW, implemented as maps to reduce amount of potential risk // prior to Hydrogen release. These are not exported as part of prometheus and only exposed through expvars @@ -1412,6 +1414,10 @@ func (d *DbStats) initDatabaseStats() error { if err != nil { return err } + resUtil.NumReplicationsRejectedLimit, err = NewIntStat(SubsystemDatabaseKey, "num_replications_rejected_limit", labelKeys, labelVals, prometheus.CounterValue, 0) + if err != nil { + return err + } resUtil.ImportFeedMapStats = &ExpVarMapWrapper{new(expvar.Map).Init()} resUtil.CacheFeedMapStats = &ExpVarMapWrapper{new(expvar.Map).Init()} @@ -1453,6 +1459,7 @@ func (d *DbStats) unregisterDatabaseStats() { prometheus.Unregister(d.DatabaseStats.SyncFunctionCount) prometheus.Unregister(d.DatabaseStats.SyncFunctionTime) prometheus.Unregister(d.DatabaseStats.SyncFunctionExceptionCount) + prometheus.Unregister(d.DatabaseStats.NumReplicationsRejectedLimit) } func (d *DbStats) CollectionStat(scopeName, collectionName string) (*CollectionStats, error) { diff --git a/docs/api/components/schemas.yaml b/docs/api/components/schemas.yaml index 8dcdd9ae17..5d28e8a9b1 100644 --- a/docs/api/components/schemas.yaml +++ b/docs/api/components/schemas.yaml @@ -2116,6 +2116,9 @@ Startup-config: type: integer maximum: 9 minimum: 0 + max_concurrent_replications: + description: Maximum number of concurrent replication connections allowed. If set to 0 this limit will be ignored. + type: integer readOnly: true unsupported: description: Settings that are not officially supported. It is highly recommended these are **not** used. @@ -2166,6 +2169,46 @@ Startup-config: type: integer readOnly: true title: Startup-config +Runtime-config: + type: object + properties: + logging: + description: The configuration settings for modifying Sync Gateway logging. + type: object + properties: + log_file_path: + description: Absolute or relative path on the filesystem to the log file directory. A relative path is from the directory that contains the Sync Gateway executable file. + type: string + readOnly: true + redaction_level: + description: Redaction level to apply to log output.
+ type: string + default: partial + enum: + - none + - partial + - full + - unset + readOnly: true + console: + $ref: '#/Console-logging-config' + error: + $ref: '#/File-logging-config' + warn: + $ref: '#/File-logging-config' + info: + $ref: '#/File-logging-config' + debug: + $ref: '#/File-logging-config' + trace: + $ref: '#/File-logging-config' + stats: + $ref: '#/File-logging-config' + max_concurrent_replications: + description: Maximum number of concurrent replication connections allowed. If set to 0 this limit will be ignored. + type: integer + default: 0 + title: Runtime-config File-logging-config: type: object properties: diff --git a/docs/api/paths/admin/_config.yaml b/docs/api/paths/admin/_config.yaml index 28dd1a84aa..0bc789a2a8 100644 --- a/docs/api/paths/admin/_config.yaml +++ b/docs/api/paths/admin/_config.yaml @@ -49,7 +49,7 @@ put: content: application/json: schema: - $ref: ../../components/schemas.yaml#/Startup-config + $ref: ../../components/schemas.yaml#/Runtime-config responses: '200': description: Successfully set runtime options diff --git a/rest/admin_api.go b/rest/admin_api.go index ee67dff011..324129eb63 100644 --- a/rest/admin_api.go +++ b/rest/admin_api.go @@ -411,6 +411,7 @@ func (h *handler) handlePutConfig() error { Trace FileLoggerPutConfig `json:"trace,omitempty"` Stats FileLoggerPutConfig `json:"stats,omitempty"` } `json:"logging"` + ReplicationLimit *int `json:"max_concurrent_replications,omitempty"` } var config ServerPutConfig @@ -462,6 +463,16 @@ func (h *handler) handlePutConfig() error { base.EnableStatsLogger(*config.Logging.Stats.Enabled) } + if config.ReplicationLimit != nil { + if *config.ReplicationLimit < 0 { + return base.HTTPErrorf(http.StatusBadRequest, "replication limit cannot be less than 0") + } + h.server.Config.Replicator.MaxConcurrentReplications = *config.ReplicationLimit + h.server.ActiveReplicationsCounter.lock.Lock() + h.server.ActiveReplicationsCounter.activeReplicatorLimit = *config.ReplicationLimit + h.server.ActiveReplicationsCounter.lock.Unlock() + } + return base.HTTPErrorf(http.StatusOK, "Updated") } diff --git a/rest/adminapitest/admin_api_test.go b/rest/adminapitest/admin_api_test.go index 8bf10b64ed..720f05cc4c 100644 --- a/rest/adminapitest/admin_api_test.go +++ b/rest/adminapitest/admin_api_test.go @@ -241,6 +241,62 @@ func TestLoggingKeys(t *testing.T) { assert.Equal(t, map[string]interface{}{}, noLogKeys) } +func TestServerlessChangesEndpointLimit(t *testing.T) { + base.RequireNumTestBuckets(t, 2) + base.SetUpTestLogging(t, base.LevelInfo, base.KeyReplicate, base.KeyHTTP, base.KeyHTTPResp, base.KeySync, base.KeySyncMsg, base.KeyChanges) + rt := rest.NewRestTester(t, &rest.RestTesterConfig{ + SyncFn: `function(doc) {channel(doc.channel);}`, + }) + defer rt.Close() + + resp := rt.SendAdminRequest(http.MethodPut, "/_config", `{"max_concurrent_replications" : 2}`) + rest.RequireStatus(t, resp, http.StatusOK) + resp = rt.SendAdminRequest("PUT", "/db/_user/alice", rest.GetUserPayload(t, "alice", "letmein", "", rt.GetSingleTestDatabaseCollection(), []string{"ABC"}, nil)) + rest.RequireStatus(t, resp, 201) + + // Put several documents in channel PBS + response := rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs1", `{"value":1, "channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs2", `{"value":2, "channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs3", `{"value":3, "channel":["PBS"]}`) + 
rest.RequireStatus(t, response, 201) + + changesJSON := `{"style":"all_docs", + "heartbeat":300000, + "feed":"longpoll", + "limit":50, + "since":"1", + "filter":"` + base.ByChannelFilter + `", + "channels":"ABC,PBS"}` + var wg sync.WaitGroup + wg.Add(2) + + // send some changes requests in go routines to run concurrently along with test + go func() { + defer wg.Done() + resp1 := rt.SendUserRequest(http.MethodPost, "/{{.keyspace}}/_changes", changesJSON, "alice") + rest.RequireStatus(t, resp1, http.StatusOK) + }() + + go func() { + defer wg.Done() + resp2 := rt.SendUserRequest(http.MethodPost, "/{{.keyspace}}/_changes", changesJSON, "alice") + rest.RequireStatus(t, resp2, http.StatusOK) + }() + + // assert count for replicators is correct according to changes request made above + rt.WaitForActiveReplicatorCount(2) + + // assert this request is rejected due to this request taking us over the limit + resp = rt.SendAdminRequest(http.MethodGet, "/{{.keyspace}}/_changes?feed=longpoll&since=999999&timeout=100000", "") + rest.RequireStatus(t, resp, http.StatusServiceUnavailable) + // put doc to end changes feeds + resp = rt.SendAdminRequest("PUT", "/{{.keyspace}}/abc1", `{"value":3, "channel":["ABC"]}`) + rest.RequireStatus(t, resp, 201) + wg.Wait() +} + func TestLoggingLevels(t *testing.T) { if base.GlobalTestLoggingSet.IsTrue() { t.Skip("Test does not work when a global test log level is set") diff --git a/rest/blip_sync.go b/rest/blip_sync.go index 01242a4923..931c96fe8e 100644 --- a/rest/blip_sync.go +++ b/rest/blip_sync.go @@ -11,6 +11,7 @@ licenses/APL2.txt. package rest import ( + "context" "fmt" "net/http" @@ -22,6 +23,16 @@ import ( // HTTP handler for incoming BLIP sync WebSocket request (/db/_blipsync) func (h *handler) handleBLIPSync() error { + needRelease, err := h.server.incrementConcurrentReplications(h.rqCtx) + if err != nil { + h.db.DbStats.Database().NumReplicationsRejectedLimit.Add(1) + return err + } + // if we haven't incremented the active replicator due to MaxConcurrentReplications being 0, we don't need to decrement it + if needRelease { + defer h.server.decrementConcurrentReplications(h.rqCtx) + } + // Exit early when the connection can't be switched to websocket protocol. 
if _, ok := h.response.(http.Hijacker); !ok { base.DebugfCtx(h.ctx(), base.KeyHTTP, "Non-upgradable request received for BLIP+WebSocket protocol") @@ -71,3 +82,37 @@ func (h *handler) handleBLIPSync() error { return nil } + +// incrementConcurrentReplications increments the number of active replications (if there is capacity to do so) +// and rejects calls if no capacity is available +func (sc *ServerContext) incrementConcurrentReplications(ctx context.Context) (bool, error) { + // lock replications config limit + the active replications counter + sc.ActiveReplicationsCounter.lock.Lock() + defer sc.ActiveReplicationsCounter.lock.Unlock() + // if max concurrent replications is 0 then we don't need to keep track of concurrent replications + if sc.ActiveReplicationsCounter.activeReplicatorLimit == 0 { + return false, nil + } + + capacity := sc.ActiveReplicationsCounter.activeReplicatorLimit + count := sc.ActiveReplicationsCounter.activeReplicatorCount + + if count >= capacity { + base.InfofCtx(ctx, base.KeyHTTP, "Replication limit exceeded (active: %d limit: %d)", count, capacity) + return false, base.ErrReplicationLimitExceeded + } + sc.ActiveReplicationsCounter.activeReplicatorCount++ + base.TracefCtx(ctx, base.KeyHTTP, "Acquired replication slot (active: %d/%d)", sc.ActiveReplicationsCounter.activeReplicatorCount, capacity) + + return true, nil +} + +// decrementConcurrentReplications decrements the number of active replications on the server context +func (sc *ServerContext) decrementConcurrentReplications(ctx context.Context) { + // lock replications config limit + the active replications counter + sc.ActiveReplicationsCounter.lock.Lock() + defer sc.ActiveReplicationsCounter.lock.Unlock() + connections := sc.ActiveReplicationsCounter.activeReplicatorLimit + sc.ActiveReplicationsCounter.activeReplicatorCount-- + base.TracefCtx(ctx, base.KeyHTTP, "Released replication slot (active: %d/%d)", sc.activeReplicatorCount, connections) +} diff --git a/rest/changes_api.go b/rest/changes_api.go index 00c06170c0..408ca401b9 100644 --- a/rest/changes_api.go +++ b/rest/changes_api.go @@ -248,6 +248,15 @@ func (h *handler) handleChanges() error { feed = "normal" } + needRelease, concurrentReplicationsErr := h.server.incrementConcurrentReplications(h.rqCtx) + if concurrentReplicationsErr != nil { + return concurrentReplicationsErr + } + // if we haven't incremented the active replicator due to MaxConcurrentReplications being 0, we don't need to decrement it + if needRelease { + defer h.server.decrementConcurrentReplications(h.rqCtx) + } + // Get the channels as parameters to an imaginary "bychannel" filter. // The default is all channels the user can access. 
userChannels := base.SetOf(ch.AllChannelWildcard) diff --git a/rest/config_flags.go b/rest/config_flags.go index 8c69c71521..c53b32c635 100644 --- a/rest/config_flags.go +++ b/rest/config_flags.go @@ -122,8 +122,9 @@ func registerConfigFlags(config *StartupConfig, fs *flag.FlagSet) map[string]con "auth.bcrypt_cost": {&config.Auth.BcryptCost, fs.Int("auth.bcrypt_cost", 0, "Cost to use for bcrypt password hashes")}, - "replicator.max_heartbeat": {&config.Replicator.MaxHeartbeat, fs.String("replicator.max_heartbeat", "", "Max heartbeat value for _changes request")}, - "replicator.blip_compression": {&config.Replicator.BLIPCompression, fs.Int("replicator.blip_compression", 0, "BLIP data compression level (0-9)")}, + "replicator.max_heartbeat": {&config.Replicator.MaxHeartbeat, fs.String("replicator.max_heartbeat", "", "Max heartbeat value for _changes request")}, + "replicator.blip_compression": {&config.Replicator.BLIPCompression, fs.Int("replicator.blip_compression", 0, "BLIP data compression level (0-9)")}, + "replicator.max_concurrent_replications": {&config.Replicator.MaxConcurrentReplications, fs.Int("replicator.max_concurrent_replications", 0, "Maximum number of replication connections to the node")}, "unsupported.stats_log_frequency": {&config.Unsupported.StatsLogFrequency, fs.String("unsupported.stats_log_frequency", "", "How often should stats be written to stats logs")}, "unsupported.use_stdlib_json": {&config.Unsupported.UseStdlibJSON, fs.Bool("unsupported.use_stdlib_json", false, "Bypass the jsoniter package and use Go's stdlib instead")}, diff --git a/rest/config_startup.go b/rest/config_startup.go index fc32247b89..5da211f21d 100644 --- a/rest/config_startup.go +++ b/rest/config_startup.go @@ -136,8 +136,9 @@ type AuthConfig struct { } type ReplicatorConfig struct { - MaxHeartbeat *base.ConfigDuration `json:"max_heartbeat,omitempty" help:"Max heartbeat value for _changes request"` - BLIPCompression *int `json:"blip_compression,omitempty" help:"BLIP data compression level (0-9)"` + MaxHeartbeat *base.ConfigDuration `json:"max_heartbeat,omitempty" help:"Max heartbeat value for _changes request"` + BLIPCompression *int `json:"blip_compression,omitempty" help:"BLIP data compression level (0-9)"` + MaxConcurrentReplications int `json:"max_concurrent_replications,omitempty" help:"Maximum number of replication connections to the node"` } type UnsupportedConfig struct { diff --git a/rest/replicatortest/replicator_test.go b/rest/replicatortest/replicator_test.go index 6b05397154..ac6701dcca 100644 --- a/rest/replicatortest/replicator_test.go +++ b/rest/replicatortest/replicator_test.go @@ -545,6 +545,147 @@ func TestPullReplicationAPI(t *testing.T) { assert.Equal(t, "rt2", doc2Body["source"]) } +func TestStopServerlessConnectionLimitingDuringReplications(t *testing.T) { + base.RequireNumTestBuckets(t, 2) + base.SetUpTestLogging(t, base.LevelInfo, base.KeyReplicate, base.KeyHTTP, base.KeyHTTPResp, base.KeySync, base.KeySyncMsg) + + rt1, rt2, remoteURLString, teardown := rest.SetupSGRPeers(t) + defer teardown() + + resp := rt2.SendAdminRequest(http.MethodPut, "/_config", `{"max_concurrent_replications" : 2}`) + rest.RequireStatus(t, resp, http.StatusOK) + + for i := 0; i < 10; i++ { + _ = rt2.PutDoc(fmt.Sprint(i), `{"source":"rt2","channels":["alice"]}`) + } + + // create two replications to take us to the limit + replicationID := t.Name() + rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, true, db.ConflictResolverDefault) + 
rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateRunning) + replicationID = t.Name() + "1" + rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, true, db.ConflictResolverDefault) + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateRunning) + rt1.WaitForActiveReplicatorInitialization(2) + + // try create a new replication to take it beyond the threshold set by runtime config call + // assert it enter error state + replicationID = t.Name() + "2" + rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, true, db.ConflictResolverDefault) + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateError) + + // change limit to 0 (turning limiting off) and assert that the replications currently running continue as normal and reject any new ones being added + resp = rt2.SendAdminRequest(http.MethodPut, "/_config", `{"max_concurrent_replications" : 0}`) + rest.RequireStatus(t, resp, http.StatusOK) + + // assert the replications aren't killed as result of change in limit + rt2.WaitForActiveReplicatorCount(2) + // assert we still can create a new replication given that originally the limit was 2 replications + replicationID = t.Name() + "3" + rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, true, db.ConflictResolverDefault) + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateRunning) + +} + +func TestServerlessConnectionLimitingOneshotFeed(t *testing.T) { + base.RequireNumTestBuckets(t, 2) + base.SetUpTestLogging(t, base.LevelInfo, base.KeyReplicate, base.KeyHTTP, base.KeyHTTPResp, base.KeySync, base.KeySyncMsg) + + rt1, rt2, remoteURLString, teardown := rest.SetupSGRPeers(t) + defer teardown() + + // update runtime config to limit to 2 concurrent replication connections + resp := rt2.SendAdminRequest(http.MethodPut, "/_config", `{"max_concurrent_replications" : 2}`) + rest.RequireStatus(t, resp, http.StatusOK) + + for i := 0; i < 200; i++ { + _ = rt2.PutDoc(fmt.Sprint(i), `{"source":"rt2","channels":["alice"]}`) + } + + replicationID := t.Name() + rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, false, db.ConflictResolverDefault) + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateRunning) + replicationID = t.Name() + "1" + rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, false, db.ConflictResolverDefault) + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateRunning) + + rt1.WaitForActiveReplicatorInitialization(2) + // assert the active replicator count has increased by 2 + rt2.WaitForActiveReplicatorCount(2) + replicationID = t.Name() + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateStopped) + replicationID = t.Name() + "1" + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateStopped) + + // assert that the count for active replicators has decreased by 2 as both replications have finished + rt2.WaitForActiveReplicatorCount(0) + + // assert we can create a new replication as count has decreased below threshold + replicationID = t.Name() + "2" + rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, false, db.ConflictResolverDefault) + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateRunning) + +} + +func TestServerlessConnectionLimitingContinuous(t *testing.T) { + base.RequireNumTestBuckets(t, 2) + base.SetUpTestLogging(t, base.LevelInfo, base.KeyReplicate, base.KeyHTTP, 
base.KeyHTTPResp, base.KeySync, base.KeySyncMsg) + + rt1, rt2, remoteURLString, teardown := rest.SetupSGRPeers(t) + defer teardown() + + // update runtime config to limit to 2 concurrent replication connections + resp := rt2.SendAdminRequest(http.MethodPut, "/_config", `{"max_concurrent_replications" : 2}`) + rest.RequireStatus(t, resp, http.StatusOK) + + for i := 0; i < 200; i++ { + _ = rt2.PutDoc(fmt.Sprint(i), `{"source":"rt2","channels":["alice"]}`) + } + + // create two replications to take us to the limit + replicationID := t.Name() + rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, true, db.ConflictResolverDefault) + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateRunning) + replicationID = t.Name() + "1" + rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, true, db.ConflictResolverDefault) + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateRunning) + rt1.WaitForActiveReplicatorInitialization(2) + + // try create a new replication to take it beyond the threshold set by runtime config call + // assert it enter error state + replicationID = t.Name() + "2" + rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, true, db.ConflictResolverDefault) + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateError) + + // assert on stats + dbstats := rt2.GetDatabase().DbStats + assert.Equal(t, int64(2), dbstats.DatabaseStats.NumReplicationsRejectedLimit.Value()) + + // change limit to 1 and assert that the replications currently running continue as normal and reject any new ones being added + resp = rt2.SendAdminRequest(http.MethodPut, "/_config", `{"max_concurrent_replications" : 1}`) + rest.RequireStatus(t, resp, http.StatusOK) + + // assert the replications aren't killed as result of change in limit + rt2.WaitForActiveReplicatorCount(2) + // assert we still can't create a new replication + replicationID = t.Name() + "3" + rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, true, db.ConflictResolverDefault) + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateError) + + // stop one of the replicators currently running + resp = rt1.SendAdminRequest(http.MethodPut, "/{{.db}}/_replicationStatus/"+t.Name()+"1?action=stop", "") + rest.RequireStatus(t, resp, http.StatusOK) + rt1.WaitForReplicationStatus(t.Name()+"1", db.ReplicationStateStopped) + // assert the count has been decremented + rt2.WaitForActiveReplicatorCount(1) + + // assert we still can't create new replication (new limit is 1) + replicationID = t.Name() + "4" + rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, true, db.ConflictResolverDefault) + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateError) + +} + // TestPullReplicationAPI // - Starts 2 RestTesters, one active, and one passive. 
// - Creates a continuous pull replication on rt1 via the REST API diff --git a/rest/server_context.go b/rest/server_context.go index 483d7d5953..b3cec14df6 100644 --- a/rest/server_context.go +++ b/rest/server_context.go @@ -70,6 +70,13 @@ type ServerContext struct { LogContextID string // ID to differentiate log messages from different server context fetchConfigsLastUpdate time.Time // The last time fetchConfigsWithTTL() updated dbConfigs allowScopesInPersistentConfig bool // Test only backdoor to allow scopes in persistent config, not supported for multiple databases with different collections targeting the same bucket + ActiveReplicationsCounter +} + +type ActiveReplicationsCounter struct { + activeReplicatorCount int // The count of concurrent active replicators + activeReplicatorLimit int // The limit on number of active replicators allowed + lock sync.RWMutex // Lock for managing access to shared memory location } // defaultConfigRetryTimeout is the total retry time when waiting for in-flight config updates. Set as a multiple of kv op timeout, @@ -133,6 +140,9 @@ func NewServerContext(ctx context.Context, config *StartupConfig, persistentConf sc.Config.API.MetricsInterfaceAuthentication = base.BoolPtr(false) } } + if config.Replicator.MaxConcurrentReplications != 0 { + sc.ActiveReplicationsCounter.activeReplicatorLimit = config.Replicator.MaxConcurrentReplications + } sc.startStatsLogger(ctx) diff --git a/rest/utilities_testing_resttester.go b/rest/utilities_testing_resttester.go index b5790205a5..6f1aee79a1 100644 --- a/rest/utilities_testing_resttester.go +++ b/rest/utilities_testing_resttester.go @@ -167,6 +167,21 @@ func (rt *RestTester) WaitForAssignedReplications(count int) { require.NoError(rt.TB, rt.WaitForCondition(successFunc)) } +func (rt *RestTester) GetActiveReplicatorCount() int { + rt.ServerContext().ActiveReplicationsCounter.lock.Lock() + defer rt.ServerContext().ActiveReplicationsCounter.lock.Unlock() + return rt.ServerContext().ActiveReplicationsCounter.activeReplicatorCount +} + +func (rt *RestTester) WaitForActiveReplicatorCount(expCount int) { + var count int + successFunc := func() bool { + count = rt.GetActiveReplicatorCount() + return count == expCount + } + require.NoError(rt.TB, rt.WaitForCondition(successFunc), "Mismatch in active replicator count, expected count %d actual %d", expCount, count) +} + func (rt *RestTester) WaitForReplicationStatusForDB(dbName string, replicationID string, targetStatus string) { var status db.ReplicationStatus successFunc := func() bool { From 80d5eae87d8c06de29fec242a080c3032473b9f4 Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Thu, 11 May 2023 14:46:33 +0100 Subject: [PATCH 06/42] API Spec cleanup (scopes/collections) (#6236) --- docs/api/components/schemas.yaml | 39 ++++++++++++++++---------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/docs/api/components/schemas.yaml b/docs/api/components/schemas.yaml index 5d28e8a9b1..310de54e9e 100644 --- a/docs/api/components/schemas.yaml +++ b/docs/api/components/schemas.yaml @@ -529,15 +529,14 @@ Design-doc: type: string views: type: object - properties: - additionalProperties: - description: The name of the view. - type: object - properties: - map: - type: string - reduce: - type: string + additionalProperties: + description: The name of the view. 
+ type: object + properties: + map: + type: string + reduce: + type: string options: type: object properties: @@ -1078,13 +1077,17 @@ Replication-status: - replication_id title: Replication-status Scopes: - description: A map of all the collections with their corresponding configs for this scope + description: Scope-specific configuration. type: object - additionalProperties: - $ref: '#/CollectionConfig' + properties: + collections: + description: An object keyed by collection name containing config for the specific collection. + type: object + additionalProperties: + $ref: '#/CollectionConfig' title: Scopes CollectionConfig: - description: The configuration for the individual collection + description: Collection-specific configuration. type: object properties: sync: @@ -1156,11 +1159,10 @@ Database: type: integer default: 1000 scopes: - description: Scope and collection specific config. + description: An object keyed by scope name containing config for the specific collection. type: object - properties: - additionalProperties: - $ref: '#/Scopes' + additionalProperties: + $ref: '#/Scopes' name: description: The name of the database. type: string @@ -1788,8 +1790,7 @@ Event-config: options: description: The options for the event. type: object - properties: - additionalProperties: + additionalProperties: description: The option key and value. title: Event-config Resync-status: From a31be806b47b07cb9e3d7fdfaaaa8ec024d2f0a8 Mon Sep 17 00:00:00 2001 From: Tor Colvin Date: Thu, 11 May 2023 11:22:59 -0400 Subject: [PATCH 07/42] CBG-2928 add blip stats for database (#6229) * Add stats for transferred of bytes data ReplicationBytesReceived and ReplicationBytesSent * Stats are updated when a BlipSyncContext exits and at a threshold set by DatabaseContextOptions.BlipStatsReportingInterval (set to thirty seconds arbitrarily to see perf impact). --- base/stats.go | 12 ++++++ db/blip_sync_context.go | 54 ++++++++++++++++++++++- db/database.go | 2 + go.mod | 2 +- go.sum | 4 +- rest/blip_stats_test.go | 96 +++++++++++++++++++++++++++++++++++++++++ rest/server_context.go | 4 ++ 7 files changed, 170 insertions(+), 4 deletions(-) create mode 100644 rest/blip_stats_test.go diff --git a/base/stats.go b/base/stats.go index 0bb2518d38..ccd5a37d36 100644 --- a/base/stats.go +++ b/base/stats.go @@ -430,6 +430,8 @@ type CollectionStats struct { } type DatabaseStats struct { + ReplicationBytesReceived *SgwIntStat `json:"replication_bytes_received"` + ReplicationBytesSent *SgwIntStat `json:"replication_bytes_sent"` // The compaction_attachment_start_time. CompactionAttachmentStartTime *SgwIntStat `json:"compaction_attachment_start_time"` // The compaction_tombstone_start_time. 
@@ -1286,6 +1288,14 @@ func (d *DbStats) initDatabaseStats() error { labelKeys := []string{DatabaseLabelKey} labelVals := []string{d.dbName} + resUtil.ReplicationBytesReceived, err = NewIntStat(SubsystemDatabaseKey, "replication_bytes_received", labelKeys, labelVals, prometheus.CounterValue, 0) + if err != nil { + return err + } + resUtil.ReplicationBytesSent, err = NewIntStat(SubsystemDatabaseKey, "replication_bytes_sent", labelKeys, labelVals, prometheus.CounterValue, 0) + if err != nil { + return err + } resUtil.CompactionAttachmentStartTime, err = NewIntStat(SubsystemDatabaseKey, "compaction_attachment_start_time", labelKeys, labelVals, prometheus.GaugeValue, 0) if err != nil { return err @@ -1427,6 +1437,8 @@ func (d *DbStats) initDatabaseStats() error { } func (d *DbStats) unregisterDatabaseStats() { + prometheus.Unregister(d.DatabaseStats.ReplicationBytesReceived) + prometheus.Unregister(d.DatabaseStats.ReplicationBytesSent) prometheus.Unregister(d.DatabaseStats.CompactionAttachmentStartTime) prometheus.Unregister(d.DatabaseStats.CompactionTombstoneStartTime) prometheus.Unregister(d.DatabaseStats.ConflictWriteCount) diff --git a/db/blip_sync_context.go b/db/blip_sync_context.go index 85c7e285a0..245e1e5f9c 100644 --- a/db/blip_sync_context.go +++ b/db/blip_sync_context.go @@ -19,6 +19,7 @@ import ( "runtime/debug" "strconv" "sync" + "sync/atomic" "time" "github.com/couchbase/go-blip" @@ -48,6 +49,7 @@ func NewBlipSyncContext(ctx context.Context, bc *blip.Context, db *Database, con if bsc.replicationStats == nil { bsc.replicationStats = NewBlipSyncStats() } + bsc.stats.lastReportTime.Store(time.Now().UnixMilli()) if u := db.User(); u != nil { bsc.userName = u.Name() @@ -74,7 +76,6 @@ func NewBlipSyncContext(ctx context.Context, bc *blip.Context, db *Database, con bsc.register(profile, handlerFn) } } - return bsc } @@ -115,6 +116,16 @@ type BlipSyncContext struct { readOnly bool collections *blipCollections // all collections handled by blipSyncContext, implicit or via GetCollections + + stats blipSyncStats // internal structure to store stats +} + +// blipSyncStats has support structures to support reporting stats at regular interval +type blipSyncStats struct { + bytesSent atomic.Uint64 // Total bytes sent to client + bytesReceived atomic.Uint64 // Total bytes received from client + lastReportTime atomic.Int64 // last time reported by time.Time // Last time blip stats were reported + lock sync.Mutex } // AllowedAttachment contains the metadata for handling allowed attachments @@ -195,6 +206,8 @@ func (bsc *BlipSyncContext) register(profile string, handlerFn func(*blipHandler respBody, _ := resp.Body() base.TracefCtx(bsc.loggingCtx, base.KeySyncMsg, "Recv Rsp %s: Body: '%s' Properties: %v", resp, base.UD(respBody), base.UD(resp.Properties)) } + + bsc.reportStats(false) } bsc.blipContext.HandlerForProfile[profile] = handlerFnWrapper @@ -214,6 +227,7 @@ func (bsc *BlipSyncContext) Close() { collection.changesCtxCancel() } + bsc.reportStats(true) close(bsc.terminator) }) } @@ -655,3 +669,41 @@ func toHistory(revisions Revisions, knownRevs map[string]bool, maxHistory int) [ } return history } + +// timeElapsedForStatsReporting will return true if enough time has passed since the previous report. 
+func (bsc *BlipSyncContext) timeElapsedForStatsReporting(currentTime int64) bool { + return (currentTime - bsc.stats.lastReportTime.Load()) > bsc.blipContextDb.Options.BlipStatsReportingInterval +} + +// reportStats will update the stats on a database immediately if updateImmediately is true, otherwise only once BlipStatsReportingInterval has elapsed +func (bsc *BlipSyncContext) reportStats(updateImmediately bool) { + if bsc.blipContextDb == nil || bsc.blipContext == nil { + return + } + dbStats := bsc.blipContextDb.DbStats.Database() + if dbStats == nil { + return + } + currentTime := time.Now().UnixMilli() + if !updateImmediately && !bsc.timeElapsedForStatsReporting(currentTime) { + return + } + + bsc.stats.lock.Lock() + defer bsc.stats.lock.Unlock() + + // check a second time after acquiring the lock, so a goroutine that was blocked on the mutex doesn't report again immediately + if !updateImmediately && !bsc.timeElapsedForStatsReporting(time.Now().UnixMilli()) { + return + } + + totalBytesSent := bsc.blipContext.GetBytesSent() + newBytesSent := totalBytesSent - bsc.stats.bytesSent.Swap(totalBytesSent) + dbStats.ReplicationBytesSent.Add(int64(newBytesSent)) + + totalBytesReceived := bsc.blipContext.GetBytesReceived() + newBytesReceived := totalBytesReceived - bsc.stats.bytesReceived.Swap(totalBytesReceived) + dbStats.ReplicationBytesReceived.Add(int64(newBytesReceived)) + bsc.stats.lastReportTime.Store(currentTime) + +} diff --git a/db/database.go b/db/database.go index e4e6b3ec7c..f7ded714b4 100644 --- a/db/database.go +++ b/db/database.go @@ -174,6 +174,8 @@ type DatabaseContextOptions struct { skipRegisterImportPIndex bool // if set, skips the global gocb PIndex registration MetadataStore base.DataStore // If set, use this location/connection for SG metadata storage - if not set, metadata is stored using the same location/connection as the bucket used for data storage.
MetadataID string // MetadataID used for metadata storage + + BlipStatsReportingInterval int64 // interval to report blip stats in milliseconds } type ScopesOptions map[string]ScopeOptions diff --git a/go.mod b/go.mod index 4aac998a8c..d13419c0d2 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ require ( github.com/coreos/go-oidc v2.2.1+incompatible github.com/couchbase/cbgt v1.3.2-0.20230405201040-c0ede9925e46 github.com/couchbase/clog v0.1.0 - github.com/couchbase/go-blip v0.0.0-20221021161139-215cbac22bd7 + github.com/couchbase/go-blip v0.0.0-20230510201532-fcadc404bd41 github.com/couchbase/go-couchbase v0.1.1 github.com/couchbase/gocb/v2 v2.6.2 github.com/couchbase/gocbcore/v10 v10.2.3-0.20230412164057-d9c465de8911 diff --git a/go.sum b/go.sum index e41f8d6a7c..f8dfff29f7 100644 --- a/go.sum +++ b/go.sum @@ -69,8 +69,8 @@ github.com/couchbase/cbgt v1.3.2-0.20230405201040-c0ede9925e46 h1:tRxeXfSHBzAq6m github.com/couchbase/cbgt v1.3.2-0.20230405201040-c0ede9925e46/go.mod h1:tJF3TUUO3ZDBU15auN1gNsIVY3Oo+jj46zIXH4RBxk4= github.com/couchbase/clog v0.1.0 h1:4Kh/YHkhRjMCbdQuvRVsm39XZh4FtL1d8fAwJsHrEPY= github.com/couchbase/clog v0.1.0/go.mod h1:7tzUpEOsE+fgU81yfcjy5N1H6XtbVC8SgOz/3mCjmd4= -github.com/couchbase/go-blip v0.0.0-20221021161139-215cbac22bd7 h1:/GTlMVovmGKrFAl5e7u9CXuhjTlR5a4911Ujou18Q4Q= -github.com/couchbase/go-blip v0.0.0-20221021161139-215cbac22bd7/go.mod h1:nSpldGTqAhTOaDDL0Li2dSE0smqbISKagT7fIqYIRec= +github.com/couchbase/go-blip v0.0.0-20230510201532-fcadc404bd41 h1:pjBwvGjhloggITOU9Fqg4yQ/lbZJUHnz8OsYUUczQDw= +github.com/couchbase/go-blip v0.0.0-20230510201532-fcadc404bd41/go.mod h1:nSpldGTqAhTOaDDL0Li2dSE0smqbISKagT7fIqYIRec= github.com/couchbase/go-couchbase v0.1.1 h1:ClFXELcKj/ojyoTYbsY34QUrrYCBi/1G749sXSCkdhk= github.com/couchbase/go-couchbase v0.1.1/go.mod h1:+/bddYDxXsf9qt0xpDUtRR47A2GjaXmGGAqQ/k3GJ8A= github.com/couchbase/gocb/v2 v2.6.2 h1:sZg0+3GiYW7OT53ENEGnkkQMXhVuJ1qOJplvZDlM5Xk= diff --git a/rest/blip_stats_test.go b/rest/blip_stats_test.go new file mode 100644 index 0000000000..de640ff761 --- /dev/null +++ b/rest/blip_stats_test.go @@ -0,0 +1,96 @@ +// Copyright 2023-Present Couchbase, Inc. +// +// Use of this software is governed by the Business Source License included +// in the file licenses/BSL-Couchbase.txt. As of the Change Date specified +// in that file, in accordance with the Business Source License, use of this +// software will be governed by the Apache License, Version 2.0, included in +// the file licenses/APL2.txt. + +package rest + +import ( + "testing" + + "github.com/couchbase/sync_gateway/base" + "github.com/stretchr/testify/require" +) + +func sendGetCheckpointRequest(bt *BlipTester) { + t := bt.restTester.TB + rq := bt.newRequest() + rq.SetProfile("getCheckpoint") + require.True(t, bt.sender.Send(rq)) + errorCode, exists := rq.Response().Properties["Error-Code"] + require.True(t, exists) + require.Equal(t, "404", errorCode) +} + +// waitForStatGreaterThan will retry for up to 20 seconds until the result of getStatFunc is equal to the expected value. 
+func waitForStatGreaterThan(t *testing.T, getStatFunc func() int64, expected int64) { + workerFunc := func() (shouldRetry bool, err error, val interface{}) { + val = getStatFunc() + stat, ok := val.(int64) + require.True(t, ok) + return stat <= expected, nil, val + } + // wait for up to 20 seconds for the stat to meet the expected value + err, val := base.RetryLoop("waitForStatGreaterThan retry loop", workerFunc, base.CreateSleeperFunc(200, 100)) + require.NoError(t, err) + valInt64, ok := val.(int64) + require.True(t, ok) + require.Greater(t, valInt64, expected) +} + +func TestBlipStatsBasic(t *testing.T) { + bt, err := NewBlipTester(t) + require.NoError(t, err) + defer bt.Close() + + // make sure requests have not incremented stats. + /// Note: there is a blip call in NewBlipTester to initialize collections + dbStats := bt.restTester.GetDatabase().DbStats.Database() + require.Equal(t, int64(0), dbStats.ReplicationBytesReceived.Value()) + require.Equal(t, int64(0), dbStats.ReplicationBytesSent.Value()) + + // send a request, close BlipSyncContext and make sure stats are incremented + sendGetCheckpointRequest(bt) + + // requests shouldn't be implemented as part of handler + require.Equal(t, int64(0), dbStats.ReplicationBytesReceived.Value()) + require.Equal(t, int64(0), dbStats.ReplicationBytesSent.Value()) + + bt.sender.Close() + + waitForStatGreaterThan(t, dbStats.ReplicationBytesReceived.Value, 1) + waitForStatGreaterThan(t, dbStats.ReplicationBytesSent.Value, 1) + +} + +func TestBlipStatsFastReport(t *testing.T) { + bt, err := NewBlipTester(t) + require.NoError(t, err) + defer bt.Close() + sendRequest := func() { + rq := bt.newRequest() + rq.SetProfile("getCheckpoint") + require.True(t, bt.sender.Send(rq)) + errorCode, exists := rq.Response().Properties["Error-Code"] + require.True(t, exists) + require.Equal(t, "404", errorCode) + } + + dbStats := bt.restTester.GetDatabase().DbStats.Database() + require.Equal(t, int64(0), dbStats.ReplicationBytesReceived.Value()) + require.Equal(t, int64(0), dbStats.ReplicationBytesSent.Value()) + + sendRequest() + + require.Equal(t, int64(0), dbStats.ReplicationBytesReceived.Value()) + require.Equal(t, int64(0), dbStats.ReplicationBytesSent.Value()) + + // set reporting interval to update stats immediately + bt.restTester.GetDatabase().Options.BlipStatsReportingInterval = 0 + sendRequest() + require.Less(t, int64(0), dbStats.ReplicationBytesReceived.Value()) + require.Less(t, int64(0), dbStats.ReplicationBytesSent.Value()) +} diff --git a/rest/server_context.go b/rest/server_context.go index b3cec14df6..2b10ccd4f5 100644 --- a/rest/server_context.go +++ b/rest/server_context.go @@ -41,6 +41,9 @@ const kStatsReportInterval = time.Hour const kDefaultSlowQueryWarningThreshold = 500 // ms const KDefaultNumShards = 16 +// defaultBlipStatsReportingInterval is the default interval when to report blip stats, at the end of a message handler. 
+const defaultBlipStatsReportingInterval = 30 * time.Second + var errCollectionsUnsupported = base.HTTPErrorf(http.StatusBadRequest, "Named collections specified in database config, but not supported by connected Couchbase Server.") var ErrSuspendingDisallowed = errors.New("database does not allow suspending") @@ -714,6 +717,7 @@ func (sc *ServerContext) _getOrAddDatabaseFromConfig(ctx context.Context, config contextOptions.MetadataID = config.MetadataID } + contextOptions.BlipStatsReportingInterval = defaultBlipStatsReportingInterval.Milliseconds() // Create the DB Context dbcontext, err := db.NewDatabaseContext(ctx, dbName, bucket, autoImport, contextOptions) if err != nil { From f56fba34b5703af4d2f59e77d5d03f2aac005ace Mon Sep 17 00:00:00 2001 From: Tor Colvin Date: Thu, 11 May 2023 13:06:29 -0400 Subject: [PATCH 08/42] add x-additionalPropertiesName (#6238) --- docs/api/components/schemas.yaml | 18 ++++++++++++++++-- docs/api/paths/admin/_post_upgrade.yaml | 1 + 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/docs/api/components/schemas.yaml b/docs/api/components/schemas.yaml index 310de54e9e..400a6a31f0 100644 --- a/docs/api/components/schemas.yaml +++ b/docs/api/components/schemas.yaml @@ -242,9 +242,11 @@ User: description: A set of access grants by scope and collection. type: object additionalProperties: + x-additionalPropertiesName: scopename description: An object keyed by scope, containing a set of collections. type: object additionalProperties: + x-additionalPropertiesName: collectionname $ref: '#/CollectionAccessConfig' email: description: The email address of the user. @@ -345,9 +347,11 @@ Role: description: A set of access grants by scope and collection. type: object additionalProperties: + x-additionalPropertiesName: scopename description: An object keyed by scope, containing a set of collections. type: object additionalProperties: + x-additionalPropertiesName: collectionname $ref: '#/CollectionAccessConfig' title: Role User-session-information: @@ -465,6 +469,7 @@ Document: _attachments: type: object additionalProperties: + x-additionalPropertiesName: attachmentname description: The name of the attachment. type: object properties: @@ -530,6 +535,7 @@ Design-doc: views: type: object additionalProperties: + x-additionalPropertiesName: viewname description: The name of the view. type: object properties: @@ -1084,6 +1090,7 @@ Scopes: description: An object keyed by collection name containing config for the specific collection. type: object additionalProperties: + x-additionalPropertiesName: collectionname $ref: '#/CollectionConfig' title: Scopes CollectionConfig: @@ -1162,6 +1169,7 @@ Database: description: An object keyed by scope name containing config for the specific collection. type: object additionalProperties: + x-additionalPropertiesName: scopename $ref: '#/Scopes' name: description: The name of the database. @@ -1174,9 +1182,11 @@ Database: default: 'function(doc){channel(doc.channels);}' users: additionalProperties: + x-additionalPropertiesName: username $ref: '#/User' roles: additionalProperties: + x-additionalPropertiesName: rolename $ref: '#/Role' revs_limit: description: |- @@ -1237,8 +1247,8 @@ Database: type: string default: DCP enum: - - TAP - DCP + - TAP deprecated: true allow_empty_password: description: This controls whether users that are created can have an empty password or not. @@ -1449,6 +1459,7 @@ Database: description: Configuration for Local JWT authentication. 
type: object additionalProperties: + x-additionalPropertiesName: providername description: The providers name. type: object required: ['issuer', 'client_id', 'algorithms', 'keys'] @@ -1535,6 +1546,7 @@ Database: description: List of OpenID Connect issuers. type: object additionalProperties: + x-additionalPropertiesName: providername description: The providers name. type: object properties: @@ -1791,7 +1803,7 @@ Event-config: description: The options for the event. type: object additionalProperties: - description: The option key and value. + description: The option key and value. title: Event-config Resync-status: description: The status of a resync operation @@ -2151,12 +2163,14 @@ Startup-config: description: 'A map of database name to credentials, that can be used instead of the bootstrap ones.' type: object additionalProperties: + x-additionalPropertiesName: databasename $ref: '#/CredentialsConfig' readOnly: true bucket_credentials: description: 'A map of bucket names to credentials, that can be used instead of the bootstrap ones.' type: object additionalProperties: + x-additionalPropertiesName: bucketname $ref: '#/CredentialsConfig' readOnly: true max_file_descriptors: diff --git a/docs/api/paths/admin/_post_upgrade.yaml b/docs/api/paths/admin/_post_upgrade.yaml index cb803eec47..9f4c23b3ea 100644 --- a/docs/api/paths/admin/_post_upgrade.yaml +++ b/docs/api/paths/admin/_post_upgrade.yaml @@ -32,6 +32,7 @@ post: description: A map of databases. type: object additionalProperties: + x-additionalPropertiesName: db description: The name of the database that was targetted. type: object properties: From b9a08edcd7923d55a552ac7a28faa1c85a5a49b6 Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Fri, 12 May 2023 21:20:30 +0100 Subject: [PATCH 09/42] CBG-2938: Ignore Cbgt EOF feed errors when intentionally stopped (#6235) * Use sgMgrEventHandlers context to add cancellation handling for EOF feed errors * Move all importListener.Stop cbgtContext code into CbgtContext.Stop() --- base/dcp_sharded.go | 50 ++++++++++++++++++++++++++++++++----------- db/import_listener.go | 14 +----------- 2 files changed, 39 insertions(+), 25 deletions(-) diff --git a/base/dcp_sharded.go b/base/dcp_sharded.go index 7d5272a924..f124981d7f 100644 --- a/base/dcp_sharded.go +++ b/base/dcp_sharded.go @@ -52,6 +52,9 @@ type CbgtContext struct { Cfg cbgt.Cfg // Cfg manages storage of the current pindex set and node assignment heartbeater Heartbeater // Heartbeater used for failed node detection heartbeatListener *importHeartbeatListener // Listener subscribed to failed node alerts from heartbeater + eventHandlers *sgMgrEventHandlers // Event handler callbacks + ctx context.Context // Log context + dbName string // Database name } // StartShardedDCPFeed initializes and starts a CBGT Manager targeting the provided bucket. @@ -316,7 +319,8 @@ func initCBGTManager(ctx context.Context, bucket Bucket, spec BucketSpec, cfgSG // avoids file system usage, in conjunction with managerLoadDataDir=false in options. 
dataDir := "" - eventHandlers := &sgMgrEventHandlers{ctx: ctx} + eventHandlersCtx, eventHandlersCancel := context.WithCancel(ctx) + eventHandlers := &sgMgrEventHandlers{ctx: eventHandlersCtx, ctxCancel: eventHandlersCancel} // Specify one feed per pindex options := make(map[string]string) @@ -347,8 +351,11 @@ func initCBGTManager(ctx context.Context, bucket Bucket, spec BucketSpec, cfgSG options) cbgtContext := &CbgtContext{ - Manager: mgr, - Cfg: cfgSG, + Manager: mgr, + Cfg: cfgSG, + eventHandlers: eventHandlers, + ctx: ctx, + dbName: dbName, } if spec.Auth != nil || (spec.Certpath != "" && spec.Keypath != "") { @@ -439,13 +446,28 @@ func getMinNodeVersion(cfg cbgt.Cfg) (*ComparableVersion, error) { return minVersion, nil } -// StopHeartbeatListener unregisters the listener from the heartbeater, and stops it. -func (c *CbgtContext) StopHeartbeatListener() { +// Stop unregisters the listener from the heartbeater, and stops it and associated handlers. +func (c *CbgtContext) Stop() { + if c.eventHandlers != nil { + c.eventHandlers.ctxCancel() + } if c.heartbeatListener != nil { c.heartbeater.UnregisterListener(c.heartbeatListener.Name()) c.heartbeatListener.Stop() } + + // Close open PIndexes before stopping the manager. + _, pindexes := c.Manager.CurrentMaps() + for _, pIndex := range pindexes { + err := c.Manager.ClosePIndex(pIndex) + if err != nil { + DebugfCtx(c.ctx, KeyImport, "Error closing pindex: %v", err) + } + } + // ClosePIndex calls are synchronous, so can stop manager once they've completed + c.Manager.Stop() + c.RemoveFeedCredentials(c.dbName) } func (c *CbgtContext) RemoveFeedCredentials(dbName string) { @@ -696,7 +718,8 @@ func GetDefaultImportPartitions(serverless bool) uint16 { } type sgMgrEventHandlers struct { - ctx context.Context + ctx context.Context + ctxCancel context.CancelFunc } func (meh *sgMgrEventHandlers) OnRefreshManagerOptions(options map[string]string) { @@ -715,20 +738,20 @@ func (meh *sgMgrEventHandlers) OnUnregisterPIndex(pindex *cbgt.PIndex) { // Handling below based on cbft implementation - checks whether the underlying source (bucket) // still exists with VerifySourceNotExists, and if it exists, calls NotifyMgrOnClose. // This will trigger cbgt closing and then attempting to reconnect to the feed. -func (meh *sgMgrEventHandlers) OnFeedError(srcType string, r cbgt.Feed, err error) { +func (meh *sgMgrEventHandlers) OnFeedError(srcType string, r cbgt.Feed, feedErr error) { DebugfCtx(meh.ctx, KeyDCP, "cbgt Mgr OnFeedError, srcType: %s, feed name: %s, err: %v", - srcType, r.Name(), err) + srcType, r.Name(), feedErr) dcpFeed, ok := r.(cbgt.FeedEx) if !ok { return } - gone, indexUUID, er := dcpFeed.VerifySourceNotExists() + gone, indexUUID, err := dcpFeed.VerifySourceNotExists() DebugfCtx(meh.ctx, KeyDCP, "cbgt Mgr OnFeedError, VerifySourceNotExists,"+ " srcType: %s, gone: %t, indexUUID: %s, err: %v", - srcType, gone, indexUUID, er) + srcType, gone, indexUUID, err) if !gone { // If we get an EOF error from the feeds and the bucket is still alive, // then there could at the least two potential error scenarios. @@ -741,8 +764,11 @@ func (meh *sgMgrEventHandlers) OnFeedError(srcType string, r cbgt.Feed, err erro // the connectivity problems either during the next rebalance // (new kv node after failover-recovery rebalance) or // on the next janitor work cycle(ephemeral network issue to the same node). - if strings.Contains(err.Error(), "EOF") { - InfofCtx(meh.ctx, KeyDCP, "Handling EOF on cbgt feed - notifying manager to trigger reconnection to feed. 
indexUUID: %v, err: %v", indexUUID, err) + if strings.Contains(feedErr.Error(), "EOF") { + // If this wasn't an intentional close, log about the EOF + if meh.ctx.Err() != context.Canceled { + InfofCtx(meh.ctx, KeyDCP, "Handling EOF on cbgt feed - notifying manager to trigger reconnection to feed. indexUUID: %v, err: %v", indexUUID, feedErr) + } dcpFeed.NotifyMgrOnClose() } } diff --git a/db/import_listener.go b/db/import_listener.go index 72ef8a8734..7cb9142bcc 100644 --- a/db/import_listener.go +++ b/db/import_listener.go @@ -214,19 +214,7 @@ func (il *importListener) ImportFeedEvent(event sgbucket.FeedEvent) { func (il *importListener) Stop() { if il != nil { if il.cbgtContext != nil { - il.cbgtContext.StopHeartbeatListener() - - // Close open PIndexes before stopping the manager. - _, pindexes := il.cbgtContext.Manager.CurrentMaps() - for _, pIndex := range pindexes { - err := il.cbgtContext.Manager.ClosePIndex(pIndex) - if err != nil { - base.DebugfCtx(il.loggingCtx, base.KeyImport, "Error closing pindex: %v", err) - } - } - // ClosePIndex calls are synchronous, so can stop manager once they've completed - il.cbgtContext.Manager.Stop() - il.cbgtContext.RemoveFeedCredentials(il.dbName) + il.cbgtContext.Stop() // Remove entry from global listener directory base.RemoveDestFactory(il.importDestKey) From 1bdd12cf6c7304b2e69d2e785a9d9c7e0499d277 Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Mon, 15 May 2023 15:44:07 +0100 Subject: [PATCH 10/42] Rename TestStatusAfterReplicationRebalanceFail to avoid 'Fail:' search result (#6244) --- rest/replicatortest/replicator_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rest/replicatortest/replicator_test.go b/rest/replicatortest/replicator_test.go index ac6701dcca..e829fa1f61 100644 --- a/rest/replicatortest/replicator_test.go +++ b/rest/replicatortest/replicator_test.go @@ -781,7 +781,8 @@ func TestReplicationStatusActions(t *testing.T) { } -func TestStatusAfterReplicationRebalanceFail(t *testing.T) { +// TestReplicationRebalanceToZeroNodes checks that the replication goes into an unassigned state when there are no nodes available to run replications. +func TestReplicationRebalanceToZeroNodes(t *testing.T) { base.SetUpTestLogging(t, base.LevelDebug, base.KeyAll) activeRT, remoteRT, _, teardown := rest.SetupSGRPeers(t) defer teardown() From 36a3204bc6720ad6e6075c4fb2c71d1fa64fbffb Mon Sep 17 00:00:00 2001 From: Adam Fraser Date: Mon, 15 May 2023 07:47:27 -0700 Subject: [PATCH 11/42] CBG-2853 Allow one-shot replications to wait for DCP to catch up on changes feed (#6243) * CBG-2853 Add requestPlus option for changes feeds Adds requestPlus option for changes feeds. When set, changes feeds will loop until the cached sequence (via DCP) is greater than the database sequence at the time the changes request was issued. requestPlus can be enabled for non-continuous changes requests in one of three ways: - by setting request_plus=true on a REST API changes call - by setting the requestPlus property to "true" on a subChanges message - by setting "changes_request_plus":true in the database config (default=false) The request setting is given priority - if not set on a request, the value will fall back to the database config value. Required minor refactoring of how options.Wait was used in changes.go, to support use of requestPlus and longpoll together. No functional changes to longpoll if requestPlus is not set. * Update docs for request_plus changes parameter. 
* lint fixes --- db/blip_handler.go | 32 ++- db/blip_sync_messages.go | 12 +- db/changes.go | 55 ++-- db/database.go | 11 +- db/util_testing.go | 21 ++ docs/api/paths/admin/keyspace-_changes.yaml | 10 + rest/blip_api_crud_test.go | 119 ++++++++ rest/blip_client_test.go | 65 ++++- rest/changes_api.go | 43 ++- rest/changestest/changes_api_test.go | 291 ++++++++++++++++++++ rest/config.go | 1 + rest/server_context.go | 1 + 12 files changed, 615 insertions(+), 46 deletions(-) diff --git a/db/blip_handler.go b/db/blip_handler.go index 109bbeb871..90c35d9840 100644 --- a/db/blip_handler.go +++ b/db/blip_handler.go @@ -295,6 +295,19 @@ func (bh *blipHandler) handleSubChanges(rq *blip.Message) error { continuous := subChangesParams.continuous() + requestPlusSeq := uint64(0) + // If non-continuous, check whether requestPlus handling is set for request or via database config + if continuous == false { + useRequestPlus := subChangesParams.requestPlus(bh.db.Options.ChangesRequestPlus) + if useRequestPlus { + seq, requestPlusErr := bh.db.GetRequestPlusSequence() + if requestPlusErr != nil { + return base.HTTPErrorf(http.StatusServiceUnavailable, "Unable to retrieve current sequence for requestPlus=true: %v", requestPlusErr) + } + requestPlusSeq = seq + } + } + // Start asynchronous changes goroutine go func() { // Pull replication stats by type @@ -325,6 +338,7 @@ func (bh *blipHandler) handleSubChanges(rq *blip.Message) error { clientType: clientType, ignoreNoConflicts: clientType == clientTypeSGR2, // force this side to accept a "changes" message, even in no conflicts mode for SGR2. changesCtx: collectionCtx.changesCtx, + requestPlusSeq: requestPlusSeq, }) base.DebugfCtx(bh.loggingCtx, base.KeySyncMsg, "#%d: Type:%s --> Time:%v", bh.serialNumber, rq.Profile(), time.Since(startTime)) }() @@ -358,6 +372,7 @@ type sendChangesOptions struct { revocations bool ignoreNoConflicts bool changesCtx context.Context + requestPlusSeq uint64 } type changesDeletedFlag uint @@ -385,14 +400,15 @@ func (bh *blipHandler) sendChanges(sender *blip.Sender, opts *sendChangesOptions base.InfofCtx(bh.loggingCtx, base.KeySync, "Sending changes since %v", opts.since) options := ChangesOptions{ - Since: opts.since, - Conflicts: false, // CBL 2.0/BLIP don't support branched rev trees (LiteCore #437) - Continuous: opts.continuous, - ActiveOnly: opts.activeOnly, - Revocations: opts.revocations, - LoggingCtx: bh.loggingCtx, - clientType: opts.clientType, - ChangesCtx: opts.changesCtx, + Since: opts.since, + Conflicts: false, // CBL 2.0/BLIP don't support branched rev trees (LiteCore #437) + Continuous: opts.continuous, + ActiveOnly: opts.activeOnly, + Revocations: opts.revocations, + LoggingCtx: bh.loggingCtx, + clientType: opts.clientType, + ChangesCtx: opts.changesCtx, + RequestPlusSeq: opts.requestPlusSeq, } channelSet := opts.channels diff --git a/db/blip_sync_messages.go b/db/blip_sync_messages.go index 381591be0b..1e2a161e73 100644 --- a/db/blip_sync_messages.go +++ b/db/blip_sync_messages.go @@ -68,6 +68,8 @@ const ( SubChangesContinuous = "continuous" SubChangesBatch = "batch" SubChangesRevocations = "revocations" + SubChangesRequestPlus = "requestPlus" + SubChangesFuture = "future" // rev message properties RevMessageID = "id" @@ -163,7 +165,7 @@ func NewSubChangesParams(logCtx context.Context, rq *blip.Message, zeroSeq Seque // Determine incoming since and docIDs once, since there is some overhead associated with their calculation sinceSequenceId := zeroSeq var err error - if rq.Properties["future"] == trueProperty { + if 
rq.Properties[SubChangesFuture] == trueProperty { sinceSequenceId, err = latestSeq() } else if sinceStr, found := rq.Properties[SubChangesSince]; found { if sinceSequenceId, err = sequenceIDParser(sinceStr); err != nil { @@ -234,6 +236,14 @@ func (s *SubChangesParams) activeOnly() bool { return (s.rq.Properties[SubChangesActiveOnly] == trueProperty) } +func (s *SubChangesParams) requestPlus(defaultValue bool) (value bool) { + propertyValue, isDefined := s.rq.Properties[SubChangesRequestPlus] + if !isDefined { + return defaultValue + } + return propertyValue == trueProperty +} + func (s *SubChangesParams) filter() string { return s.rq.Properties[SubChangesFilter] } diff --git a/db/changes.go b/db/changes.go index d430d51cda..53b1df1cc3 100644 --- a/db/changes.go +++ b/db/changes.go @@ -26,19 +26,20 @@ import ( // Options for changes-feeds. ChangesOptions must not contain any mutable pointer references, as // changes processing currently assumes a deep copy when doing chanOpts := changesOptions. type ChangesOptions struct { - Since SequenceID // sequence # to start _after_ - Limit int // Max number of changes to return, if nonzero - Conflicts bool // Show all conflicting revision IDs, not just winning one? - IncludeDocs bool // Include doc body of each change? - Wait bool // Wait for results, instead of immediately returning empty result? - Continuous bool // Run continuously until terminated? - HeartbeatMs uint64 // How often to send a heartbeat to the client - TimeoutMs uint64 // After this amount of time, close the longpoll connection - ActiveOnly bool // If true, only return information on non-deleted, non-removed revisions - Revocations bool // Specifies whether revocation messages should be sent on the changes feed - clientType clientType // Can be used to determine if the replication is being started from a CBL 2.x or SGR2 client - LoggingCtx context.Context // Used for adding context to logs - ChangesCtx context.Context // Used for cancelling checking the changes feed should stop + Since SequenceID // sequence # to start _after_ + Limit int // Max number of changes to return, if nonzero + Conflicts bool // Show all conflicting revision IDs, not just winning one? + IncludeDocs bool // Include doc body of each change? + Wait bool // Wait for results, instead of immediately returning empty result? + Continuous bool // Run continuously until terminated? + RequestPlusSeq uint64 // Do not stop changes before cached sequence catches up with requestPlusSeq + HeartbeatMs uint64 // How often to send a heartbeat to the client + TimeoutMs uint64 // After this amount of time, close the longpoll connection + ActiveOnly bool // If true, only return information on non-deleted, non-removed revisions + Revocations bool // Specifies whether revocation messages should be sent on the changes feed + clientType clientType // Can be used to determine if the replication is being started from a CBL 2.x or SGR2 client + LoggingCtx context.Context // Used for adding context to logs + ChangesCtx context.Context // Used for cancelling checking the changes feed should stop } // A changes entry; Database.GetChanges returns an array of these. 
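For reference, the new RequestPlusSeq field is populated by the caller (REST or BLIP handler) before the feed starts. A minimal, illustrative sketch of that caller-side wiring, assuming access to a *db.DatabaseContext and omitting the HTTP status mapping done by the real handlers (the helper name is not part of this patch):

package rest

import "github.com/couchbase/sync_gateway/db"

// requestPlusChangesOptions is an illustrative helper showing how a
// non-continuous changes request resolves RequestPlusSeq before the feed runs.
func requestPlusChangesOptions(dbc *db.DatabaseContext, since db.SequenceID, requestPlus bool) (db.ChangesOptions, error) {
	options := db.ChangesOptions{
		Since:      since,
		Continuous: false, // request_plus has no effect on continuous feeds
	}
	if requestPlus {
		seq, err := dbc.GetRequestPlusSequence()
		if err != nil {
			return options, err // the handlers surface this as a 503
		}
		// The changes loop will not exit before the cached sequence reaches seq
		options.RequestPlusSeq = seq
	}
	return options, nil
}
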
@@ -629,8 +630,9 @@ func (col *DatabaseCollectionWithUser) SimpleMultiChangesFeed(ctx context.Contex var changeWaiter *ChangeWaiter var lowSequence uint64 - var currentCachedSequence uint64 + var currentCachedSequence uint64 // The highest contiguous sequence buffered over the caching feed var lateSequenceFeeds map[channels.ID]*lateSequenceFeed + var useLateSequenceFeeds bool // LateSequence feeds are only used for continuous, or one-shot where options.RequestPlusSeq > currentCachedSequence var userCounter uint64 // Wait counter used to identify changes to the user document var changedChannels channels.ChangedKeys // Tracks channels added/removed to the user during changes processing. var userChanged bool // Whether the user document has changed in a given iteration loop @@ -638,9 +640,9 @@ func (col *DatabaseCollectionWithUser) SimpleMultiChangesFeed(ctx context.Contex // Retrieve the current max cached sequence - ensures there isn't a race between the subsequent channel cache queries currentCachedSequence = col.changeCache().getChannelCache().GetHighCacheSequence() - if options.Wait { - options.Wait = false + // If changes feed requires more than one ChangesLoop iteration, initialize changeWaiter + if options.Wait || options.RequestPlusSeq > currentCachedSequence { changeWaiter = col.startChangeWaiter() // Waiter is updated with the actual channel set (post-user reload) at the start of the outer changes loop userCounter = changeWaiter.CurrentUserCount() // Reload user to pick up user changes that happened between auth and the change waiter @@ -676,7 +678,8 @@ func (col *DatabaseCollectionWithUser) SimpleMultiChangesFeed(ctx context.Contex // For a continuous feed, initialise the lateSequenceFeeds that track late-arriving sequences // to the channel caches. - if options.Continuous { + if options.Continuous || options.RequestPlusSeq > currentCachedSequence { + useLateSequenceFeeds = true lateSequenceFeeds = make(map[channels.ID]*lateSequenceFeed) defer col.closeLateFeeds(lateSequenceFeeds) } @@ -741,7 +744,7 @@ func (col *DatabaseCollectionWithUser) SimpleMultiChangesFeed(ctx context.Contex // Handles previously skipped sequences prior to options.Since that // have arrived in the channel cache since this changes request started. Only needed for // continuous feeds - one-off changes requests only require the standard channel cache. - if options.Continuous { + if useLateSequenceFeeds { lateSequenceFeedHandler := lateSequenceFeeds[chanID] if lateSequenceFeedHandler != nil { latefeed, err := col.getLateFeed(lateSequenceFeedHandler, singleChannelCache) @@ -957,14 +960,19 @@ func (col *DatabaseCollectionWithUser) SimpleMultiChangesFeed(ctx context.Contex } } } - if !options.Continuous && (sentSomething || changeWaiter == nil) { - break + + // Check whether non-continuous changes feeds that aren't waiting to reach requestPlus sequence can exit + if !options.Continuous && currentCachedSequence >= options.RequestPlusSeq { + // If non-longpoll, or longpoll has sent something, can exit + if !options.Wait || sentSomething { + break + } } // For longpoll requests that didn't send any results, reset low sequence to the original since value, // as the system low sequence may change before the longpoll request wakes up, and longpoll feeds don't // use lateSequenceFeeds. 
- if !options.Continuous { + if !useLateSequenceFeeds { options.Since.LowSeq = requestLowSeq } @@ -981,6 +989,7 @@ func (col *DatabaseCollectionWithUser) SimpleMultiChangesFeed(ctx context.Contex waitForChanges: for { + col.dbStats().CBLReplicationPull().NumPullReplTotalCaughtUp.Add(1) // If we're in a deferred Backfill, the user may not get notification when the cache catches up to the backfill (e.g. when the granting doc isn't // visible to the user), and so ChangeWaiter.Wait() would block until the next user-visible doc arrives. Use a hardcoded wait instead // Similar handling for when we see sequences later than the stable sequence. @@ -992,7 +1001,6 @@ func (col *DatabaseCollectionWithUser) SimpleMultiChangesFeed(ctx context.Contex break waitForChanges } - col.dbStats().CBLReplicationPull().NumPullReplTotalCaughtUp.Add(1) col.dbStats().CBLReplicationPull().NumPullReplCaughtUp.Add(1) waitResponse := changeWaiter.Wait() col.dbStats().CBLReplicationPull().NumPullReplCaughtUp.Add(-1) @@ -1310,7 +1318,7 @@ func createChangesEntry(ctx context.Context, docid string, db *DatabaseCollectio func (options ChangesOptions) String() string { return fmt.Sprintf( - `{Since: %s, Limit: %d, Conflicts: %t, IncludeDocs: %t, Wait: %t, Continuous: %t, HeartbeatMs: %d, TimeoutMs: %d, ActiveOnly: %t}`, + `{Since: %s, Limit: %d, Conflicts: %t, IncludeDocs: %t, Wait: %t, Continuous: %t, HeartbeatMs: %d, TimeoutMs: %d, ActiveOnly: %t, RequestPlusSeq: %d}`, options.Since, options.Limit, options.Conflicts, @@ -1320,6 +1328,7 @@ func (options ChangesOptions) String() string { options.HeartbeatMs, options.TimeoutMs, options.ActiveOnly, + options.RequestPlusSeq, ) } diff --git a/db/database.go b/db/database.go index f7ded714b4..fcd4f21908 100644 --- a/db/database.go +++ b/db/database.go @@ -174,8 +174,8 @@ type DatabaseContextOptions struct { skipRegisterImportPIndex bool // if set, skips the global gocb PIndex registration MetadataStore base.DataStore // If set, use this location/connection for SG metadata storage - if not set, metadata is stored using the same location/connection as the bucket used for data storage. MetadataID string // MetadataID used for metadata storage - - BlipStatsReportingInterval int64 // interval to report blip stats in milliseconds + BlipStatsReportingInterval int64 // interval to report blip stats in milliseconds + ChangesRequestPlus bool // Sets the default value for request_plus, for non-continuous changes feeds } type ScopesOptions map[string]ScopeOptions @@ -2326,3 +2326,10 @@ func (dbc *DatabaseContext) AuthenticatorOptions() auth.AuthenticatorOptions { defaultOptions.MetaKeys = dbc.MetadataKeys return defaultOptions } + +// GetRequestPlusSequence fetches the current value of the sequence counter for the database. 
+// Uses getSequence (instead of lastSequence) as it's intended to be up to date with allocations +// across all nodes, while lastSequence is just the latest allocation from this node +func (dbc *DatabaseContext) GetRequestPlusSequence() (uint64, error) { + return dbc.sequences.getSequence() +} diff --git a/db/util_testing.go b/db/util_testing.go index 67433123c8..63f6814d08 100644 --- a/db/util_testing.go +++ b/db/util_testing.go @@ -95,6 +95,17 @@ func (db *DatabaseContext) WaitForCaughtUp(targetCount int64) error { return errors.New("WaitForCaughtUp didn't catch up") } +func (db *DatabaseContext) WaitForTotalCaughtUp(targetCount int64) error { + for i := 0; i < 100; i++ { + caughtUpCount := db.DbStats.CBLReplicationPull().NumPullReplTotalCaughtUp.Value() + if caughtUpCount >= targetCount { + return nil + } + time.Sleep(100 * time.Millisecond) + } + return errors.New("WaitForCaughtUp didn't catch up") +} + type StatWaiter struct { initCount int64 // Document cached count when NewStatWaiter is called targetCount int64 // Target count used when Wait is called @@ -598,3 +609,13 @@ func GetSingleDatabaseCollection(tb testing.TB, database *DatabaseContext) *Data tb.Fatalf("Could not find a collection") return nil } + +// AllocateTestSequence allocates a sequence via the sequenceAllocator. For use by non-db tests +func AllocateTestSequence(database *DatabaseContext) (uint64, error) { + return database.sequences.incrementSequence(1) +} + +// ReleaseTestSequence releases a sequence via the sequenceAllocator. For use by non-db tests +func ReleaseTestSequence(database *DatabaseContext, sequence uint64) error { + return database.sequences.releaseSequence(sequence) +} diff --git a/docs/api/paths/admin/keyspace-_changes.yaml b/docs/api/paths/admin/keyspace-_changes.yaml index 2ddb95633a..1c3819be88 100644 --- a/docs/api/paths/admin/keyspace-_changes.yaml +++ b/docs/api/paths/admin/keyspace-_changes.yaml @@ -96,6 +96,13 @@ get: - longpoll - continuous - websocket + + - name: request_plus + in: query + description: When true, ensures all valid documents written prior to the request being issued are included in the response. This is only applicable for non-continuous feeds. + schema: + type: boolean + default: 'false' responses: '200': $ref: ../../components/responses.yaml#/changes-feed @@ -156,6 +163,9 @@ post: feed: description: 'The type of changes feed to use. ' type: string + request_plus: + description: 'When true, ensures all valid documents written prior to the request being issued are included in the response. This is only applicable for non-continuous feeds.' + type: string responses: '200': $ref: ../../components/responses.yaml#/changes-feed diff --git a/rest/blip_api_crud_test.go b/rest/blip_api_crud_test.go index 356b51074a..bae3c89a18 100644 --- a/rest/blip_api_crud_test.go +++ b/rest/blip_api_crud_test.go @@ -2652,3 +2652,122 @@ func TestUnsubChanges(t *testing.T) { _, found = btc.WaitForRev("doc2", resp.Rev) assert.True(t, found) } + +// TestRequestPlusPull tests that a one-shot pull replication waits for pending changes when request plus is set on the replication. 
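The tests below drive this through the BlipTesterClient helpers, but at the protocol level the client only has to add the requestPlus property to its subChanges message. A rough sketch of such a message, assuming an already established BLIP connection (the helper name is hypothetical; the profile and property constants are the ones added in this change):

package rest

import (
	"github.com/couchbase/go-blip"
	"github.com/couchbase/sync_gateway/db"
)

// newOneShotRequestPlusSubChanges illustrates the subChanges properties used
// for a one-shot pull that waits for sequences pending at request time.
func newOneShotRequestPlusSubChanges() *blip.Message {
	rq := blip.NewRequest()
	rq.SetProfile(db.MessageSubChanges)
	rq.Properties[db.SubChangesContinuous] = "false"
	rq.Properties[db.SubChangesSince] = "0"
	// New in this change: the server will not end the feed until its cached
	// sequence catches up with the sequence allocated at request time.
	rq.Properties[db.SubChangesRequestPlus] = "true"
	return rq
}
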
+func TestRequestPlusPull(t *testing.T) { + + base.SetUpTestLogging(t, base.LevelInfo, base.KeyDCP, base.KeyChanges, base.KeyHTTP) + defer db.SuspendSequenceBatching()() // Required for slow sequence simulation + + rtConfig := RestTesterConfig{ + SyncFn: `function(doc) { + channel(doc.channel); + if (doc.accessUser != "") { + access(doc.accessUser, doc.accessChannel) + } + }`, + } + rt := NewRestTester(t, &rtConfig) + defer rt.Close() + database := rt.GetDatabase() + + // Initialize blip tester client (will create user) + client, err := NewBlipTesterClientOptsWithRT(t, rt, &BlipTesterClientOpts{ + Username: "bernard", + }) + require.NoError(t, err) + defer client.Close() + + // Put a doc in channel PBS + response := rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-1", `{"channel":["PBS"]}`) + RequireStatus(t, response, 201) + + // Allocate a sequence but do not write a doc for it - will block DCP buffering until sequence is skipped + slowSequence, seqErr := db.AllocateTestSequence(database) + require.NoError(t, seqErr) + + // Write a document granting user 'bernard' access to PBS + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/grantDoc", `{"accessUser":"bernard", "accessChannel":"PBS"}`) + RequireStatus(t, response, 201) + + caughtUpStart := database.DbStats.CBLReplicationPull().NumPullReplTotalCaughtUp.Value() + + // Start a regular one-shot pull + err = client.StartOneshotPullRequestPlus() + assert.NoError(t, err) + + // Wait for the one-shot changes feed to go into wait mode before releasing the slow sequence + require.NoError(t, database.WaitForTotalCaughtUp(caughtUpStart+1)) + + // Release the slow sequence + releaseErr := db.ReleaseTestSequence(database, slowSequence) + require.NoError(t, releaseErr) + + // The one-shot pull should unblock and replicate the document in the granted channel + data, ok := client.WaitForDoc("pbs-1") + assert.True(t, ok) + assert.Equal(t, `{"channel":["PBS"]}`, string(data)) + +} + +// TestRequestPlusPull tests that a one-shot pull replication waits for pending changes when request plus is set on the db config. 
+func TestRequestPlusPullDbConfig(t *testing.T) { + + base.SetUpTestLogging(t, base.LevelInfo, base.KeyDCP, base.KeyChanges, base.KeyHTTP) + defer db.SuspendSequenceBatching()() // Required for slow sequence simulation + + rtConfig := RestTesterConfig{ + SyncFn: `function(doc) { + channel(doc.channel); + if (doc.accessUser != "") { + access(doc.accessUser, doc.accessChannel) + } + }`, + DatabaseConfig: &DatabaseConfig{ + DbConfig: DbConfig{ + ChangesRequestPlus: base.BoolPtr(true), + }, + }, + } + rt := NewRestTester(t, &rtConfig) + defer rt.Close() + database := rt.GetDatabase() + + // Initialize blip tester client (will create user) + client, err := NewBlipTesterClientOptsWithRT(t, rt, &BlipTesterClientOpts{ + Username: "bernard", + }) + require.NoError(t, err) + defer client.Close() + + // Put a doc in channel PBS + response := rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-1", `{"channel":["PBS"]}`) + RequireStatus(t, response, 201) + + // Allocate a sequence but do not write a doc for it - will block DCP buffering until sequence is skipped + slowSequence, seqErr := db.AllocateTestSequence(database) + require.NoError(t, seqErr) + + // Write a document granting user 'bernard' access to PBS + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/grantDoc", `{"accessUser":"bernard", "accessChannel":"PBS"}`) + RequireStatus(t, response, 201) + + caughtUpStart := database.DbStats.CBLReplicationPull().NumPullReplTotalCaughtUp.Value() + + // Start a regular one-shot pull + err = client.StartOneshotPull() + assert.NoError(t, err) + + // Wait for the one-shot changes feed to go into wait mode before releasing the slow sequence + require.NoError(t, database.WaitForTotalCaughtUp(caughtUpStart+1)) + + // Release the slow sequence + releaseErr := db.ReleaseTestSequence(database, slowSequence) + require.NoError(t, releaseErr) + + // The one-shot pull should unblock and replicate the document in the granted channel + data, ok := client.WaitForDoc("pbs-1") + assert.True(t, ok) + assert.Equal(t, `{"channel":["PBS"]}`, string(data)) + +} diff --git a/rest/blip_client_test.go b/rest/blip_client_test.go index 13558aee7e..0b09af007b 100644 --- a/rest/blip_client_test.go +++ b/rest/blip_client_test.go @@ -632,19 +632,23 @@ func (btc *BlipTesterClient) Collection(collectionName string) *BlipTesterCollec // StartPull will begin a continuous pull replication since 0 between the client and server func (btcc *BlipTesterCollectionClient) StartPull() (err error) { - return btcc.StartPullSince("true", "0", "false", "") + return btcc.StartPullSince("true", "0", "false", "", "") } func (btcc *BlipTesterCollectionClient) StartOneshotPull() (err error) { - return btcc.StartPullSince("false", "0", "false", "") + return btcc.StartPullSince("false", "0", "false", "", "") } func (btcc *BlipTesterCollectionClient) StartOneshotPullFiltered(channels string) (err error) { - return btcc.StartPullSince("false", "0", "false", channels) + return btcc.StartPullSince("false", "0", "false", channels, "") +} + +func (btcc *BlipTesterCollectionClient) StartOneshotPullRequestPlus() (err error) { + return btcc.StartPullSince("false", "0", "false", "", "true") } // StartPullSince will begin a pull replication between the client and server with the given params. 
-func (btc *BlipTesterCollectionClient) StartPullSince(continuous, since, activeOnly string, channels string) (err error) { +func (btc *BlipTesterCollectionClient) StartPullSince(continuous, since, activeOnly, channels, requestPlus string) (err error) { subChangesRequest := blip.NewRequest() subChangesRequest.SetProfile(db.MessageSubChanges) subChangesRequest.Properties[db.SubChangesContinuous] = continuous @@ -654,6 +658,9 @@ func (btc *BlipTesterCollectionClient) StartPullSince(continuous, since, activeO subChangesRequest.Properties[db.SubChangesFilter] = base.ByChannelFilter subChangesRequest.Properties[db.SubChangesChannels] = channels } + if requestPlus != "" { + subChangesRequest.Properties[db.SubChangesRequestPlus] = requestPlus + } subChangesRequest.SetNoReply(true) if btc.parent.BlipTesterClientOpts.SendRevocations { @@ -923,6 +930,9 @@ func (btc *BlipTesterCollectionClient) GetRev(docID, revID string) (data []byte, // WaitForRev blocks until the given doc ID and rev ID have been stored by the client, and returns the data when found. func (btc *BlipTesterCollectionClient) WaitForRev(docID, revID string) (data []byte, found bool) { + if data, found := btc.GetRev(docID, revID); found { + return data, found + } ticker := time.NewTicker(50 * time.Millisecond) timeout := time.After(10 * time.Second) for { @@ -938,6 +948,41 @@ func (btc *BlipTesterCollectionClient) WaitForRev(docID, revID string) (data []b } } +// GetDoc returns a rev stored in the Client under the given docID. (if multiple revs are present, rev body returned is non-deterministic) +func (btc *BlipTesterCollectionClient) GetDoc(docID string) (data []byte, found bool) { + btc.docsLock.RLock() + defer btc.docsLock.RUnlock() + + if rev, ok := btc.docs[docID]; ok { + for _, data := range rev { + return data.body, true + } + } + + return nil, false +} + +// WaitForDoc blocks until the given doc ID has been stored by the client, and returns the data when found. 
+func (btc *BlipTesterCollectionClient) WaitForDoc(docID string) (data []byte, found bool) { + + if data, found := btc.GetDoc(docID); found { + return data, found + } + ticker := time.NewTicker(50 * time.Millisecond) + timeout := time.After(10 * time.Second) + for { + select { + case <-timeout: + btc.parent.rt.TB.Fatalf("BlipTesterClient timed out waiting for doc ID: %v", docID) + return nil, false + case <-ticker.C: + if data, found := btc.GetDoc(docID); found { + return data, found + } + } + } +} + // GetMessage returns the message stored in the Client under the given serial number func (btr *BlipTesterReplicator) GetMessage(serialNumber blip.MessageNumber) (msg *blip.Message, found bool) { btr.messagesLock.RLock() @@ -1026,6 +1071,10 @@ func (btc *BlipTesterClient) WaitForRev(docID string, revID string) ([]byte, boo return btc.SingleCollection().WaitForRev(docID, revID) } +func (btc *BlipTesterClient) WaitForDoc(docID string) ([]byte, bool) { + return btc.SingleCollection().WaitForDoc(docID) +} + func (btc *BlipTesterClient) WaitForBlipRevMessage(docID string, revID string) (*blip.Message, bool) { return btc.SingleCollection().WaitForBlipRevMessage(docID, revID) } @@ -1038,16 +1087,20 @@ func (btc *BlipTesterClient) StartOneshotPullFiltered(channels string) error { return btc.SingleCollection().StartOneshotPullFiltered(channels) } +func (btc *BlipTesterClient) StartOneshotPullRequestPlus() error { + return btc.SingleCollection().StartOneshotPullRequestPlus() +} + func (btc *BlipTesterClient) PushRev(docID string, revID string, body []byte) (string, error) { return btc.SingleCollection().PushRev(docID, revID, body) } func (btc *BlipTesterClient) StartPullSince(continuous, since, activeOnly string) error { - return btc.SingleCollection().StartPullSince(continuous, since, activeOnly, "") + return btc.SingleCollection().StartPullSince(continuous, since, activeOnly, "", "") } func (btc *BlipTesterClient) StartFilteredPullSince(continuous, since, activeOnly string, channels string) error { - return btc.SingleCollection().StartPullSince(continuous, since, activeOnly, channels) + return btc.SingleCollection().StartPullSince(continuous, since, activeOnly, channels, "") } func (btc *BlipTesterClient) GetRev(docID, revID string) ([]byte, bool) { diff --git a/rest/changes_api.go b/rest/changes_api.go index 408ca401b9..b1f39173f3 100644 --- a/rest/changes_api.go +++ b/rest/changes_api.go @@ -37,6 +37,12 @@ const kDefaultTimeoutMS = 5 * 60 * 1000 // Maximum value of _changes?timeout property const kMaxTimeoutMS = 15 * 60 * 1000 +// Values for feed parameter on changes request +const feedTypeContinuous = "continuous" +const feedTypeLongpoll = "longpoll" +const feedTypeNormal = "normal" +const feedTypeWebsocket = "websocket" + func (h *handler) handleRevsDiff() error { var input map[string][]string err := h.readJSONInto(&input) @@ -180,6 +186,16 @@ func (h *handler) handleChanges() error { options.ActiveOnly = h.getBoolQuery("active_only") options.IncludeDocs = h.getBoolQuery("include_docs") options.Revocations = h.getBoolQuery("revocations") + + useRequestPlus, _ := h.getOptBoolQuery("request_plus", h.db.Options.ChangesRequestPlus) + if useRequestPlus && feed != feedTypeContinuous { + var seqErr error + options.RequestPlusSeq, seqErr = h.db.GetRequestPlusSequence() + if seqErr != nil { + return base.HTTPErrorf(http.StatusServiceUnavailable, "Unable to retrieve requestPlus sequence") + } + + } filter = h.getQuery("filter") channelsParam := h.getQuery("channels") if channelsParam != "" { @@ -312,18 
+328,18 @@ func (h *handler) handleChanges() error { var err error switch feed { - case "normal": + case feedTypeNormal: if filter == "_doc_ids" { err, forceClose = h.sendSimpleChanges(userChannels, options, docIdsArray) } else { err, forceClose = h.sendSimpleChanges(userChannels, options, nil) } - case "longpoll": + case feedTypeLongpoll: options.Wait = true err, forceClose = h.sendSimpleChanges(userChannels, options, nil) - case "continuous": + case feedTypeContinuous: err, forceClose = h.sendContinuousChangesByHTTP(userChannels, options) - case "websocket": + case feedTypeWebsocket: err, forceClose = h.sendContinuousChangesByWebSocket(userChannels, options) default: err = base.HTTPErrorf(http.StatusBadRequest, "Unknown feed type") @@ -454,7 +470,7 @@ func (h *handler) generateContinuousChanges(inChannels base.Set, options db.Chan options.Continuous = true err, forceClose := db.GenerateChanges(h.ctx(), h.rq.Context(), h.collection, inChannels, options, nil, send) if sendErr, ok := err.(*db.ChangesSendErr); ok { - h.logStatus(http.StatusOK, fmt.Sprintf("0Write error: %v", sendErr)) + h.logStatus(http.StatusOK, fmt.Sprintf("Write error: %v", sendErr)) return nil, forceClose // error is probably because the client closed the connection } else { h.logStatus(http.StatusOK, "OK (continuous feed closed)") @@ -580,7 +596,8 @@ func (h *handler) readChangesOptionsFromJSON(jsonData []byte) (feed string, opti HeartbeatMs *uint64 `json:"heartbeat"` TimeoutMs *uint64 `json:"timeout"` AcceptEncoding string `json:"accept_encoding"` - ActiveOnly bool `json:"active_only"` // Return active revisions only + ActiveOnly bool `json:"active_only"` // Return active revisions only + RequestPlus *bool `json:"request_plus"` // Wait for sequence buffering to catch up to database seq value at time request was issued } // Initialize since clock and hasher ahead of unmarshalling sequence @@ -624,6 +641,20 @@ func (h *handler) readChangesOptionsFromJSON(jsonData []byte) (feed string, opti compress = (input.AcceptEncoding == "gzip") + if h.db != nil && feed != feedTypeContinuous { + useRequestPlus := h.db.Options.ChangesRequestPlus + if input.RequestPlus != nil { + useRequestPlus = *input.RequestPlus + } + if useRequestPlus { + var seqErr error + options.RequestPlusSeq, seqErr = h.db.GetRequestPlusSequence() + if seqErr != nil { + err = base.HTTPErrorf(http.StatusServiceUnavailable, "Unable to retrieve requestPlus sequence: %v", seqErr) + return + } + } + } return } diff --git a/rest/changestest/changes_api_test.go b/rest/changestest/changes_api_test.go index 0c6472e7af..8ffc704a99 100644 --- a/rest/changestest/changes_api_test.go +++ b/rest/changestest/changes_api_test.go @@ -3935,6 +3935,297 @@ func TestTombstoneCompaction(t *testing.T) { TestCompact(db.QueryTombstoneBatch + 20) } +// TestOneShotGrantTiming simulates a one-shot changes feed returning before a previously issued grant has been +// buffered over DCP. 
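Over REST, the same behaviour can be requested either via the request_plus query parameter or via the request_plus field in a POST body (read by readChangesOptionsFromJSON). A hedged client-side example of both shapes; the host, port, database name and lack of authentication are placeholders, not part of this patch:

package main

import (
	"fmt"
	"net/http"
	"strings"
)

// Placeholder endpoint; a real deployment supplies its own host/port,
// database name and credentials.
const changesURL = "http://localhost:4985/db/_changes"

func main() {
	// One-shot GET changes with request_plus as a query parameter
	getResp, err := http.Get(changesURL + "?feed=normal&request_plus=true")
	if err == nil {
		fmt.Println("GET status:", getResp.Status)
		getResp.Body.Close()
	}

	// Equivalent POST form, matching the request_plus field parsed by the handler
	body := strings.NewReader(`{"feed":"normal","request_plus":true}`)
	postResp, err := http.Post(changesURL, "application/json", body)
	if err == nil {
		fmt.Println("POST status:", postResp.Status)
		postResp.Body.Close()
	}
}
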
+func TestOneShotGrantTiming(t *testing.T) { + + base.SetUpTestLogging(t, base.LevelDebug, base.KeyChanges, base.KeyHTTP) + + defer db.SuspendSequenceBatching()() + + rt := rest.NewRestTester(t, + &rest.RestTesterConfig{ + SyncFn: `function(doc) { + channel(doc.channel); + if (doc.accessUser != "") { + access(doc.accessUser, doc.accessChannel) + } + }`, + }) + defer rt.Close() + + // Create user with access to no channels + ctx := rt.Context() + database := rt.GetDatabase() + a := database.Authenticator(ctx) + bernard, err := a.NewUser("bernard", "letmein", nil) + assert.NoError(t, err) + assert.NoError(t, a.Save(bernard)) + + // Put several documents in channel PBS + response := rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-1", `{"channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-2", `{"channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-3", `{"channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-4", `{"channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + + var changes struct { + Results []db.ChangeEntry + Last_Seq interface{} + } + + // Allocate a sequence but do not write a doc for it - will block DCP buffering until sequence is skipped + slowSequence, seqErr := db.AllocateTestSequence(database) + require.NoError(t, seqErr) + log.Printf("Allocated slowSequence: %v", slowSequence) + + // Write a document granting user access to PBS + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/grantDoc", `{"accessUser":"bernard", "accessChannel":"PBS"}`) + rest.RequireStatus(t, response, 201) + + // Issue normal one-shot changes request. Expect no results as granting document hasn't been buffered (blocked by + // slowSequence) + changesResponse := rt.SendUserRequest("GET", "/{{.keyspace}}/_changes", "", "bernard") + rest.RequireStatus(t, changesResponse, 200) + err = base.JSONUnmarshal(changesResponse.Body.Bytes(), &changes) + assert.NoError(t, err, "Error unmarshalling changes response") + for _, entry := range changes.Results { + log.Printf("Entry:%+v", entry) + } + require.Len(t, changes.Results, 0) + + // Release the slow sequence and wait for it to be processed over DCP + releaseErr := db.ReleaseTestSequence(database, slowSequence) + require.NoError(t, releaseErr) + require.NoError(t, rt.WaitForPendingChanges()) + + // Issue normal one-shot changes request. Expect results as granting document buffering is unblocked + changesResponse = rt.SendUserRequest("GET", "/{{.keyspace}}/_changes", "", "bernard") + rest.RequireStatus(t, changesResponse, 200) + err = base.JSONUnmarshal(changesResponse.Body.Bytes(), &changes) + assert.NoError(t, err, "Error unmarshalling changes response") + for _, entry := range changes.Results { + log.Printf("Entry:%+v", entry) + } + require.Len(t, changes.Results, 4) + +} + +// TestOneShotGrantRequestPlus simulates a one-shot changes feed being made before a previously issued grant has been +// buffered over DCP. When requestPlus is set, changes feed should block until grant is processed. 
+func TestOneShotGrantRequestPlus(t *testing.T) { + + base.SetUpTestLogging(t, base.LevelDebug, base.KeyChanges, base.KeyHTTP) + + defer db.SuspendSequenceBatching()() // Required for slow sequence simulation + + rt := rest.NewRestTester(t, + &rest.RestTesterConfig{ + SyncFn: `function(doc) { + channel(doc.channel); + if (doc.accessUser != "") { + access(doc.accessUser, doc.accessChannel) + } + }`, + }) + defer rt.Close() + + // Create user with access to no channels + ctx := rt.Context() + database := rt.GetDatabase() + a := database.Authenticator(ctx) + bernard, err := a.NewUser("bernard", "letmein", nil) + assert.NoError(t, err) + assert.NoError(t, a.Save(bernard)) + + // Put several documents in channel PBS + response := rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-1", `{"channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-2", `{"channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-3", `{"channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-4", `{"channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + + var changes struct { + Results []db.ChangeEntry + Last_Seq interface{} + } + + // Allocate a sequence but do not write a doc for it - will block DCP buffering until sequence is skipped + slowSequence, seqErr := db.AllocateTestSequence(database) + require.NoError(t, seqErr) + + // Write a document granting user access to PBS + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/grantDoc", `{"accessUser":"bernard", "accessChannel":"PBS"}`) + rest.RequireStatus(t, response, 201) + + caughtUpStart := database.DbStats.CBLReplicationPull().NumPullReplTotalCaughtUp.Value() + + var oneShotComplete sync.WaitGroup + // Issue a GET requestPlus one-shot changes request in a separate goroutine. + oneShotComplete.Add(1) + go func() { + defer oneShotComplete.Done() + changesResponse := rt.SendUserRequest("GET", "/{{.keyspace}}/_changes?request_plus=true", "", "bernard") + rest.RequireStatus(t, changesResponse, 200) + err = base.JSONUnmarshal(changesResponse.Body.Bytes(), &changes) + assert.NoError(t, err, "Error unmarshalling changes response") + for _, entry := range changes.Results { + log.Printf("Entry:%+v", entry) + } + require.Len(t, changes.Results, 4) + }() + + // Issue a POST requestPlus one-shot changes request in a separate goroutine. + oneShotComplete.Add(1) + go func() { + defer oneShotComplete.Done() + changesResponse := rt.SendUserRequest("POST", "/{{.keyspace}}/_changes", `{"request_plus":true}`, "bernard") + rest.RequireStatus(t, changesResponse, 200) + err = base.JSONUnmarshal(changesResponse.Body.Bytes(), &changes) + assert.NoError(t, err, "Error unmarshalling changes response") + for _, entry := range changes.Results { + log.Printf("Entry:%+v", entry) + } + require.Len(t, changes.Results, 4) + }() + + // Wait for the one-shot changes feed to go into wait mode before releasing the slow sequence + require.NoError(t, database.WaitForTotalCaughtUp(caughtUpStart+2)) + + // Release the slow sequence and wait for it to be processed over DCP + releaseErr := db.ReleaseTestSequence(database, slowSequence) + require.NoError(t, releaseErr) + require.NoError(t, rt.WaitForPendingChanges()) + + oneShotComplete.Wait() +} + +// TestOneShotGrantRequestPlusDbConfig simulates a one-shot changes feed being made before a previously issued grant has been +// buffered over DCP. 
When requestPlus is set via config, changes feed should block until grant is processed. +func TestOneShotGrantRequestPlusDbConfig(t *testing.T) { + + base.SetUpTestLogging(t, base.LevelDebug, base.KeyChanges, base.KeyHTTP) + + defer db.SuspendSequenceBatching()() + + rt := rest.NewRestTester(t, + &rest.RestTesterConfig{ + SyncFn: `function(doc) { + channel(doc.channel); + if (doc.accessUser != "") { + access(doc.accessUser, doc.accessChannel) + } + }`, + DatabaseConfig: &rest.DatabaseConfig{ + DbConfig: rest.DbConfig{ + ChangesRequestPlus: base.BoolPtr(true), + }, + }, + }) + defer rt.Close() + + // Create user with access to no channels + ctx := rt.Context() + database := rt.GetDatabase() + a := database.Authenticator(ctx) + bernard, err := a.NewUser("bernard", "letmein", nil) + assert.NoError(t, err) + assert.NoError(t, a.Save(bernard)) + + // Put several documents in channel PBS + response := rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-1", `{"channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-2", `{"channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-3", `{"channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-4", `{"channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + + var changes struct { + Results []db.ChangeEntry + Last_Seq interface{} + } + + // Allocate a sequence but do not write a doc for it - will block DCP buffering until sequence is skipped + slowSequence, seqErr := db.AllocateTestSequence(database) + require.NoError(t, seqErr) + log.Printf("Allocated slowSequence: %v", slowSequence) + + // Write a document granting user access to PBS + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/grantDoc", `{"accessUser":"bernard", "accessChannel":"PBS"}`) + rest.RequireStatus(t, response, 201) + + // Issue one-shot GET changes request explicitly setting request_plus=false (should override config value). + // Expect no results as granting document hasn't been buffered (blocked by slowSequence) + changesResponse := rt.SendUserRequest("GET", "/{{.keyspace}}/_changes?request_plus=false", "", "bernard") + rest.RequireStatus(t, changesResponse, 200) + err = base.JSONUnmarshal(changesResponse.Body.Bytes(), &changes) + assert.NoError(t, err, "Error unmarshalling changes response") + for _, entry := range changes.Results { + log.Printf("Entry:%+v", entry) + } + require.Len(t, changes.Results, 0) + + // Issue one-shot POST changes request explicitly setting request_plus=false (should override config value). + // Expect no results as granting document hasn't been buffered (blocked by slowSequence) + changesResponse = rt.SendUserRequest("POST", "/{{.keyspace}}/_changes", `{"request_plus":false}`, "bernard") + rest.RequireStatus(t, changesResponse, 200) + err = base.JSONUnmarshal(changesResponse.Body.Bytes(), &changes) + assert.NoError(t, err, "Error unmarshalling changes response") + for _, entry := range changes.Results { + log.Printf("Entry:%+v", entry) + } + require.Len(t, changes.Results, 0) + + caughtUpStart := database.DbStats.CBLReplicationPull().NumPullReplTotalCaughtUp.Value() + + var oneShotComplete sync.WaitGroup + // Issue a GET one-shot changes request in a separate goroutine. 
Should run as request plus based on config + oneShotComplete.Add(1) + go func() { + defer oneShotComplete.Done() + changesResponse := rt.SendUserRequest("GET", "/{{.keyspace}}/_changes", "", "bernard") + rest.RequireStatus(t, changesResponse, 200) + err = base.JSONUnmarshal(changesResponse.Body.Bytes(), &changes) + assert.NoError(t, err, "Error unmarshalling changes response") + for _, entry := range changes.Results { + log.Printf("Entry:%+v", entry) + } + require.Len(t, changes.Results, 4) + }() + + // Issue a POST one-shot changes request in a separate goroutine. Should run as request plus based on config + oneShotComplete.Add(1) + go func() { + defer oneShotComplete.Done() + changesResponse := rt.SendUserRequest("POST", "/{{.keyspace}}/_changes", `{}`, "bernard") + rest.RequireStatus(t, changesResponse, 200) + err = base.JSONUnmarshal(changesResponse.Body.Bytes(), &changes) + assert.NoError(t, err, "Error unmarshalling changes response") + for _, entry := range changes.Results { + log.Printf("Entry:%+v", entry) + } + require.Len(t, changes.Results, 4) + }() + + // Wait for the one-shot changes feed to go into wait mode before releasing the slow sequence + require.NoError(t, database.WaitForTotalCaughtUp(caughtUpStart+2)) + + // Release the slow sequence and wait for it to be processed over DCP + releaseErr := db.ReleaseTestSequence(database, slowSequence) + require.NoError(t, releaseErr) + require.NoError(t, rt.WaitForPendingChanges()) + + oneShotComplete.Wait() +} + func waitForCompactStopped(dbc *db.DatabaseContext) error { for i := 0; i < 100; i++ { compactRunning := dbc.CacheCompactActive() diff --git a/rest/config.go b/rest/config.go index 43b6d6a9f7..848bee10aa 100644 --- a/rest/config.go +++ b/rest/config.go @@ -165,6 +165,7 @@ type DbConfig struct { GraphQL *functions.GraphQLConfig `json:"graphql,omitempty"` // GraphQL configuration & resolver fns UserFunctions *functions.FunctionsConfig `json:"functions,omitempty"` // Named JS fns for clients to call Suspendable *bool `json:"suspendable,omitempty"` // Allow the database to be suspended + ChangesRequestPlus *bool `json:"changes_request_plus,omitempty"` // If set, is used as the default value of request_plus for non-continuous replications CORS *auth.CORSConfig `json:"cors,omitempty"` } diff --git a/rest/server_context.go b/rest/server_context.go index 2b10ccd4f5..2cb270627e 100644 --- a/rest/server_context.go +++ b/rest/server_context.go @@ -1050,6 +1050,7 @@ func dbcOptionsFromConfig(ctx context.Context, sc *ServerContext, config *DbConf GroupID: groupID, JavascriptTimeout: javascriptTimeout, Serverless: sc.Config.IsServerless(), + ChangesRequestPlus: base.BoolDefault(config.ChangesRequestPlus, false), // UserQueries: config.UserQueries, // behind feature flag (see below) // UserFunctions: config.UserFunctions, // behind feature flag (see below) // GraphQL: config.GraphQL, // behind feature flag (see below) From 2eb41b1e1535411cb6938d4052443046bac473d5 Mon Sep 17 00:00:00 2001 From: Adam Fraser Date: Mon, 15 May 2023 15:55:50 -0700 Subject: [PATCH 12/42] Fix race in TestOneShotGrant tests (#6247) --- rest/changestest/changes_api_test.go | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/rest/changestest/changes_api_test.go b/rest/changestest/changes_api_test.go index 8ffc704a99..f9f63c83ca 100644 --- a/rest/changestest/changes_api_test.go +++ b/rest/changestest/changes_api_test.go @@ -4051,11 +4051,6 @@ func TestOneShotGrantRequestPlus(t *testing.T) { response = rt.SendAdminRequest("PUT", 
"/{{.keyspace}}/pbs-4", `{"channel":["PBS"]}`) rest.RequireStatus(t, response, 201) - var changes struct { - Results []db.ChangeEntry - Last_Seq interface{} - } - // Allocate a sequence but do not write a doc for it - will block DCP buffering until sequence is skipped slowSequence, seqErr := db.AllocateTestSequence(database) require.NoError(t, seqErr) @@ -4071,9 +4066,10 @@ func TestOneShotGrantRequestPlus(t *testing.T) { oneShotComplete.Add(1) go func() { defer oneShotComplete.Done() + var changes rest.ChangesResults changesResponse := rt.SendUserRequest("GET", "/{{.keyspace}}/_changes?request_plus=true", "", "bernard") rest.RequireStatus(t, changesResponse, 200) - err = base.JSONUnmarshal(changesResponse.Body.Bytes(), &changes) + err := base.JSONUnmarshal(changesResponse.Body.Bytes(), &changes) assert.NoError(t, err, "Error unmarshalling changes response") for _, entry := range changes.Results { log.Printf("Entry:%+v", entry) @@ -4085,6 +4081,7 @@ func TestOneShotGrantRequestPlus(t *testing.T) { oneShotComplete.Add(1) go func() { defer oneShotComplete.Done() + var changes rest.ChangesResults changesResponse := rt.SendUserRequest("POST", "/{{.keyspace}}/_changes", `{"request_plus":true}`, "bernard") rest.RequireStatus(t, changesResponse, 200) err = base.JSONUnmarshal(changesResponse.Body.Bytes(), &changes) @@ -4148,11 +4145,6 @@ func TestOneShotGrantRequestPlusDbConfig(t *testing.T) { response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-4", `{"channel":["PBS"]}`) rest.RequireStatus(t, response, 201) - var changes struct { - Results []db.ChangeEntry - Last_Seq interface{} - } - // Allocate a sequence but do not write a doc for it - will block DCP buffering until sequence is skipped slowSequence, seqErr := db.AllocateTestSequence(database) require.NoError(t, seqErr) @@ -4166,6 +4158,7 @@ func TestOneShotGrantRequestPlusDbConfig(t *testing.T) { // Expect no results as granting document hasn't been buffered (blocked by slowSequence) changesResponse := rt.SendUserRequest("GET", "/{{.keyspace}}/_changes?request_plus=false", "", "bernard") rest.RequireStatus(t, changesResponse, 200) + var changes rest.ChangesResults err = base.JSONUnmarshal(changesResponse.Body.Bytes(), &changes) assert.NoError(t, err, "Error unmarshalling changes response") for _, entry := range changes.Results { @@ -4191,9 +4184,10 @@ func TestOneShotGrantRequestPlusDbConfig(t *testing.T) { oneShotComplete.Add(1) go func() { defer oneShotComplete.Done() + var changes rest.ChangesResults changesResponse := rt.SendUserRequest("GET", "/{{.keyspace}}/_changes", "", "bernard") rest.RequireStatus(t, changesResponse, 200) - err = base.JSONUnmarshal(changesResponse.Body.Bytes(), &changes) + err := base.JSONUnmarshal(changesResponse.Body.Bytes(), &changes) assert.NoError(t, err, "Error unmarshalling changes response") for _, entry := range changes.Results { log.Printf("Entry:%+v", entry) @@ -4205,9 +4199,10 @@ func TestOneShotGrantRequestPlusDbConfig(t *testing.T) { oneShotComplete.Add(1) go func() { defer oneShotComplete.Done() + var changes rest.ChangesResults changesResponse := rt.SendUserRequest("POST", "/{{.keyspace}}/_changes", `{}`, "bernard") rest.RequireStatus(t, changesResponse, 200) - err = base.JSONUnmarshal(changesResponse.Body.Bytes(), &changes) + err := base.JSONUnmarshal(changesResponse.Body.Bytes(), &changes) assert.NoError(t, err, "Error unmarshalling changes response") for _, entry := range changes.Results { log.Printf("Entry:%+v", entry) From c24e303798625b2370f5448c5d7c64e832c322d4 Mon Sep 17 00:00:00 2001 
From: Tor Colvin Date: Mon, 15 May 2023 19:40:21 -0400 Subject: [PATCH 13/42] Use less or equal as a speculative fix (#6246) --- db/blip_sync_context.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/blip_sync_context.go b/db/blip_sync_context.go index 245e1e5f9c..b90560a4e0 100644 --- a/db/blip_sync_context.go +++ b/db/blip_sync_context.go @@ -672,7 +672,7 @@ func toHistory(revisions Revisions, knownRevs map[string]bool, maxHistory int) [ // timeElapsedForStatsReporting will return true if enough time has passed since the previous report. func (bsc *BlipSyncContext) timeElapsedForStatsReporting(currentTime int64) bool { - return (currentTime - bsc.stats.lastReportTime.Load()) > bsc.blipContextDb.Options.BlipStatsReportingInterval + return (currentTime - bsc.stats.lastReportTime.Load()) >= bsc.blipContextDb.Options.BlipStatsReportingInterval } // reportStats will update the stats on a database immediately if updateImmediately is true, otherwise update on BlipStatsReportinInterval From 4fe200cf387419034ec250ee614f9e760900710a Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Tue, 16 May 2023 14:05:15 +0100 Subject: [PATCH 14/42] CBG-2944: Ensure proveAttachments works for v2 attachments with a v2 replication protocol (#6242) --- base/stats.go | 15 +++- db/blip_handler.go | 4 +- db/blip_sync_stats.go | 2 + rest/blip_api_attachment_test.go | 115 +++++++++++++++++++++++++++++++ rest/blip_client_test.go | 59 +++++++++++++++- 5 files changed, 191 insertions(+), 4 deletions(-) diff --git a/base/stats.go b/base/stats.go index ccd5a37d36..224aafbcf0 100644 --- a/base/stats.go +++ b/base/stats.go @@ -376,7 +376,8 @@ type CBLReplicationPullStats struct { // The total amount of time processing rev messages (revisions) during pull revision. RevProcessingTime *SgwIntStat `json:"rev_processing_time"` // The total number of rev messages processed during replication. - RevSendCount *SgwIntStat `json:"rev_send_count"` + RevSendCount *SgwIntStat `json:"rev_send_count"` + RevErrorCount *SgwIntStat `json:"rev_error_count"` // The total amount of time between Sync Gateway receiving a request for a revision and that revision being sent. // // In a pull replication, Sync Gateway sends a /_changes request to the client and the client responds with the list of revisions it wants to receive. @@ -392,6 +393,8 @@ type CBLReplicationPushStats struct { AttachmentPushCount *SgwIntStat `json:"attachment_push_count"` // The total number of documents pushed. DocPushCount *SgwIntStat `json:"doc_push_count"` + // The total number of documents that failed to push. + DocPushErrorCount *SgwIntStat `json:"doc_push_error_count"` // The total number of changes and-or proposeChanges messages processed since node start-up. ProposeChangeCount *SgwIntStat `json:"propose_change_count"` // The total time spent processing changes and/or proposeChanges messages. 
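For context on the speculative ">=" change to timeElapsedForStatsReporting above: with a strict ">", an elapsed time exactly equal to BlipStatsReportingInterval (including a zero interval) never triggers a report. A standalone Go sketch of that boundary condition, not part of the patch, using an illustrative helper name:

// reportDue mirrors the comparison used by timeElapsedForStatsReporting (illustrative only).
// With ">" the case elapsed == interval is skipped, e.g. a zero interval with two calls on the
// same tick; ">=" also reports that boundary case, which is what the fix above relies on.
func reportDue(currentTime, lastReportTime, interval int64) bool {
	return (currentTime - lastReportTime) >= interval
}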
@@ -1202,6 +1205,10 @@ func (d *DbStats) initCBLReplicationPullStats() error { if err != nil { return err } + resUtil.RevErrorCount, err = NewIntStat(SubsystemReplicationPull, "rev_error_count", labelKeys, labelVals, prometheus.CounterValue, 0) + if err != nil { + return err + } resUtil.RevSendLatency, err = NewIntStat(SubsystemReplicationPull, "rev_send_latency", labelKeys, labelVals, prometheus.CounterValue, 0) if err != nil { return err @@ -1227,6 +1234,7 @@ func (d *DbStats) unregisterCBLReplicationPullStats() { prometheus.Unregister(d.CBLReplicationPullStats.RequestChangesTime) prometheus.Unregister(d.CBLReplicationPullStats.RevProcessingTime) prometheus.Unregister(d.CBLReplicationPullStats.RevSendCount) + prometheus.Unregister(d.CBLReplicationPullStats.RevErrorCount) prometheus.Unregister(d.CBLReplicationPullStats.RevSendLatency) } @@ -1252,6 +1260,10 @@ func (d *DbStats) initCBLReplicationPushStats() error { if err != nil { return err } + resUtil.DocPushErrorCount, err = NewIntStat(SubsystemReplicationPush, "doc_push_error_count", labelKeys, labelVals, prometheus.GaugeValue, 0) + if err != nil { + return err + } resUtil.ProposeChangeCount, err = NewIntStat(SubsystemReplicationPush, "propose_change_count", labelKeys, labelVals, prometheus.CounterValue, 0) if err != nil { return err @@ -1273,6 +1285,7 @@ func (d *DbStats) unregisterCBLReplicationPushStats() { prometheus.Unregister(d.CBLReplicationPushStats.AttachmentPushBytes) prometheus.Unregister(d.CBLReplicationPushStats.AttachmentPushCount) prometheus.Unregister(d.CBLReplicationPushStats.DocPushCount) + prometheus.Unregister(d.CBLReplicationPushStats.DocPushErrorCount) prometheus.Unregister(d.CBLReplicationPushStats.ProposeChangeCount) prometheus.Unregister(d.CBLReplicationPushStats.ProposeChangeTime) prometheus.Unregister(d.CBLReplicationPushStats.WriteProcessingTime) diff --git a/db/blip_handler.go b/db/blip_handler.go index 90c35d9840..37cfdbb15f 100644 --- a/db/blip_handler.go +++ b/db/blip_handler.go @@ -1220,7 +1220,9 @@ func (bh *blipHandler) handleProveAttachment(rq *blip.Message) error { return base.HTTPErrorf(http.StatusBadRequest, "no digest sent with proveAttachment") } - attData, err := bh.collection.GetAttachment(base.AttPrefix + digest) + allowedAttachment := bh.allowedAttachment(digest) + attachmentKey := MakeAttachmentKey(allowedAttachment.version, allowedAttachment.docID, digest) + attData, err := bh.collection.GetAttachment(attachmentKey) if err != nil { if bh.clientType == BLIPClientTypeSGR2 { return ErrAttachmentNotFound diff --git a/db/blip_sync_stats.go b/db/blip_sync_stats.go index 25fedee4ff..6286eae453 100644 --- a/db/blip_sync_stats.go +++ b/db/blip_sync_stats.go @@ -129,11 +129,13 @@ func BlipSyncStatsForCBL(dbStats *base.DbStats) *BlipSyncStats { blipStats.SendRevBytes = dbStats.Database().DocReadsBytesBlip blipStats.SendRevCount = dbStats.Database().NumDocReadsBlip + blipStats.SendRevErrorTotal = dbStats.CBLReplicationPull().RevErrorCount blipStats.HandleRevBytes = dbStats.Database().DocWritesBytesBlip blipStats.HandleRevProcessingTime = dbStats.CBLReplicationPush().WriteProcessingTime blipStats.HandleRevCount = dbStats.CBLReplicationPush().DocPushCount + blipStats.HandleRevErrorCount = dbStats.CBLReplicationPush().DocPushErrorCount blipStats.HandleGetAttachment = dbStats.CBLReplicationPull().AttachmentPullCount blipStats.HandleGetAttachmentBytes = dbStats.CBLReplicationPull().AttachmentPullBytes diff --git a/rest/blip_api_attachment_test.go b/rest/blip_api_attachment_test.go index 
a322369263..69cd0ca280 100644 --- a/rest/blip_api_attachment_test.go +++ b/rest/blip_api_attachment_test.go @@ -178,6 +178,121 @@ func TestBlipPushPullV2AttachmentV3Client(t *testing.T) { assert.Equal(t, int64(1), rt.GetDatabase().DbStats.CBLReplicationPush().AttachmentPushCount.Value()) assert.Equal(t, int64(11), rt.GetDatabase().DbStats.CBLReplicationPush().AttachmentPushBytes.Value()) } + +// TestBlipProveAttachmentV2 ensures that CBL's proveAttachment for deduplication is working correctly even for v2 attachments which aren't de-duped on the server side. +func TestBlipProveAttachmentV2(t *testing.T) { + base.SetUpTestLogging(t, base.LevelTrace, base.KeyAll) + rtConfig := RestTesterConfig{ + GuestEnabled: true, + } + rt := NewRestTester(t, &rtConfig) + defer rt.Close() + + btc, err := NewBlipTesterClientOptsWithRT(t, rt, &BlipTesterClientOpts{ + SupportedBLIPProtocols: []string{db.BlipCBMobileReplicationV2}, + }) + require.NoError(t, err) + defer btc.Close() + + err = btc.StartPull() + assert.NoError(t, err) + + const ( + doc1ID = "doc1" + doc2ID = "doc2" + ) + + const ( + attachmentName = "hello.txt" + attachmentData = "hello world" + ) + + var ( + attachmentDataB64 = base64.StdEncoding.EncodeToString([]byte(attachmentData)) + attachmentDigest = "sha1-Kq5sNclPz7QV2+lfQIuc6R7oRu0=" + ) + + // Create two docs with the same attachment data on SG - v2 attachments intentionally result in two copies, + // CBL will still de-dupe attachments based on digest, so will still try proveAttachmnet for the 2nd. + doc1Body := fmt.Sprintf(`{"greetings":[{"hi": "alice"}],"_attachments":{"%s":{"data":"%s"}}}`, attachmentName, attachmentDataB64) + response := rt.SendAdminRequest(http.MethodPut, "/{{.keyspace}}/"+doc1ID, doc1Body) + RequireStatus(t, response, http.StatusCreated) + doc1RevID := RespRevID(t, response) + + data, ok := btc.WaitForRev(doc1ID, doc1RevID) + require.True(t, ok) + bodyTextExpected := fmt.Sprintf(`{"greetings":[{"hi":"alice"}],"_attachments":{"%s":{"revpos":1,"length":%d,"stub":true,"digest":"%s"}}}`, attachmentName, len(attachmentData), attachmentDigest) + require.JSONEq(t, bodyTextExpected, string(data)) + + // create doc2 now that we know the client has the attachment + doc2Body := fmt.Sprintf(`{"greetings":[{"howdy": "bob"}],"_attachments":{"%s":{"data":"%s"}}}`, attachmentName, attachmentDataB64) + response = rt.SendAdminRequest(http.MethodPut, "/{{.keyspace}}/"+doc2ID, doc2Body) + RequireStatus(t, response, http.StatusCreated) + doc2RevID := RespRevID(t, response) + + data, ok = btc.WaitForRev(doc2ID, doc2RevID) + require.True(t, ok) + bodyTextExpected = fmt.Sprintf(`{"greetings":[{"howdy":"bob"}],"_attachments":{"%s":{"revpos":1,"length":%d,"stub":true,"digest":"%s"}}}`, attachmentName, len(attachmentData), attachmentDigest) + require.JSONEq(t, bodyTextExpected, string(data)) + + assert.Equal(t, int64(2), rt.GetDatabase().DbStats.CBLReplicationPull().RevSendCount.Value()) + assert.Equal(t, int64(0), rt.GetDatabase().DbStats.CBLReplicationPull().RevErrorCount.Value()) + assert.Equal(t, int64(1), rt.GetDatabase().DbStats.CBLReplicationPull().AttachmentPullCount.Value()) + assert.Equal(t, int64(len(attachmentData)), rt.GetDatabase().DbStats.CBLReplicationPull().AttachmentPullBytes.Value()) +} + +// TestBlipProveAttachmentV2Push ensures that CBL's attachment deduplication is ignored for push replications - resulting in new server-side digests and duplicated attachment data (v2 attachment format). 
+func TestBlipProveAttachmentV2Push(t *testing.T) { + base.SetUpTestLogging(t, base.LevelTrace, base.KeyAll) + rtConfig := RestTesterConfig{ + GuestEnabled: true, + } + rt := NewRestTester(t, &rtConfig) + defer rt.Close() + + btc, err := NewBlipTesterClientOptsWithRT(t, rt, &BlipTesterClientOpts{ + SupportedBLIPProtocols: []string{db.BlipCBMobileReplicationV2}, + }) + require.NoError(t, err) + defer btc.Close() + + const ( + doc1ID = "doc1" + doc2ID = "doc2" + ) + + const ( + attachmentName = "hello.txt" + attachmentData = "hello world" + ) + + var ( + attachmentDataB64 = base64.StdEncoding.EncodeToString([]byte(attachmentData)) + // attachmentDigest = "sha1-Kq5sNclPz7QV2+lfQIuc6R7oRu0=" + ) + + // Create two docs with the same attachment data on the client - v2 attachments intentionally result in two copies stored on the server, despite the client being able to share the data for both. + doc1Body := fmt.Sprintf(`{"greetings":[{"hi": "alice"}],"_attachments":{"%s":{"data":"%s"}}}`, attachmentName, attachmentDataB64) + doc1revID, err := btc.PushRev(doc1ID, "", []byte(doc1Body)) + require.NoError(t, err) + + err = rt.WaitForRev(doc1ID, doc1revID) + require.NoError(t, err) + + // create doc2 now that we know the server has the attachment - SG should still request the attachment data from the client. + doc2Body := fmt.Sprintf(`{"greetings":[{"howdy": "bob"}],"_attachments":{"%s":{"data":"%s"}}}`, attachmentName, attachmentDataB64) + doc2RevID, err := btc.PushRev(doc2ID, "", []byte(doc2Body)) + require.NoError(t, err) + + err = rt.WaitForRev(doc2ID, doc2RevID) + require.NoError(t, err) + + assert.Equal(t, int64(2), rt.GetDatabase().DbStats.CBLReplicationPush().DocPushCount.Value()) + assert.Equal(t, int64(0), rt.GetDatabase().DbStats.CBLReplicationPush().DocPushErrorCount.Value()) + assert.Equal(t, int64(2), rt.GetDatabase().DbStats.CBLReplicationPush().AttachmentPushCount.Value()) + assert.Equal(t, int64(2*len(attachmentData)), rt.GetDatabase().DbStats.CBLReplicationPush().AttachmentPushBytes.Value()) +} + func TestBlipPushPullNewAttachmentCommonAncestor(t *testing.T) { base.SetUpTestLogging(t, base.LevelInfo, base.KeyAll) rtConfig := RestTesterConfig{ diff --git a/rest/blip_client_test.go b/rest/blip_client_test.go index 0b09af007b..cf4e2b554c 100644 --- a/rest/blip_client_test.go +++ b/rest/blip_client_test.go @@ -310,6 +310,7 @@ func (btr *BlipTesterReplicator) initHandlers(btc *BlipTesterClient) { } var missingDigests []string + var knownDigests []string btcr.attachmentsLock.RLock() for _, attachment := range attsMap { attMap, ok := attachment.(map[string]interface{}) @@ -320,10 +321,63 @@ func (btr *BlipTesterReplicator) initHandlers(btc *BlipTesterClient) { if _, found := btcr.attachments[digest]; !found { missingDigests = append(missingDigests, digest) + } else { + if btr.bt.blipContext.ActiveSubprotocol() == db.BlipCBMobileReplicationV2 { + // only v2 clients care about proveAttachments + knownDigests = append(knownDigests, digest) + } } } btcr.attachmentsLock.RUnlock() + for _, digest := range knownDigests { + attData, err := btcr.getAttachment(digest) + if err != nil { + panic(err) + } + nonce, proof, err := db.GenerateProofOfAttachment(attData) + if err != nil { + panic(err) + } + + // if we already have this attachment, _we_ should ask the peer whether _they_ have the attachment + outrq := blip.NewRequest() + outrq.SetProfile(db.MessageProveAttachment) + outrq.Properties[db.ProveAttachmentDigest] = digest + outrq.SetBody(nonce) + + err = btcr.sendPullMsg(outrq) + if err != nil { + 
panic(err) + } + + resp := outrq.Response() + btc.pullReplication.storeMessage(resp) + respBody, err := resp.Body() + if err != nil { + panic(err) + } + + if resp.Type() == blip.ErrorType { + // forward error from proveAttachment response into rev response + if !msg.NoReply() { + response := msg.Response() + errorCode, _ := strconv.Atoi(resp.Properties["Error-Code"]) + response.SetError(resp.Properties["Error-Code"], errorCode, string(respBody)) + } + return + } + + if string(respBody) != proof { + // forward error from proveAttachment response into rev response + if !msg.NoReply() { + response := msg.Response() + response.SetError(resp.Properties["Error-Code"], http.StatusForbidden, fmt.Sprintf("Incorrect proof for attachment %s", digest)) + } + return + } + } + for _, digest := range missingDigests { outrq := blip.NewRequest() outrq.SetProfile(db.MessageGetAttachment) @@ -430,10 +484,11 @@ func (btc *BlipTesterCollectionClient) saveAttachment(_, base64data string) (dat digest = db.Sha1DigestKey(data) if _, found := btc.attachments[digest]; found { - return 0, "", fmt.Errorf("attachment with digest already exists") + base.InfofCtx(context.TODO(), base.KeySync, "attachment with digest %s already exists", digest) + } else { + btc.attachments[digest] = data } - btc.attachments[digest] = data return len(data), digest, nil } From 3655a099479622456a94512485593ed3b4fcb816 Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Wed, 17 May 2023 12:51:05 +0100 Subject: [PATCH 15/42] Make turnOffNoDelay log more info in error case (#6250) --- base/util.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/util.go b/base/util.go index 91484eff4d..39a49d981d 100644 --- a/base/util.go +++ b/base/util.go @@ -1652,7 +1652,7 @@ func GetHttpClientForWebSocket(insecureSkipVerify bool) *http.Client { // (There's really no reason for a caller to take note of the return value.) 
func turnOffNoDelay(ctx context.Context, conn net.Conn) bool { if tcpConn, ok := conn.(*net.TCPConn); !ok { - WarnfCtx(ctx, "Couldn't turn off NODELAY for %v: it's not a TCPConn", conn) + WarnfCtx(ctx, "Couldn't turn off NODELAY for %v: %T is not type *net.TCPConn", conn, conn) } else if err := tcpConn.SetNoDelay(false); err != nil { WarnfCtx(ctx, "Couldn't turn off NODELAY for %v: %v", conn, err) } else { From 044bf5b93fcb87077f4eb479e56fde0dc6d1d4cc Mon Sep 17 00:00:00 2001 From: Gregory Newman-Smith <109068393+gregns1@users.noreply.github.com> Date: Wed, 17 May 2023 14:25:42 +0100 Subject: [PATCH 16/42] CBG-2973: Fix panic for assigning to nil map inside Mutable1xBody (#6252) --- db/revision_cache_interface.go | 3 +++ rest/api_test.go | 13 +++++++++++++ 2 files changed, 16 insertions(+) diff --git a/db/revision_cache_interface.go b/db/revision_cache_interface.go index c3fc20aca4..2f82796b72 100644 --- a/db/revision_cache_interface.go +++ b/db/revision_cache_interface.go @@ -162,6 +162,9 @@ func (rev *DocumentRevision) Mutable1xBody(db *DatabaseCollectionWithUser, reque if err != nil { return nil, err } + if b == nil { + return nil, base.RedactErrorf("null doc body for docID: %s revID: %s", base.UD(rev.DocID), base.UD(rev.RevID)) + } b[BodyId] = rev.DocID b[BodyRev] = rev.RevID diff --git a/rest/api_test.go b/rest/api_test.go index 6458df6b27..4feb418d0b 100644 --- a/rest/api_test.go +++ b/rest/api_test.go @@ -2678,6 +2678,19 @@ func TestDocChannelSetPruning(t *testing.T) { assert.Equal(t, uint64(12), syncData.ChannelSetHistory[0].End) } +func TestNullDocHandlingForMutable1xBody(t *testing.T) { + rt := NewRestTester(t, nil) + defer rt.Close() + collection := rt.GetSingleTestDatabaseCollectionWithUser() + + documentRev := db.DocumentRevision{DocID: "doc1", BodyBytes: []byte("null")} + + body, err := documentRev.Mutable1xBody(collection, nil, nil, false) + require.Error(t, err) + require.Nil(t, body) + assert.Contains(t, err.Error(), "null doc body for doc") +} + func TestTombstoneCompactionAPI(t *testing.T) { rt := NewRestTester(t, nil) rt.GetDatabase().PurgeInterval = 0 From c1cf34e8a0c6529dd821e5fa6cda3a447ab019c0 Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Wed, 17 May 2023 14:46:54 +0100 Subject: [PATCH 17/42] Fix RedactableError not satisfying the Redactor interface (#6253) --- base/redactable_error.go | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/base/redactable_error.go b/base/redactable_error.go index 9d59071e39..db979f01da 100644 --- a/base/redactable_error.go +++ b/base/redactable_error.go @@ -12,14 +12,18 @@ package base import "fmt" -// A redactable error can be used as a drop-in replacement for a base error (as would have been created via -// fmt.Errorf), which has the ability to redact any sensitive user data by calling redact() on all if it's -// stored args. +// RedactableError is an error that can be used as a drop-in replacement for an error, +// which has the ability to redact any sensitive data by calling redact() on all of its args. type RedactableError struct { fmt string args []interface{} } +var ( + _ error = &RedactableError{} + _ Redactor = &RedactableError{} +) + // Create a new redactable error. Same signature as fmt.Errorf() for easy drop-in replacement. 
func RedactErrorf(fmt string, args ...interface{}) *RedactableError { return &RedactableError{ @@ -28,12 +32,17 @@ func RedactErrorf(fmt string, args ...interface{}) *RedactableError { } } -// Satisfy error interface +// Error satisfies the error interface func (re *RedactableError) Error() string { + return re.String() +} + +// String returns a non-redacted version of the error - satisfies the Redactor interface. +func (re *RedactableError) String() string { return fmt.Sprintf(re.fmt, re.args...) } -// Satisfy redact interface +// Redact returns a redacted version of the error - satisfies the Redactor interface. func (re *RedactableError) Redact() string { redactedArgs := redact(re.args) return fmt.Sprintf(re.fmt, redactedArgs...) From 5c7673ef195c781398e117086d282ab9a5a49e6c Mon Sep 17 00:00:00 2001 From: Tor Colvin Date: Thu, 18 May 2023 11:44:03 -0400 Subject: [PATCH 18/42] CBG-2998 always set no TLS bootstrap parameter to false for cbgt (#6254) * CBG-2998 always set parameter to false * update comment --- base/dcp_sharded.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/base/dcp_sharded.go b/base/dcp_sharded.go index f124981d7f..726c9a2dd9 100644 --- a/base/dcp_sharded.go +++ b/base/dcp_sharded.go @@ -16,7 +16,6 @@ import ( "crypto/tls" "fmt" "sort" - "strconv" "strings" "sync" @@ -326,8 +325,9 @@ func initCBGTManager(ctx context.Context, bucket Bucket, spec BucketSpec, cfgSG options := make(map[string]string) options[cbgt.FeedAllotmentOption] = cbgt.FeedAllotmentOnePerPIndex options["managerLoadDataDir"] = "false" - // Ensure we always use TLS if configured - cbgt defaults to non-TLS on initial connection - options["feedInitialBootstrapNonTLS"] = strconv.FormatBool(!spec.IsTLS()) + // TLS is controlled by the connection string. + // cbgt uses this parameter to run in mixed mode - non-TLS for CCCP but TLS for memcached. Sync Gateway does not need to set this parameter. 
+ options["feedInitialBootstrapNonTLS"] = "false" // Disable collections if unsupported if !bucket.IsSupported(sgbucket.BucketStoreFeatureCollections) { From c3146d895c80a76cdd9c4841c0fb539f37ff5c9f Mon Sep 17 00:00:00 2001 From: Tor Colvin Date: Thu, 18 May 2023 16:55:29 -0400 Subject: [PATCH 19/42] CBG-2905 remove cached connections when bucket disappear (#6251) * remove cached buckets * Add refcounting for parallel calls to get buckets --- base/bootstrap.go | 128 +++++++++++++++++++++++++++++------------ base/bootstrap_test.go | 82 ++++++++++++++++++++++++++ 2 files changed, 172 insertions(+), 38 deletions(-) diff --git a/base/bootstrap.go b/base/bootstrap.go index ed7126e868..33ea8e3749 100644 --- a/base/bootstrap.go +++ b/base/bootstrap.go @@ -53,11 +53,11 @@ type CouchbaseCluster struct { clusterOptions gocb.ClusterOptions forcePerBucketAuth bool // Forces perBucketAuth authenticators to be used to connect to the bucket perBucketAuth map[string]*gocb.Authenticator - bucketConnectionMode BucketConnectionMode // Whether to cache cluster connections - cachedClusterConnection *gocb.Cluster // Cached cluster connection, should only be used by GetConfigBuckets - cachedBucketConnections map[string]*cachedBucket // Per-bucket cached connections - cachedConnectionLock sync.Mutex // mutex for access to cachedBucketConnections - configPersistence ConfigPersistence // ConfigPersistence mode + bucketConnectionMode BucketConnectionMode // Whether to cache cluster connections + cachedClusterConnection *gocb.Cluster // Cached cluster connection, should only be used by GetConfigBuckets + cachedBucketConnections cachedBucketConnections // Per-bucket cached connections + cachedConnectionLock sync.Mutex // mutex for access to cachedBucketConnections + configPersistence ConfigPersistence // ConfigPersistence mode } type BucketConnectionMode int @@ -70,13 +70,73 @@ const ( ) type cachedBucket struct { - bucket *gocb.Bucket - teardownFn func() + bucket *gocb.Bucket // underlying bucket + bucketCloseFn func() // teardown function which will close the gocb connection + refcount int // count of how many functions are using this cachedBucket + shouldClose bool // mark this cachedBucket as needing to be closed with ref } -// noopTeardown is returned by getBucket when using a cached bucket - these buckets are torn down -// when CouchbaseCluster.Close is called. -func noopTeardown() {} +// cahedBucketConnections is a lockable map cached buckets containing refcounts +type cachedBucketConnections struct { + buckets map[string]*cachedBucket + lock sync.Mutex +} + +// removeOutdatedBuckets marks any active buckets for closure and removes the cached connections. 
+func (c *cachedBucketConnections) removeOutdatedBuckets(activeBuckets Set) {
+ c.lock.Lock()
+ defer c.lock.Unlock()
+ for bucketName, bucket := range c.buckets {
+ _, exists := activeBuckets[bucketName]
+ if exists {
+ continue
+ }
+ bucket.shouldClose = true
+ c._teardown(bucketName)
+ }
+}
+
+// closeAll closes all cached bucket connections
+func (c *cachedBucketConnections) closeAll() {
+ c.lock.Lock()
+ defer c.lock.Unlock()
+ for _, bucket := range c.buckets {
+ bucket.shouldClose = true
+ bucket.bucketCloseFn()
+ }
+}
+
+// teardown acquires the lock, decrements the refcount and closes the cached bucket connection if it is no longer needed - suitable for CouchbaseCluster.getBucket() teardowns
+func (c *cachedBucketConnections) teardown(bucketName string) {
+ c.lock.Lock()
+ defer c.lock.Unlock()
+ c.buckets[bucketName].refcount--
+ c._teardown(bucketName)
+}
+
+// _teardown expects the lock to be acquired before calling this function and the reference count to be up to date.
+func (c *cachedBucketConnections) _teardown(bucketName string) {
+ if !c.buckets[bucketName].shouldClose || c.buckets[bucketName].refcount > 0 {
+ return
+ }
+ c.buckets[bucketName].bucketCloseFn()
+ delete(c.buckets, bucketName)
+}
+
+// _get returns a cachedBucket for a given bucketName and increments its refcount, or returns nil if it doesn't exist
+func (c *cachedBucketConnections) _get(bucketName string) *cachedBucket {
+ bucket, ok := c.buckets[bucketName]
+ if !ok {
+ return nil
+ }
+ c.buckets[bucketName].refcount++
+ return bucket
+}
+
+// _set adds a cachedBucket for a given bucketName
+func (c *cachedBucketConnections) _set(bucketName string, bucket *cachedBucket) {
+ c.buckets[bucketName] = bucket
+}
 var _ BootstrapConnection = &CouchbaseCluster{}
@@ -128,7 +188,7 @@ func NewCouchbaseCluster(server, username, password,
 }
 if bucketMode == CachedClusterConnections {
- cbCluster.cachedBucketConnections = make(map[string]*cachedBucket)
+ cbCluster.cachedBucketConnections = cachedBucketConnections{buckets: make(map[string]*cachedBucket)}
 }
 cbCluster.configPersistence = &DocumentBootstrapPersistence{}
@@ -243,6 +303,8 @@ func (cc *CouchbaseCluster) GetConfigBuckets() ([]string, error) {
 bucketList = append(bucketList, bucketName)
 }
+ cc.cachedBucketConnections.removeOutdatedBuckets(SetOf(bucketList...))
+
 return bucketList, nil
 }
@@ -399,13 +461,11 @@ func (cc *CouchbaseCluster) KeyExists(location, docID string) (exists bool, err
 // Close calls teardown for any cached buckets and removes from cachedBucketConnections
 func (cc *CouchbaseCluster) Close() {
+ cc.cachedBucketConnections.closeAll()
+
 cc.cachedConnectionLock.Lock()
 defer cc.cachedConnectionLock.Unlock()
- for bucketName, cachedBucket := range cc.cachedBucketConnections {
- cachedBucket.teardownFn()
- delete(cc.cachedBucketConnections, bucketName)
- }
 if cc.cachedClusterConnection != nil {
 _ = cc.cachedClusterConnection.Close(nil)
 cc.cachedClusterConnection = nil
@@ -418,36 +478,28 @@ func (cc *CouchbaseCluster) getBucket(bucketName string) (b *gocb.Bucket, teardo
 return cc.connectToBucket(bucketName)
 }
- cc.cachedConnectionLock.Lock()
- defer cc.cachedConnectionLock.Unlock()
-
- cacheBucket, ok := cc.cachedBucketConnections[bucketName]
- if ok {
- return cacheBucket.bucket, noopTeardown, nil
+ teardownFn = func() {
+ cc.cachedBucketConnections.teardown(bucketName)
+ }
+ cc.cachedBucketConnections.lock.Lock()
+ defer cc.cachedBucketConnections.lock.Unlock()
+ bucket := cc.cachedBucketConnections._get(bucketName)
+ if bucket != nil {
+ return bucket.bucket, teardownFn, nil
 }
 // cached bucket not found, connect and add
- newBucket, newTeardownFn, err := cc.connectToBucket(bucketName) + newBucket, bucketCloseFn, err := cc.connectToBucket(bucketName) if err != nil { return nil, nil, err } - cc.cachedBucketConnections[bucketName] = &cachedBucket{ - bucket: newBucket, - teardownFn: newTeardownFn, - } - return newBucket, noopTeardown, nil -} - -// For unrecoverable errors when using cached buckets, remove the bucket from the cache to trigger a new connection on next usage -func (cc *CouchbaseCluster) onCachedBucketError(bucketName string) { + cc.cachedBucketConnections._set(bucketName, &cachedBucket{ + bucket: newBucket, + bucketCloseFn: bucketCloseFn, + refcount: 1, + }) - cc.cachedConnectionLock.Lock() - defer cc.cachedConnectionLock.Unlock() - cacheBucket, ok := cc.cachedBucketConnections[bucketName] - if ok { - cacheBucket.teardownFn() - delete(cc.cachedBucketConnections, bucketName) - } + return newBucket, teardownFn, nil } // connectToBucket establishes a new connection to a bucket, and returns the bucket after waiting for it to be ready. diff --git a/base/bootstrap_test.go b/base/bootstrap_test.go index 4c29b1ddb3..c5512e6609 100644 --- a/base/bootstrap_test.go +++ b/base/bootstrap_test.go @@ -9,6 +9,7 @@ package base import ( + "sync" "testing" "github.com/imdario/mergo" @@ -33,3 +34,84 @@ func TestMergeStructPointer(t *testing.T) { assert.Equal(t, "changed", source.Ptr.S) assert.Equal(t, IntPtr(5), source.Ptr.I) } + +func TestBootstrapRefCounting(t *testing.T) { + if UnitTestUrlIsWalrus() { + t.Skip("Test requires making a connection to CBS") + } + // Integration tests are configured to run in these parameters, they are used in main_test_bucket_pool.go + // Future enhancement would be to allow all integration tests to run with TLS + x509CertPath := "" + x509KeyPath := "" + caCertPath := "" + forcePerBucketAuth := false + tlsSkipVerify := BoolPtr(false) + var perBucketCredentialsConfig map[string]*CredentialsConfig + + cluster, err := NewCouchbaseCluster(UnitTestUrl(), TestClusterUsername(), TestClusterPassword(), x509CertPath, x509KeyPath, caCertPath, forcePerBucketAuth, perBucketCredentialsConfig, tlsSkipVerify, BoolPtr(TestUseXattrs()), CachedClusterConnections) + require.NoError(t, err) + defer cluster.Close() + require.NotNil(t, cluster) + + clusterConnection, err := cluster.getClusterConnection() + require.NoError(t, err) + require.NotNil(t, clusterConnection) + + buckets, err := cluster.GetConfigBuckets() + require.NoError(t, err) + require.Len(t, buckets, tbpNumBuckets()) + // GetConfigBuckets doesn't cache connections, it uses cluster connection to determine number of buckets + require.Len(t, cluster.cachedBucketConnections.buckets, 0) + + primeBucketConnectionCache := func(bucketNames []string) { + // Bucket CRUD ops do cache connections + for _, bucketName := range bucketNames { + exists, err := cluster.KeyExists(bucketName, "keyThatDoesNotExist") + require.NoError(t, err) + require.False(t, exists) + } + } + + primeBucketConnectionCache(buckets) + require.Len(t, cluster.cachedBucketConnections.buckets, tbpNumBuckets()) + + // call removeOutdatedBuckets as no-op + cluster.cachedBucketConnections.removeOutdatedBuckets(SetOf(buckets...)) + require.Len(t, cluster.cachedBucketConnections.buckets, tbpNumBuckets()) + + // call removeOutdatedBuckets to remove all cached buckets, call multiple times to make sure idempotent + for i := 0; i < 3; i++ { + cluster.cachedBucketConnections.removeOutdatedBuckets(Set{}) + require.Len(t, cluster.cachedBucketConnections.buckets, 0) + } + + 
primeBucketConnectionCache(buckets) + require.Len(t, cluster.cachedBucketConnections.buckets, tbpNumBuckets()) + + // make sure that you can still use an active connection while the bucket has been removed + wg := sync.WaitGroup{} + wg.Add(1) + makeConnection := make(chan struct{}) + go func() { + defer wg.Done() + b, teardown, err := cluster.getBucket(buckets[0]) + defer teardown() + require.NoError(t, err) + require.NotNil(t, b) + <-makeConnection + // make sure that we can still use bucket after it is no longer cached + exists, err := cluster.configPersistence.keyExists(b.DefaultCollection(), "keyThatDoesNotExist") + require.NoError(t, err) + require.False(t, exists) + }() + + cluster.cachedBucketConnections.removeOutdatedBuckets(Set{}) + require.Len(t, cluster.cachedBucketConnections.buckets, 0) + makeConnection <- struct{}{} + + wg.Wait() + + // make sure you can "remove" a non existent bucket in the case that bucket removal is called multiple times + cluster.cachedBucketConnections.removeOutdatedBuckets(SetOf("not-a-bucket")) + +} From cb6aef7f361a8822f47b9db245b6006a0389840d Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Fri, 19 May 2023 13:38:56 +0100 Subject: [PATCH 20/42] Log around config.FromConnStr to diagnose slow DNS SRV resolution (#6255) --- base/constants.go | 3 +++ base/dcp_client.go | 7 +++++++ rest/server_context.go | 11 +++++++++-- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/base/constants.go b/base/constants.go index ed02018fce..ca804d9533 100644 --- a/base/constants.go +++ b/base/constants.go @@ -154,6 +154,9 @@ const ( // ServerlessChannelLimit is hard limit on channels allowed per user when running in serverless mode ServerlessChannelLimit = 500 + + // FromConnStrWarningThreshold determines the amount of time it should take before we warn about parsing a connstr (mostly for DNS resolution) + FromConnStrWarningThreshold = 10 * time.Second ) const ( diff --git a/base/dcp_client.go b/base/dcp_client.go index 46547ec52b..a973e7dd38 100644 --- a/base/dcp_client.go +++ b/base/dcp_client.go @@ -320,10 +320,17 @@ func (dc *DCPClient) initAgent(spec BucketSpec) error { } agentConfig := gocbcore.DCPAgentConfig{} + DebugfCtx(context.TODO(), KeyAll, "Parsing cluster connection string %q", UD(connStr)) + beforeFromConnStr := time.Now() connStrError := agentConfig.FromConnStr(connStr) if connStrError != nil { return fmt.Errorf("Unable to start DCP Client - error building conn str: %v", connStrError) } + if d := time.Since(beforeFromConnStr); d > FromConnStrWarningThreshold { + WarnfCtx(context.TODO(), "Parsed cluster connection string %q in: %v", UD(connStr), d) + } else { + DebugfCtx(context.TODO(), KeyAll, "Parsed cluster connection string %q in: %v", UD(connStr), d) + } auth, authErr := spec.GocbcoreAuthProvider() if authErr != nil { diff --git a/rest/server_context.go b/rest/server_context.go index 2cb270627e..3166947dd1 100644 --- a/rest/server_context.go +++ b/rest/server_context.go @@ -1523,10 +1523,17 @@ func initClusterAgent(ctx context.Context, clusterAddress, clusterUser, clusterP }, } + base.DebugfCtx(ctx, base.KeyAll, "Parsing cluster connection string %q", base.UD(clusterAddress)) + beforeFromConnStr := time.Now() err = config.FromConnStr(clusterAddress) if err != nil { return nil, err } + if d := time.Since(beforeFromConnStr); d > base.FromConnStrWarningThreshold { + base.WarnfCtx(ctx, "Parsed cluster connection string %q in: %v", base.UD(clusterAddress), d) + } else { + base.DebugfCtx(ctx, base.KeyAll, "Parsed cluster connection string %q in: 
%v", base.UD(clusterAddress), d) + } agent, err := gocbcore.CreateAgent(&config) if err != nil { @@ -1838,9 +1845,9 @@ func (sc *ServerContext) Database(ctx context.Context, name string) *db.Database } func (sc *ServerContext) initializeCouchbaseServerConnections(ctx context.Context) error { - base.InfofCtx(ctx, base.KeyAll, "initializing server connections") + base.InfofCtx(ctx, base.KeyAll, "Initializing server connections") defer func() { - base.InfofCtx(ctx, base.KeyAll, "finished initializing server connections") + base.InfofCtx(ctx, base.KeyAll, "Finished initializing server connections") }() goCBAgent, err := sc.initializeGoCBAgent(ctx) if err != nil { From 0f6660f15c2a66113d899abff47355246e4866c8 Mon Sep 17 00:00:00 2001 From: Tor Colvin Date: Tue, 23 May 2023 12:17:49 -0400 Subject: [PATCH 21/42] Update waiting error message from generic message (#6259) * Update waiting error message from generic message Co-authored-by: Ben Brooks --- rest/server_context.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/rest/server_context.go b/rest/server_context.go index 3166947dd1..d02d00ef32 100644 --- a/rest/server_context.go +++ b/rest/server_context.go @@ -557,10 +557,16 @@ func (sc *ServerContext) _getOrAddDatabaseFromConfig(ctx context.Context, config for scopeName, scopeConfig := range config.Scopes { for collectionName, _ := range scopeConfig.Collections { var dataStore sgbucket.DataStore - err := base.WaitForNoError(func() error { + + waitForCollection := func() (bool, error, interface{}) { dataStore, err = bucket.NamedDataStore(base.ScopeAndCollectionName{Scope: scopeName, Collection: collectionName}) - return err - }) + return err != nil, err, nil + } + + err, _ := base.RetryLoop( + fmt.Sprintf("waiting for %s.%s.%s to exist", base.MD(bucket.GetName()), base.MD(scopeName), base.MD(collectionName)), + waitForCollection, + base.CreateMaxDoublingSleeperFunc(30, 10, 1000)) if err != nil { return nil, fmt.Errorf("error attempting to create/update database: %w", err) } From 75def59e0d384c7660d754d83fc638e9110beefb Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Thu, 25 May 2023 23:10:59 +0100 Subject: [PATCH 22/42] Tweak TestIncrCounter to cover non-equal def and amt values (#6263) * Tweak TestIncrCounter to cover non-equal `def` and `amt` values * Allow zero value in `amt` for Collection.Incr - will write `def` to bucket if counter does not exist --- base/bucket_gocb_test.go | 35 ++++++++++++++++++++++++----------- base/collection_gocb.go | 3 --- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/base/bucket_gocb_test.go b/base/bucket_gocb_test.go index 9d56239766..8490ec0204 100644 --- a/base/bucket_gocb_test.go +++ b/base/bucket_gocb_test.go @@ -362,22 +362,35 @@ func TestIncrCounter(t *testing.T) { } }() - // New Counter - incr 1, default 1 - value, err := dataStore.Incr(key, 1, 1, 0) - assert.NoError(t, err, "Error incrementing non-existent counter") - - // key did not exist - so expect the "initial" value of 1 - assert.Equal(t, uint64(1), value) + // New Counter - incr 0, default 0 - expect zero-value counter doc to be created + value, err := dataStore.Incr(key, 0, 0, 0) + require.NoError(t, err, "Error incrementing non-existent counter") + require.Equal(t, uint64(0), value) // Retrieve existing counter value using GetCounter retrieval, err := GetCounter(dataStore, key) - assert.NoError(t, err, "Error retrieving value for existing counter") - assert.Equal(t, uint64(1), retrieval) + require.NoError(t, err, "Error retrieving value for 
existing counter") + require.Equal(t, uint64(0), retrieval) + + // remove zero value so we're able to test default below + require.NoError(t, dataStore.Delete(key)) + + // New Counter - incr 1, default 5 + value, err = dataStore.Incr(key, 1, 5, 0) + require.NoError(t, err, "Error incrementing non-existent counter") + + // key did not exist - so expect the "initial" value of 5 + require.Equal(t, uint64(5), value) + + // Retrieve existing counter value using GetCounter + retrieval, err = GetCounter(dataStore, key) + require.NoError(t, err, "Error retrieving value for existing counter") + require.Equal(t, uint64(5), retrieval) // Increment existing counter - retrieval, err = dataStore.Incr(key, 1, 1, 0) - assert.NoError(t, err, "Error incrementing value for existing counter") - assert.Equal(t, uint64(2), retrieval) + retrieval, err = dataStore.Incr(key, 1, 5, 0) + require.NoError(t, err, "Error incrementing value for existing counter") + require.Equal(t, uint64(6), retrieval) } func TestGetAndTouchRaw(t *testing.T) { diff --git a/base/collection_gocb.go b/base/collection_gocb.go index 0a14c809fe..5be7e21d0a 100644 --- a/base/collection_gocb.go +++ b/base/collection_gocb.go @@ -356,9 +356,6 @@ func (c *Collection) Update(k string, exp uint32, callback sgbucket.UpdateFunc) func (c *Collection) Incr(k string, amt, def uint64, exp uint32) (uint64, error) { c.Bucket.waitForAvailKvOp() defer c.Bucket.releaseKvOp() - if amt == 0 { - return 0, errors.New("amt passed to Incr must be non-zero") - } incrOptions := gocb.IncrementOptions{ Initial: int64(def), Delta: amt, From b49a7520092812d2021bf3e2f341486e00672bff Mon Sep 17 00:00:00 2001 From: Adam Fraser Date: Fri, 26 May 2023 06:57:15 -0700 Subject: [PATCH 23/42] Setup manifest for 3.0.8 (#6266) --- manifest/3.0.xml | 4 +- manifest/3.0/3.0.6.xml | 2 +- manifest/3.0/3.0.7.xml | 162 +++++++++++++++++++++++++++++++++++ manifest/product-config.json | 12 ++- 4 files changed, 176 insertions(+), 4 deletions(-) create mode 100644 manifest/3.0/3.0.7.xml diff --git a/manifest/3.0.xml b/manifest/3.0.xml index 2a16128f8a..9a96645ace 100644 --- a/manifest/3.0.xml +++ b/manifest/3.0.xml @@ -25,14 +25,14 @@ licenses/APL2.txt. - + - + diff --git a/manifest/3.0/3.0.6.xml b/manifest/3.0/3.0.6.xml index 218d1abaea..8f38b22ca0 100644 --- a/manifest/3.0/3.0.6.xml +++ b/manifest/3.0/3.0.6.xml @@ -25,7 +25,7 @@ licenses/APL2.txt. 
- + diff --git a/manifest/3.0/3.0.7.xml b/manifest/3.0/3.0.7.xml new file mode 100644 index 0000000000..137a2c494a --- /dev/null +++ b/manifest/3.0/3.0.7.xml @@ -0,0 +1,162 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/manifest/product-config.json b/manifest/product-config.json index fd5d46acb4..438af693be 100644 --- a/manifest/product-config.json +++ b/manifest/product-config.json @@ -400,10 +400,20 @@ "trigger_blackduck": true, "start_build": 7 }, - "manifest/3.0.xml": { + "manifest/3.0/3.0.7.xml": { + "do-build": false, "release": "3.0.7", "release_name": "Couchbase Sync Gateway 3.0.7", "production": true, + "interval": 1440, + "go_version": "1.16.15", + "trigger_blackduck": true, + "start_build": 7 + }, + "manifest/3.0.xml": { + "release": "3.0.8", + "release_name": "Couchbase Sync Gateway 3.0.8", + "production": true, "interval": 120, "go_version": "1.16.15", "trigger_blackduck": true, From 7093ddad2ad4ebffd6b2b9960f0661a0c571bfa9 Mon Sep 17 00:00:00 2001 From: Adam Fraser Date: Fri, 26 May 2023 06:58:41 -0700 Subject: [PATCH 24/42] CBG-2983 Close cbgt agents on database close (#6265) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * CBG-2983 Close cbgt agents on database close Manager.Stop() doesn’t close cbgt’s gocb agents - partly because the fts use case is to have one manager that exists for the lifetime of the process. For SG’s usage, where manager lifecycle is bound to a database, we need to shut down these agents when we close the database/importListener. * Improve inline documentation for CloseStatsClients call --- base/dcp_sharded.go | 33 +++++++++++++++++++---------- rest/adminapitest/admin_api_test.go | 27 +++++++++++++++++++++++ 2 files changed, 49 insertions(+), 11 deletions(-) diff --git a/base/dcp_sharded.go b/base/dcp_sharded.go index 726c9a2dd9..a693e5f784 100644 --- a/base/dcp_sharded.go +++ b/base/dcp_sharded.go @@ -54,6 +54,8 @@ type CbgtContext struct { eventHandlers *sgMgrEventHandlers // Event handler callbacks ctx context.Context // Log context dbName string // Database name + sourceName string // cbgt source name. Store on CbgtContext for access during teardown + sourceUUID string // cbgt source UUID. Store on CbgtContext for access during teardown } // StartShardedDCPFeed initializes and starts a CBGT Manager targeting the provided bucket. 
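As the commit message above notes, cbgt's Manager.Stop() does not close the gocb agents it creates, so CbgtContext.Stop() (later in this diff) shuts them down explicitly via cbgt.CloseStatsClients. A condensed sketch of the resulting close ordering, with pindex close calls and error handling elided; not the real implementation:

// stopSketch summarises the teardown order used by CbgtContext.Stop on database close.
func stopSketch(c *CbgtContext) {
	// ClosePIndex calls are synchronous, so the manager can be stopped once they have completed.
	c.Manager.Stop()
	// Close cbgt's per-bucket memcached stats connection; sourceName/sourceUUID are the bucket
	// name and UUID captured on CbgtContext at manager creation. cbgt refcounts this connection,
	// so multiple SG databases sharing a bucket are handled safely.
	cbgt.CloseStatsClients(c.sourceName, c.sourceUUID)
	// Drop the feed credentials registered for this database.
	c.RemoveFeedCredentials(c.dbName)
}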
@@ -118,11 +120,6 @@ func GenerateLegacyIndexName(dbName string) string { func createCBGTIndex(ctx context.Context, c *CbgtContext, dbName string, configGroupID string, bucket Bucket, spec BucketSpec, scope string, collections []string, numPartitions uint16) error { sourceType := SOURCE_DCP_SG - bucketUUID, err := bucket.UUID() - if err != nil { - return err - } - sourceParams, err := cbgtFeedParams(spec, scope, collections, dbName) if err != nil { return err @@ -193,8 +190,8 @@ func createCBGTIndex(ctx context.Context, c *CbgtContext, dbName string, configG indexType := CBGTIndexTypeSyncGatewayImport + configGroupID err = c.Manager.CreateIndex( sourceType, // sourceType - bucket.GetName(), // sourceName - bucketUUID, // sourceUUID + c.sourceName, // bucket name + c.sourceUUID, // bucket UUID sourceParams, // sourceParams indexType, // indexType indexName, // indexName @@ -349,6 +346,12 @@ func initCBGTManager(ctx context.Context, bucket Bucket, spec BucketSpec, cfgSG serverURL, eventHandlers, options) + eventHandlers.manager = mgr + + bucketUUID, err := bucket.UUID() + if err != nil { + return nil, fmt.Errorf("failed to fetch UUID of bucket %v: %w", MD(bucket.GetName()).Redact(), err) + } cbgtContext := &CbgtContext{ Manager: mgr, @@ -356,6 +359,8 @@ func initCBGTManager(ctx context.Context, bucket Bucket, spec BucketSpec, cfgSG eventHandlers: eventHandlers, ctx: ctx, dbName: dbName, + sourceName: bucket.GetName(), + sourceUUID: bucketUUID, } if spec.Auth != nil || (spec.Certpath != "" && spec.Keypath != "") { @@ -364,10 +369,6 @@ func initCBGTManager(ctx context.Context, bucket Bucket, spec BucketSpec, cfgSG } if spec.IsTLS() { - bucketUUID, err := bucket.UUID() - if err != nil { - return nil, fmt.Errorf("failed to fetch UUID of bucket %v: %w", MD(bucket.GetName()).Redact(), err) - } if spec.TLSSkipVerify { setCbgtRootCertsForBucket(bucketUUID, nil) } else { @@ -467,6 +468,10 @@ func (c *CbgtContext) Stop() { } // ClosePIndex calls are synchronous, so can stop manager once they've completed c.Manager.Stop() + // CloseStatsClients closes the memcached connection cbgt uses for stats calls (highseqno, etc). sourceName and + // sourceUUID are bucketName/bucket UUID in our usage. cbgt has a single global stats connection per bucket, + // but does a refcount check before closing, so handles the case of multiple SG databases targeting the same bucket. + cbgt.CloseStatsClients(c.sourceName, c.sourceUUID) c.RemoveFeedCredentials(c.dbName) } @@ -720,6 +725,7 @@ func GetDefaultImportPartitions(serverless bool) uint16 { type sgMgrEventHandlers struct { ctx context.Context ctxCancel context.CancelFunc + manager *cbgt.Manager } func (meh *sgMgrEventHandlers) OnRefreshManagerOptions(options map[string]string) { @@ -740,6 +746,10 @@ func (meh *sgMgrEventHandlers) OnUnregisterPIndex(pindex *cbgt.PIndex) { // This will trigger cbgt closing and then attempting to reconnect to the feed. 
func (meh *sgMgrEventHandlers) OnFeedError(srcType string, r cbgt.Feed, feedErr error) { + // cbgt always passes srcType = SOURCE_GOCBCORE, but we have a wrapped type associated with our indexes - use that instead + // for our logging + srcType = SOURCE_DCP_SG + DebugfCtx(meh.ctx, KeyDCP, "cbgt Mgr OnFeedError, srcType: %s, feed name: %s, err: %v", srcType, r.Name(), feedErr) @@ -771,6 +781,7 @@ func (meh *sgMgrEventHandlers) OnFeedError(srcType string, r cbgt.Feed, feedErr } dcpFeed.NotifyMgrOnClose() } + return } } diff --git a/rest/adminapitest/admin_api_test.go b/rest/adminapitest/admin_api_test.go index 720f05cc4c..51b18b9743 100644 --- a/rest/adminapitest/admin_api_test.go +++ b/rest/adminapitest/admin_api_test.go @@ -4367,3 +4367,30 @@ func TestPerDBCredsOverride(t *testing.T) { assert.Equal(t, "invalidUsername", configs["db"].BucketConfig.Username) assert.Equal(t, "invalidPassword", configs["db"].BucketConfig.Password) } + +// Can be used to reproduce connections left open after database close. Manually deleting the bucket used by the test +// once the test reaches the sleep loop will log connection errors for unclosed connections. +func TestDeleteDatabaseCBGTTeardown(t *testing.T) { + t.Skip("Dev-time test used to repro agent connections being left open after database close") + if base.UnitTestUrlIsWalrus() { + t.Skip("This test only works against Couchbase Server") + } + base.SetUpTestLogging(t, base.LevelTrace, base.KeyHTTP, base.KeyImport) + + rtConfig := rest.RestTesterConfig{DatabaseConfig: &rest.DatabaseConfig{DbConfig: rest.DbConfig{AutoImport: true}}} + rt := rest.NewRestTester(t, &rtConfig) + defer rt.Close() + // Initialize database + _ = rt.GetDatabase() + + for i := 0; i < 1; i++ { + time.Sleep(1 * time.Second) // some time for polling + } + + resp := rt.SendAdminRequest(http.MethodDelete, "/db/", "") + rest.RequireStatus(t, resp, http.StatusOK) + + for i := 0; i < 1000; i++ { + time.Sleep(1 * time.Second) // some time for polling + } +} From c17d66c59f565e1cb4bed3efcf745c34bd80e7ab Mon Sep 17 00:00:00 2001 From: Tor Colvin Date: Fri, 26 May 2023 10:45:42 -0400 Subject: [PATCH 25/42] CBG-3024 Make sure CE import uses checkpoints (#6261) * CBG-3024 Make sure import feed uses checkpoints --- base/gocb_dcp_feed.go | 30 +++++++++++++++++------------- db/database_test.go | 10 ++++++---- db/import_test.go | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 17 deletions(-) diff --git a/base/gocb_dcp_feed.go b/base/gocb_dcp_feed.go index 12edfbe35e..811861c87e 100644 --- a/base/gocb_dcp_feed.go +++ b/base/gocb_dcp_feed.go @@ -50,10 +50,6 @@ func getHighSeqMetadata(cbstore CouchbaseBucketStore) ([]DCPMetadata, error) { // StartGocbDCPFeed starts a DCP Feed. 
func StartGocbDCPFeed(bucket *GocbV2Bucket, bucketName string, args sgbucket.FeedArguments, callback sgbucket.FeedEventCallbackFunc, dbStats *expvar.Map, metadataStoreType DCPMetadataStoreType, groupID string) error { - metadata, err := getHighSeqMetadata(bucket) - if err != nil { - return err - } feedName, err := GenerateDcpStreamName(args.ID) if err != nil { return err @@ -84,19 +80,27 @@ func StartGocbDCPFeed(bucket *GocbV2Bucket, bucketName string, args sgbucket.Fee } } } + options := DCPClientOptions{ + MetadataStoreType: metadataStoreType, + GroupID: groupID, + DbStats: dbStats, + CollectionIDs: collectionIDs, + AgentPriority: gocbcore.DcpAgentPriorityMed, + CheckpointPrefix: args.CheckpointPrefix, + } + + if args.Backfill == sgbucket.FeedNoBackfill { + metadata, err := getHighSeqMetadata(bucket) + if err != nil { + return err + } + options.InitialMetadata = metadata + } dcpClient, err := NewDCPClient( feedName, callback, - DCPClientOptions{ - MetadataStoreType: metadataStoreType, - GroupID: groupID, - InitialMetadata: metadata, - DbStats: dbStats, - CollectionIDs: collectionIDs, - AgentPriority: gocbcore.DcpAgentPriorityMed, - CheckpointPrefix: args.CheckpointPrefix, - }, + options, bucket) if err != nil { return err diff --git a/db/database_test.go b/db/database_test.go index 275f336760..983b412466 100644 --- a/db/database_test.go +++ b/db/database_test.go @@ -49,10 +49,12 @@ func setupTestDBForBucket(t testing.TB, bucket *base.TestBucket) (*Database, con return SetupTestDBForDataStoreWithOptions(t, bucket, dbcOptions) } -func setupTestDBWithOptionsAndImport(t testing.TB, dbcOptions DatabaseContextOptions) (*Database, context.Context) { +func setupTestDBWithOptionsAndImport(t testing.TB, tBucket *base.TestBucket, dbcOptions DatabaseContextOptions) (*Database, context.Context) { ctx := base.TestCtx(t) AddOptionsFromEnvironmentVariables(&dbcOptions) - tBucket := base.GetTestBucket(t) + if tBucket == nil { + tBucket = base.GetTestBucket(t) + } if dbcOptions.Scopes == nil { dbcOptions.Scopes = GetScopesOptions(t, tBucket, 1) } @@ -2456,7 +2458,7 @@ func TestDeleteWithNoTombstoneCreationSupport(t *testing.T) { t.Skip("Xattrs required") } - db, ctx := setupTestDBWithOptionsAndImport(t, DatabaseContextOptions{}) + db, ctx := setupTestDBWithOptionsAndImport(t, nil, DatabaseContextOptions{}) defer db.Close(ctx) collection := GetSingleDatabaseCollectionWithUser(t, db) @@ -2971,7 +2973,7 @@ func TestImportCompactPanic(t *testing.T) { } // Set the compaction and purge interval unrealistically low to reproduce faster - db, ctx := setupTestDBWithOptionsAndImport(t, DatabaseContextOptions{ + db, ctx := setupTestDBWithOptionsAndImport(t, nil, DatabaseContextOptions{ CompactInterval: 1, }) defer db.Close(ctx) diff --git a/db/import_test.go b/db/import_test.go index d41412d169..838e273def 100644 --- a/db/import_test.go +++ b/db/import_test.go @@ -525,3 +525,37 @@ func TestImportStampClusterUUID(t *testing.T) { require.NoError(t, err) require.Equal(t, 32, len(xattr["cluster_uuid"])) } + +// TestImporNonZeroStart makes sure docs written before sync gateway start get imported +func TestImportNonZeroStart(t *testing.T) { + if base.UnitTestUrlIsWalrus() { + t.Skip("test requires import feed, which requies DCP") + } + + bucket := base.GetTestBucket(t) + + doc1 := "doc1" + revID1 := "1-2a9efe8178aa817f4414ae976aa032d9" + + _, err := bucket.GetSingleDataStore().Add(doc1, 0, rawDocNoMeta()) + require.NoError(t, err) + + db, ctx := setupTestDBWithOptionsAndImport(t, bucket, DatabaseContextOptions{}) + defer 
db.Close(ctx) + + collection := GetSingleDatabaseCollectionWithUser(t, db) + _, ok := base.WaitForStat(func() int64 { + return collection.collectionStats.ImportCount.Value() + }, 1) + require.True(t, ok) + + _, ok = base.WaitForStat(func() int64 { + return db.DbStats.Database().DCPReceivedCount.Value() + }, 1) + require.True(t, ok) + + doc, err := collection.GetDocument(base.TestCtx(t), doc1, DocUnmarshalAll) + require.NoError(t, err) + require.Equal(t, revID1, doc.SyncData.CurrentRev) + +} From b1b017e6b92c30a35e64ff8a4792f756f9235d67 Mon Sep 17 00:00:00 2001 From: Adam Fraser Date: Tue, 30 May 2023 13:21:08 -0700 Subject: [PATCH 26/42] CBG-3001 Avoid bucket retrieval error during OnFeedClose (#6269) * CBG-3001 Avoid bucket retrieval error during OnFeedClose OnFeedError was checking for bucket existence to determine whether to call NotifyMgrOnClose(). This handling isn't necessary for SG, as we want database close to handle shutdown of the import feed (via importListener.Stop()) in the case of a deleted bucket. --- base/dcp_sharded.go | 62 +++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 36 deletions(-) diff --git a/base/dcp_sharded.go b/base/dcp_sharded.go index a693e5f784..83b4d3c117 100644 --- a/base/dcp_sharded.go +++ b/base/dcp_sharded.go @@ -740,48 +740,38 @@ func (meh *sgMgrEventHandlers) OnUnregisterPIndex(pindex *cbgt.PIndex) { // No-op for SG } -// OnFeedError is required to trigger reconnection to a feed on an closed connection (EOF). -// Handling below based on cbft implementation - checks whether the underlying source (bucket) -// still exists with VerifySourceNotExists, and if it exists, calls NotifyMgrOnClose. -// This will trigger cbgt closing and then attempting to reconnect to the feed. +// OnFeedError is required to trigger reconnection to a feed on a closed connection (EOF). +// NotifyMgrOnClose will trigger cbgt closing and then attempt to reconnect to the feed, if the manager hasn't +// been stopped. func (meh *sgMgrEventHandlers) OnFeedError(srcType string, r cbgt.Feed, feedErr error) { // cbgt always passes srcType = SOURCE_GOCBCORE, but we have a wrapped type associated with our indexes - use that instead // for our logging srcType = SOURCE_DCP_SG - - DebugfCtx(meh.ctx, KeyDCP, "cbgt Mgr OnFeedError, srcType: %s, feed name: %s, err: %v", - srcType, r.Name(), feedErr) - + var bucketName, bucketUUID string dcpFeed, ok := r.(cbgt.FeedEx) - if !ok { - return - } - - gone, indexUUID, err := dcpFeed.VerifySourceNotExists() - DebugfCtx(meh.ctx, KeyDCP, "cbgt Mgr OnFeedError, VerifySourceNotExists,"+ - " srcType: %s, gone: %t, indexUUID: %s, err: %v", - srcType, gone, indexUUID, err) - if !gone { - // If we get an EOF error from the feeds and the bucket is still alive, - // then there could at the least two potential error scenarios. - // - // 1. Faulty kv node is failed over. - // 2. Ephemeral network connection issues with the host. - // - // In either case, the current feed instance turns dangling. - // Hence we can close the feeds so that they get refreshed to fix - // the connectivity problems either during the next rebalance - // (new kv node after failover-recovery rebalance) or - // on the next janitor work cycle(ephemeral network issue to the same node). - if strings.Contains(feedErr.Error(), "EOF") { - // If this wasn't an intentional close, log about the EOF - if meh.ctx.Err() != context.Canceled { - InfofCtx(meh.ctx, KeyDCP, "Handling EOF on cbgt feed - notifying manager to trigger reconnection to feed. 
indexUUID: %v, err: %v", indexUUID, feedErr) - } - dcpFeed.NotifyMgrOnClose() + if ok { + bucketName, bucketUUID = dcpFeed.GetBucketDetails() + } + DebugfCtx(meh.ctx, KeyDCP, "cbgt Mgr OnFeedError, srcType: %s, feed name: %s, bucket name: %s, err: %v", + srcType, r.Name(), MD(bucketName), feedErr) + + // If we get an EOF error from the feeds and the import listener hasn't been closed, + // then there could at the least two potential error scenarios. + // + // 1. Faulty kv node is failed over. + // 2. Ephemeral network connection issues with the host. + // + // In either case, the current feed instance turns dangling. + // Hence we can close the feeds so that they get refreshed to fix + // the connectivity problems either during the next rebalance + // (new kv node after failover-recovery rebalance) or + // on the next janitor work cycle(ephemeral network issue to the same node). + if strings.Contains(feedErr.Error(), "EOF") { + // If this wasn't an intentional close, log about the EOF + if meh.ctx.Err() != context.Canceled { + InfofCtx(meh.ctx, KeyDCP, "Handling EOF on cbgt feed - notifying manager to trigger reconnection to feed for bucketName:%v, bucketUUID:%v, err: %v", MD(bucketName), bucketUUID, feedErr) } - return + dcpFeed.NotifyMgrOnClose() } - } From 36d453a3ad3b4469a1729c825ae6414ed653f544 Mon Sep 17 00:00:00 2001 From: Jens Alfke Date: Tue, 4 Apr 2023 16:15:14 -0700 Subject: [PATCH 27/42] Sync: Sequence-allocation optimization for push --- db/blip_handler.go | 27 ++++++++++++++++++++------- db/sequence_allocator.go | 37 ++++++++++++++++++++++++++++++++----- 2 files changed, 52 insertions(+), 12 deletions(-) diff --git a/db/blip_handler.go b/db/blip_handler.go index 37cfdbb15f..ce9ed28be9 100644 --- a/db/blip_handler.go +++ b/db/blip_handler.go @@ -548,6 +548,9 @@ func (bh *blipHandler) sendBatchOfChanges(sender *blip.Sender, changeArray [][]i } if len(changeArray) > 0 { + // Wait before sending, if client has not caught up: + bh.inFlightChangesThrottle <- struct{}{} + // Check for user updates before creating the db copy for handleChangesResponse if err := bh.refreshUser(); err != nil { return err @@ -563,18 +566,18 @@ func (bh *blipHandler) sendBatchOfChanges(sender *blip.Sender, changeArray [][]i return ErrClosedBLIPSender } - bh.inFlightChangesThrottle <- struct{}{} atomic.AddInt64(&bh.changesPendingResponseCount, 1) - bh.replicationStats.SendChangesCount.Add(int64(len(changeArray))) - // Spawn a goroutine to await the client's response: - go func(bh *blipHandler, sender *blip.Sender, response *blip.Message, changeArray [][]interface{}, sendTime time.Time, dbCollection *DatabaseCollectionWithUser) { - if err := bh.handleChangesResponse(sender, response, changeArray, sendTime, dbCollection, bh.collectionIdx); err != nil { + + // await the client's response: + outrq.OnResponse(func(response *blip.Message) { + if err := bh.handleChangesResponse(sender, response, changeArray, sendTime, handleChangesResponseDbCollection, bh.collectionIdx); err != nil { base.WarnfCtx(bh.loggingCtx, "Error from bh.handleChangesResponse: %v", err) if bh.fatalErrorCallback != nil { bh.fatalErrorCallback(err) } } + base.InfofCtx(bh.loggingCtx, base.KeySync, "...sent requested revs, from %s", changeArray[0][0].(SequenceID).String()) // Sent all of the revs for this changes batch, allow another changes batch to be sent. 
select { @@ -583,7 +586,7 @@ func (bh *blipHandler) sendBatchOfChanges(sender *blip.Sender, changeArray [][]i } atomic.AddInt64(&bh.changesPendingResponseCount, -1) - }(bh, sender, outrq.Response(), changeArray, sendTime, handleChangesResponseDbCollection) + }) } else { outrq.SetNoReply(true) if !bh.sendBLIPMessage(sender, outrq) { @@ -761,6 +764,7 @@ func (bh *blipHandler) handleProposeChanges(rq *blip.Message) error { output := bytes.NewBuffer(make([]byte, 0, 5*len(changeList))) output.Write([]byte("[")) nWritten := 0 + nRequested := 0 // proposeChanges stats startTime := time.Now() @@ -780,7 +784,10 @@ func (bh *blipHandler) handleProposeChanges(rq *blip.Message) error { if status == ProposedRev_OK_IsNew { // Remember that the doc doesn't exist locally, in order to optimize the upcoming Put: bh.collectionCtx.notePendingInsertion(docID) - } else if status != ProposedRev_OK { + nRequested++ + } else if status == ProposedRev_OK { + nRequested++ + } else { // Reject the proposed change. // Skip writing trailing zeroes; but if we write a number afterwards we have to catch up if nWritten > 0 { @@ -805,6 +812,12 @@ func (bh *blipHandler) handleProposeChanges(rq *blip.Message) error { } } output.Write([]byte("]")) + + if nRequested > 0 { + // Notify the sequenceAllocator it's going to be asked for nRequested sequences soon: + bh.db.sequences.reserveRequest(uint64(nRequested)) + } + response := rq.Response() if bh.sgCanUseDeltas { base.DebugfCtx(bh.loggingCtx, base.KeyAll, "Setting deltas=true property on proposeChanges response") diff --git a/db/sequence_allocator.go b/db/sequence_allocator.go index 1c2f1664f9..db53c0f809 100644 --- a/db/sequence_allocator.go +++ b/db/sequence_allocator.go @@ -53,6 +53,7 @@ type sequenceAllocator struct { lastSequenceReserveTime time.Time // Time of most recent sequence reserve releaseSequenceWait time.Duration // Supports test customization metaKeys *base.MetadataKeys // Key generator for sequence and unused sequence documents + preRequestCount uint64 // Number of sequences pre-requested } func newSequenceAllocator(datastore base.DataStore, dbStatsMap *base.DatabaseStats, metaKeys *base.MetadataKeys) (*sequenceAllocator, error) { @@ -135,6 +136,9 @@ func (s *sequenceAllocator) releaseUnusedSequences() { s.sequenceBatchSize = s.sequenceBatchSize - unusedAmount } + // Ignore any pre-requests if we're releasing sequences + s.preRequestCount = 0 + s.last = s.max s.mutex.Unlock() } @@ -185,9 +189,16 @@ func (s *sequenceAllocator) nextSequence() (sequence uint64, err error) { return sequence, nil } +func (s *sequenceAllocator) reserveRequest(count uint64) { + base.InfofCtx(context.TODO(), base.KeyCRUD, "request for %d sequences on next refill", count) + s.mutex.Lock() + s.preRequestCount += count + s.mutex.Unlock() +} + // Reserve a new sequence range. Called by nextSequence when the previously allocated sequences have all been used. func (s *sequenceAllocator) _reserveSequenceRange() error { - + var count uint64 // If the time elapsed since the last reserveSequenceRange invocation reserve is shorter than our target frequency, // this indicates we're making an incr call more frequently than we want to. Triggers an increase in batch size to // reduce incr frequency. 
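// Illustrative sizing example for the pre-request path above (identifiers and the maxBatchSize
// cap are taken from this diff; the numbers are made up): if a proposeChanges batch accepts 200
// revisions while sequenceBatchSize is still 10, reserveRequest(200) sets preRequestCount to 200,
// so the next _reserveSequenceRange call reserves 200 sequences in a single incr (subject to the
// maxBatchSize cap) instead of topping the allocator up 10 at a time while the pushed revs are
// being written.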
@@ -198,20 +209,36 @@ func (s *sequenceAllocator) _reserveSequenceRange() error { } base.DebugfCtx(context.TODO(), base.KeyCRUD, "Increased sequence batch to %d", s.sequenceBatchSize) } + count = s.sequenceBatchSize + + // If a caller has indicated it will need sequences soon, increase the count: + if s.preRequestCount > count { + count = s.preRequestCount + if count > maxBatchSize { + count = maxBatchSize + } + base.InfofCtx(context.TODO(), base.KeyCRUD, "Reserving %d sequences by special request", count) + } + s.preRequestCount = 0 + + return s._reserveSequenceCount(count) +} - max, err := s.incrementSequence(s.sequenceBatchSize) +// Reserves a specified number of sequences. +func (s *sequenceAllocator) _reserveSequenceCount(count uint64) error { + max, err := s.incrementSequence(count) if err != nil { - base.WarnfCtx(context.TODO(), "Error from incrementSequence in _reserveSequences(%d): %v", s.sequenceBatchSize, err) + base.WarnfCtx(context.TODO(), "Error from incrementSequence in _reserveSequenceCount(%d): %v", count, err) return err } // Update max and last used sequences. Last is updated here to account for sequences allocated/used by other // Sync Gateway nodes s.max = max - s.last = max - s.sequenceBatchSize + s.last = max - count s.lastSequenceReserveTime = time.Now() - s.dbStats.SequenceReservedCount.Add(int64(s.sequenceBatchSize)) + s.dbStats.SequenceReservedCount.Add(int64(count)) return nil } From 60288e1bb721102ac1b43a11617647ffce0f8edb Mon Sep 17 00:00:00 2001 From: Jens Alfke Date: Tue, 4 Apr 2023 16:16:33 -0700 Subject: [PATCH 28/42] Sync: Small change-list optimizations - Increase maxInFlightChangesBatches from 2 to 4 - Send `changes` requests as Urgent, as LiteCore does --- db/blip_handler.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/db/blip_handler.go b/db/blip_handler.go index ce9ed28be9..a97e583867 100644 --- a/db/blip_handler.go +++ b/db/blip_handler.go @@ -52,7 +52,7 @@ var kConnectedClientHandlersByProfile = map[string]blipHandlerFunc{ } // maxInFlightChangesBatches is the maximum number of in-flight changes batches a client is allowed to send without being throttled. -const maxInFlightChangesBatches = 2 +const maxInFlightChangesBatches = 4 type blipHandler struct { *BlipSyncContext @@ -535,6 +535,7 @@ func (bh *blipHandler) buildChangesRow(change *ChangeEntry, revID string) []inte func (bh *blipHandler) sendBatchOfChanges(sender *blip.Sender, changeArray [][]interface{}, ignoreNoConflicts bool) error { outrq := blip.NewRequest() + outrq.SetUrgent(true) outrq.SetProfile("changes") if ignoreNoConflicts { outrq.Properties[ChangesMessageIgnoreNoConflicts] = trueProperty From 8e1d95954b20654c7be0a60dace05f6bd08315f6 Mon Sep 17 00:00:00 2001 From: Jens Alfke Date: Tue, 4 Apr 2023 16:11:05 -0700 Subject: [PATCH 29/42] Sync: Limit concurrency of blip handlers Updated go-blip, with new API for registering & calling request handlers. 
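A minimal sketch of the new registration pattern, using only the go-blip types
exercised by this change (ByProfileDispatcher, ThreadPool, ThrottlingDispatcher)
and assuming an existing *blip.Context named blipContext; it illustrates the
wiring rather than serving as a drop-in example:

    // Handlers are dispatched per profile and run on a shared thread pool.
    pool := blip.ThreadPool{Concurrency: 5}
    pool.Start()

    dispatcher := &blip.ByProfileDispatcher{}

    // Wrap an async handler so it executes on the pool:
    handler := pool.WrapAsyncHandler(func(rq *blip.Message, onComplete func()) {
        defer onComplete()
        // ... handle the message ...
    })

    // Optionally cap how many messages of a given profile run concurrently:
    throttle := blip.ThrottlingDispatcher{Handler: handler, MaxConcurrency: 1}
    dispatcher.SetHandler("changes", throttle.Dispatch)

    blipContext.RequestHandler = dispatcher.Dispatch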
--- db/blip.go | 2 + db/blip_handler.go | 61 ++++++++++------- db/blip_sync_context.go | 137 ++++++++++++++++++++++--------------- go.mod | 2 +- go.sum | 4 +- rest/attachment_test.go | 8 +-- rest/blip_api_crud_test.go | 61 ++++++++++------- rest/blip_client_test.go | 30 ++++---- rest/utilities_testing.go | 55 +++++++++------ 9 files changed, 214 insertions(+), 146 deletions(-) diff --git a/db/blip.go b/db/blip.go index 39a927e387..198f022729 100644 --- a/db/blip.go +++ b/db/blip.go @@ -57,6 +57,8 @@ func NewSGBlipContextWithProtocols(ctx context.Context, id string, protocol ...s bc, err = blip.NewContextCustomID(id, protocol...) } + bc.MaxDispatchedBytes = BlipMaxIncomingBytesBeingDispatched + bc.LogMessages = base.LogDebugEnabled(base.KeyWebSocket) bc.LogFrames = base.LogDebugEnabled(base.KeyWebSocketFrame) bc.Logger = defaultBlipLogger(ctx) diff --git a/db/blip_handler.go b/db/blip_handler.go index a97e583867..f9cb63083a 100644 --- a/db/blip_handler.go +++ b/db/blip_handler.go @@ -51,6 +51,23 @@ var kConnectedClientHandlersByProfile = map[string]blipHandlerFunc{ MessageGraphQL: userBlipHandler((*blipHandler).handleGraphQL), } +// max number of concurrent handlers for each message type. Default is 0 meaning 'unlimited'. +var handlerConcurrencyByProfile = map[string]int{ + MessageChanges: 1, + MessageProposeChanges: 1, + MessageRev: 16, + MessageGetAttachment: 10, + MessageGetRev: 10, + MessagePutRev: 10, +} + +// Handlers that run immediately, i.e. before the message has completely arrived. +// This guarantees they are called in message order. +var handlerImmediacyByProfile = map[string]bool{ + MessageChanges: true, + MessageProposeChanges: true, +} + // maxInFlightChangesBatches is the maximum number of in-flight changes batches a client is allowed to send without being throttled. const maxInFlightChangesBatches = 4 @@ -557,37 +574,35 @@ func (bh *blipHandler) sendBatchOfChanges(sender *blip.Sender, changeArray [][]i return err } - handleChangesResponseDbCollection, err := bh.copyDatabaseCollectionWithUser(bh.collectionIdx) - if err != nil { - return err - } - sendTime := time.Now() - if !bh.sendBLIPMessage(sender, outrq) { - return ErrClosedBLIPSender - } - atomic.AddInt64(&bh.changesPendingResponseCount, 1) bh.replicationStats.SendChangesCount.Add(int64(len(changeArray))) // await the client's response: outrq.OnResponse(func(response *blip.Message) { - if err := bh.handleChangesResponse(sender, response, changeArray, sendTime, handleChangesResponseDbCollection, bh.collectionIdx); err != nil { - base.WarnfCtx(bh.loggingCtx, "Error from bh.handleChangesResponse: %v", err) - if bh.fatalErrorCallback != nil { - bh.fatalErrorCallback(err) + bh.threadPool.Go(func() { + if err := bh.handleChangesResponse(sender, response, changeArray, sendTime, bh.collectionIdx); err != nil { + base.WarnfCtx(bh.loggingCtx, "Error from bh.handleChangesResponse: %v", err) + if bh.fatalErrorCallback != nil { + bh.fatalErrorCallback(err) + } } - } - base.InfofCtx(bh.loggingCtx, base.KeySync, "...sent requested revs, from %s", changeArray[0][0].(SequenceID).String()) + base.InfofCtx(bh.loggingCtx, base.KeySync, "...sent requested revs, from %s", changeArray[0][0].(SequenceID).String()) - // Sent all of the revs for this changes batch, allow another changes batch to be sent. - select { - case <-bh.inFlightChangesThrottle: - case <-bh.terminator: - } + // Sent all of the revs for this changes batch, allow another changes batch to be sent. 
+ select { + case <-bh.inFlightChangesThrottle: + case <-bh.terminator: + } - atomic.AddInt64(&bh.changesPendingResponseCount, -1) + atomic.AddInt64(&bh.changesPendingResponseCount, -1) + }) }) + + if !bh.sendBLIPMessage(sender, outrq) { + return ErrClosedBLIPSender + } + } else { outrq.SetNoReply(true) if !bh.sendBLIPMessage(sender, outrq) { @@ -1326,7 +1341,7 @@ func (bh *blipHandler) sendGetAttachment(sender *blip.Sender, docID string, name return nil, ErrClosedBLIPSender } - resp := outrq.Response() + resp := outrq.Response() // TODO: Don't block the handler! respBody, err := resp.Body() if err != nil { @@ -1372,7 +1387,7 @@ func (bh *blipHandler) sendProveAttachment(sender *blip.Sender, docID, name, dig return ErrClosedBLIPSender } - resp := outrq.Response() + resp := outrq.Response() // TODO: Don't block the handler! body, err := resp.Body() if err != nil { diff --git a/db/blip_sync_context.go b/db/blip_sync_context.go index b90560a4e0..8ca5f0a01d 100644 --- a/db/blip_sync_context.go +++ b/db/blip_sync_context.go @@ -27,9 +27,17 @@ import ( ) const ( - // Blip default vals + // Number of revisions to include in a 'changes' message BlipDefaultBatchSize = uint64(200) BlipMinimumBatchSize = uint64(10) // Not in the replication spec - is this required? + + // Number of goroutines handling incoming BLIP requests (and other tasks) + BlipThreadPoolSize = 5 + + // Maximum total size of incoming BLIP requests that are currently being dispatched and handled. + // Above this amount, the BLIP engine stops reading from the WebSocket, applying back-pressure + // to the client and keeping memory usage down. + BlipMaxIncomingBytesBeingDispatched = 100000 // bytes ) var ErrClosedBLIPSender = errors.New("use of closed BLIP sender") @@ -45,6 +53,7 @@ func NewBlipSyncContext(ctx context.Context, bc *blip.Context, db *Database, con replicationStats: replicationStats, inFlightChangesThrottle: make(chan struct{}, maxInFlightChangesBatches), collections: &blipCollections{}, + threadPool: blip.ThreadPool{Concurrency: BlipThreadPoolSize}, } if bsc.replicationStats == nil { bsc.replicationStats = NewBlipSyncStats() @@ -60,22 +69,26 @@ func NewBlipSyncContext(ctx context.Context, bc *blip.Context, db *Database, con } // Register default handlers - bc.DefaultHandler = bsc.NotFoundHandler bc.FatalErrorHandler = func(err error) { base.InfofCtx(ctx, base.KeyHTTP, "%s: --> BLIP+WebSocket connection error: %v", contextID, err) } + dispatcher := &blip.ByProfileDispatcher{} + dispatcher.SetDefaultHandler(bsc.NotFoundHandler) + // Register 2.x replicator handlers for profile, handlerFn := range handlersByProfile { - bsc.register(profile, handlerFn) + bsc.register(dispatcher, profile, handlerFn) } - if db.Options.UnsupportedOptions.ConnectedClient { // Register Connected Client handlers for profile, handlerFn := range kConnectedClientHandlersByProfile { - bsc.register(profile, handlerFn) + bsc.register(dispatcher, profile, handlerFn) } } + bsc.blipContext.RequestHandler = dispatcher.Dispatch + bsc.threadPool.Start() + return bsc } @@ -83,6 +96,7 @@ func NewBlipSyncContext(ctx context.Context, bc *blip.Context, db *Database, con // This connection remains open until the client closes it, and can receive any number of requests. 
type BlipSyncContext struct { blipContext *blip.Context + threadPool blip.ThreadPool blipContextDb *Database // 'master' database instance for the replication, used as source when creating handler-specific databases loggingCtx context.Context // logging context for connection dbUserLock sync.RWMutex // Must be held when refreshing the db user @@ -142,10 +156,11 @@ func (bsc *BlipSyncContext) SetClientType(clientType BLIPSyncContextClientType) // Registers a BLIP handler including the outer-level work of logging & error handling. // Includes the outer handler as a nested function. -func (bsc *BlipSyncContext) register(profile string, handlerFn func(*blipHandler, *blip.Message) error) { +func (bsc *BlipSyncContext) register(dispatcher *blip.ByProfileDispatcher, profile string, handlerFn func(*blipHandler, *blip.Message) error) { // Wrap the handler function with a function that adds handling needed by all handlers - handlerFnWrapper := func(rq *blip.Message) { + handler := func(rq *blip.Message, onComplete func()) { + defer onComplete() // Recover to log panic from handlers and repanic for go-blip response handling defer func() { @@ -210,8 +225,19 @@ func (bsc *BlipSyncContext) register(profile string, handlerFn func(*blipHandler bsc.reportStats(false) } - bsc.blipContext.HandlerForProfile[profile] = handlerFnWrapper + // Handlers run on the thread pool + handler = bsc.threadPool.WrapAsyncHandler(handler) + if concurrency := handlerConcurrencyByProfile[profile]; concurrency > 0 { + // Limit number of concurrently running handlers for some profiles: + throttle := blip.ThrottlingDispatcher{ + Handler: handler, + MaxConcurrency: concurrency, + } + handler = throttle.Dispatch + } + + dispatcher.SetHandler(profile, handler) } func (bsc *BlipSyncContext) Close() { @@ -228,15 +254,16 @@ func (bsc *BlipSyncContext) Close() { collection.changesCtxCancel() } bsc.reportStats(true) + bsc.threadPool.Stop() close(bsc.terminator) }) } // NotFoundHandler is used for unknown requests -func (bsc *BlipSyncContext) NotFoundHandler(rq *blip.Message) { +func (bsc *BlipSyncContext) NotFoundHandler(rq *blip.Message, onComplete func()) { base.InfofCtx(bsc.loggingCtx, base.KeySync, "%s Type:%q", rq, rq.Profile()) base.InfofCtx(bsc.loggingCtx, base.KeySync, "%s --> 404 Unknown profile", rq) - blip.Unhandled(rq) + blip.UnhandledAsync(rq, onComplete) } func (bsc *BlipSyncContext) copyContextDatabase() *Database { @@ -439,61 +466,61 @@ func (bsc *BlipSyncContext) sendRevisionWithProperties(sender *blip.Sender, docI } if awaitResponse { - go func(activeSubprotocol string) { - defer func() { - if panicked := recover(); panicked != nil { - bsc.replicationStats.NumHandlersPanicked.Add(1) - base.WarnfCtx(bsc.loggingCtx, "PANIC handling 'sendRevision' response: %v\n%s", panicked, debug.Stack()) - bsc.Close() + outrq.OnResponse(func(resp *blip.Message) { + bsc.threadPool.Go(func() { + defer func() { + if panicked := recover(); panicked != nil { + bsc.replicationStats.NumHandlersPanicked.Add(1) + base.WarnfCtx(bsc.loggingCtx, "PANIC handling 'sendRevision' response: %v\n%s", panicked, debug.Stack()) + bsc.Close() + } + }() + + respBody, err := resp.Body() + if err != nil { + base.WarnfCtx(bsc.loggingCtx, "couldn't get response body for rev: %v", err) } - }() - resp := outrq.Response() // blocks till reply is received + base.TracefCtx(bsc.loggingCtx, base.KeySync, "Received response for sendRevisionWithProperties rev message %s/%s", base.UD(docID), revID) - respBody, err := resp.Body() - if err != nil { - 
base.WarnfCtx(bsc.loggingCtx, "couldn't get response body for rev: %v", err) - } + if resp.Type() == blip.ErrorType { + bsc.replicationStats.SendRevErrorTotal.Add(1) + base.InfofCtx(bsc.loggingCtx, base.KeySync, "error %s in response to rev: %s", resp.Properties["Error-Code"], respBody) - base.TracefCtx(bsc.loggingCtx, base.KeySync, "Received response for sendRevisionWithProperties rev message %s/%s", base.UD(docID), revID) - - if resp.Type() == blip.ErrorType { - bsc.replicationStats.SendRevErrorTotal.Add(1) - base.InfofCtx(bsc.loggingCtx, base.KeySync, "error %s in response to rev: %s", resp.Properties["Error-Code"], respBody) - - if errorDomainIsHTTP(resp) { - switch resp.Properties["Error-Code"] { - case "409": - bsc.replicationStats.SendRevErrorConflictCount.Add(1) - case "403": - bsc.replicationStats.SendRevErrorRejectedCount.Add(1) - case "422", "404": - // unprocessable entity, CBL has not been able to use the delta we sent, so we should re-send the revision in full - if resendFullRevisionFunc != nil { - base.DebugfCtx(bsc.loggingCtx, base.KeySync, "sending full body replication for doc %s/%s due to unprocessable entity", base.UD(docID), revID) - if err := resendFullRevisionFunc(); err != nil { - base.WarnfCtx(bsc.loggingCtx, "unable to resend revision: %v", err) - } - } - case "500": - // runtime exceptions return 500 status codes, but we have no other way to determine if this 500 error was caused by the sync-function than matching on the error message. - if bytes.Contains(respBody, []byte("JS sync function")) { + if errorDomainIsHTTP(resp) { + switch resp.Properties["Error-Code"] { + case "409": + bsc.replicationStats.SendRevErrorConflictCount.Add(1) + case "403": bsc.replicationStats.SendRevErrorRejectedCount.Add(1) - } else { - bsc.replicationStats.SendRevErrorOtherCount.Add(1) + case "422", "404": + // unprocessable entity, CBL has not been able to use the delta we sent, so we should re-send the revision in full + if resendFullRevisionFunc != nil { + base.DebugfCtx(bsc.loggingCtx, base.KeySync, "sending full body replication for doc %s/%s due to unprocessable entity", base.UD(docID), revID) + if err := resendFullRevisionFunc(); err != nil { + base.WarnfCtx(bsc.loggingCtx, "unable to resend revision: %v", err) + } + } + case "500": + // runtime exceptions return 500 status codes, but we have no other way to determine if this 500 error was caused by the sync-function than matching on the error message. 
+ if bytes.Contains(respBody, []byte("JS sync function")) { + bsc.replicationStats.SendRevErrorRejectedCount.Add(1) + } else { + bsc.replicationStats.SendRevErrorOtherCount.Add(1) + } } } + } else { + bsc.replicationStats.SendRevCount.Add(1) } - } else { - bsc.replicationStats.SendRevCount.Add(1) - } - bsc.removeAllowedAttachments(docID, attMeta, activeSubprotocol) + bsc.removeAllowedAttachments(docID, attMeta, activeSubprotocol) - if collectionCtx.sgr2PushProcessedSeqCallback != nil { - collectionCtx.sgr2PushProcessedSeqCallback(seq) - } - }(activeSubprotocol) + if collectionCtx.sgr2PushProcessedSeqCallback != nil { + collectionCtx.sgr2PushProcessedSeqCallback(seq) + } + }) + }) } return nil diff --git a/go.mod b/go.mod index d13419c0d2..2a5e0ce07a 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ require ( github.com/coreos/go-oidc v2.2.1+incompatible github.com/couchbase/cbgt v1.3.2-0.20230405201040-c0ede9925e46 github.com/couchbase/clog v0.1.0 - github.com/couchbase/go-blip v0.0.0-20230510201532-fcadc404bd41 + github.com/couchbase/go-blip v0.0.0-20230515195238-a7b936f01f65 github.com/couchbase/go-couchbase v0.1.1 github.com/couchbase/gocb/v2 v2.6.2 github.com/couchbase/gocbcore/v10 v10.2.3-0.20230412164057-d9c465de8911 diff --git a/go.sum b/go.sum index f8dfff29f7..2be4257b00 100644 --- a/go.sum +++ b/go.sum @@ -69,8 +69,8 @@ github.com/couchbase/cbgt v1.3.2-0.20230405201040-c0ede9925e46 h1:tRxeXfSHBzAq6m github.com/couchbase/cbgt v1.3.2-0.20230405201040-c0ede9925e46/go.mod h1:tJF3TUUO3ZDBU15auN1gNsIVY3Oo+jj46zIXH4RBxk4= github.com/couchbase/clog v0.1.0 h1:4Kh/YHkhRjMCbdQuvRVsm39XZh4FtL1d8fAwJsHrEPY= github.com/couchbase/clog v0.1.0/go.mod h1:7tzUpEOsE+fgU81yfcjy5N1H6XtbVC8SgOz/3mCjmd4= -github.com/couchbase/go-blip v0.0.0-20230510201532-fcadc404bd41 h1:pjBwvGjhloggITOU9Fqg4yQ/lbZJUHnz8OsYUUczQDw= -github.com/couchbase/go-blip v0.0.0-20230510201532-fcadc404bd41/go.mod h1:nSpldGTqAhTOaDDL0Li2dSE0smqbISKagT7fIqYIRec= +github.com/couchbase/go-blip v0.0.0-20230515195238-a7b936f01f65 h1:9LSHcwSzpLj7/M3sKZiWALc0kdNApWGdlY2Q7WbuaF4= +github.com/couchbase/go-blip v0.0.0-20230515195238-a7b936f01f65/go.mod h1:WstEZkP0F1n3ev8e2mzoUqe4pKYHSJzA1uv73ke/GNQ= github.com/couchbase/go-couchbase v0.1.1 h1:ClFXELcKj/ojyoTYbsY34QUrrYCBi/1G749sXSCkdhk= github.com/couchbase/go-couchbase v0.1.1/go.mod h1:+/bddYDxXsf9qt0xpDUtRR47A2GjaXmGGAqQ/k3GJ8A= github.com/couchbase/gocb/v2 v2.6.2 h1:sZg0+3GiYW7OT53ENEGnkkQMXhVuJ1qOJplvZDlM5Xk= diff --git a/rest/attachment_test.go b/rest/attachment_test.go index d182d90772..1874307f2c 100644 --- a/rest/attachment_test.go +++ b/rest/attachment_test.go @@ -2840,17 +2840,17 @@ func TestProveAttachmentNotFound(t *testing.T) { attachmentData := []byte("attachmentA") attachmentDataEncoded := base64.StdEncoding.EncodeToString(attachmentData) - bt.blipContext.HandlerForProfile[db.MessageProveAttachment] = func(msg *blip.Message) { + bt.dispatcher.SetHandler(db.MessageProveAttachment, blip.AsAsyncHandler(func(msg *blip.Message) { status, errMsg := base.ErrorAsHTTPStatus(db.ErrAttachmentNotFound) msg.Response().SetError("HTTP", status, errMsg) - } + })) // Handler for when full attachment is requested - bt.blipContext.HandlerForProfile[db.MessageGetAttachment] = func(msg *blip.Message) { + bt.dispatcher.SetHandler(db.MessageGetAttachment, blip.AsAsyncHandler(func(msg *blip.Message) { resp := msg.Response() resp.SetBody(attachmentData) resp.SetCompressed(msg.Properties[db.BlipCompress] == "true") - } + })) // Initial set up sent, _, _, err := bt.SendRev("doc1", "1-abc", 
[]byte(`{"key": "val", "_attachments": {"attachment": {"data": "`+attachmentDataEncoded+`"}}}`), blip.Properties{}) diff --git a/rest/blip_api_crud_test.go b/rest/blip_api_crud_test.go index bae3c89a18..0b5133c7d6 100644 --- a/rest/blip_api_crud_test.go +++ b/rest/blip_api_crud_test.go @@ -104,7 +104,7 @@ func TestBlipPushRevisionInspectChanges(t *testing.T) { receivedChangesRequestWg := sync.WaitGroup{} // When this test sends subChanges, Sync Gateway will send a changes request that must be handled - bt.blipContext.HandlerForProfile["changes"] = func(request *blip.Message) { + bt.RegisterChangesHandler(func(request *blip.Message) { log.Printf("got changes message: %+v", request) body, err := request.Body() @@ -136,7 +136,7 @@ func TestBlipPushRevisionInspectChanges(t *testing.T) { receivedChangesRequestWg.Done() - } + }) // Send subChanges to subscribe to changes, which will cause the "changes" profile handler above to be called back subChangesRequest := bt.newRequest() @@ -176,11 +176,11 @@ func TestContinuousChangesSubscription(t *testing.T) { var numbatchesReceived int32 nonIntegerSequenceReceived := false changeCount := 0 - bt.blipContext.HandlerForProfile["changes"] = func(request *blip.Message) { + bt.RegisterChangesHandler(func(request *blip.Message) { body, err := request.Body() require.NoError(t, err) - log.Printf("got change with body %s, count %d", body, changeCount) + log.Printf("got change msg #%d with body %s, count %d", request.SerialNumber(), body, changeCount) if string(body) != "null" { atomic.AddInt32(&numbatchesReceived, 1) @@ -199,7 +199,7 @@ func TestContinuousChangesSubscription(t *testing.T) { // Make sure sequence numbers are monotonically increasing receivedSeq, ok := change[0].(float64) if ok { - assert.True(t, receivedSeq > lastReceivedSeq) + assert.Greater(t, receivedSeq, lastReceivedSeq, "Sequences out of order") lastReceivedSeq = receivedSeq } else { nonIntegerSequenceReceived = true @@ -228,7 +228,7 @@ func TestContinuousChangesSubscription(t *testing.T) { response.SetBody(emptyResponseValBytes) } - } + }) // Send subChanges to subscribe to changes, which will cause the "changes" profile handler above to be called back subChangesRequest := bt.newRequest() @@ -295,8 +295,8 @@ func TestBlipOneShotChangesSubscription(t *testing.T) { lastReceivedSeq := float64(0) var numbatchesReceived int32 nonIntegerSequenceReceived := false - bt.blipContext.HandlerForProfile["changes"] = func(request *blip.Message) { + changesHandler := func(request *blip.Message) { body, err := request.Body() require.NoError(t, err) @@ -318,7 +318,7 @@ func TestBlipOneShotChangesSubscription(t *testing.T) { // Make sure sequence numbers are monotonically increasing receivedSeq, ok := change[0].(float64) if ok { - assert.True(t, receivedSeq > lastReceivedSeq) + assert.Greater(t, receivedSeq, lastReceivedSeq) lastReceivedSeq = receivedSeq } else { nonIntegerSequenceReceived = true @@ -347,8 +347,13 @@ func TestBlipOneShotChangesSubscription(t *testing.T) { assert.NoError(t, err, "Error marshalling response") response.SetBody(emptyResponseValBytes) } + } + dispatcher := blip.ThrottlingDispatcher{ + MaxConcurrency: 1, + Handler: blip.AsAsyncHandler(changesHandler), } + bt.blipContext.RequestHandler = dispatcher.Dispatch // Increment waitgroup to account for the expected 'caught up' nil changes entry. 
receivedChangesWg.Add(1) @@ -456,14 +461,18 @@ func TestBlipSubChangesDocIDFilter(t *testing.T) { } // When this test sends subChanges, Sync Gateway will send a changes request that must be handled + var changesMutex sync.Mutex lastReceivedSeq := float64(0) var numbatchesReceived int32 nonIntegerSequenceReceived := false - bt.blipContext.HandlerForProfile["changes"] = func(request *blip.Message) { + bt.RegisterChangesHandler(func(request *blip.Message) { + changesMutex.Lock() + defer changesMutex.Unlock() body, err := request.Body() require.NoError(t, err) + log.Printf("Received changes: %s", body) //TEMP if string(body) != "null" { @@ -483,7 +492,7 @@ func TestBlipSubChangesDocIDFilter(t *testing.T) { // Make sure sequence numbers are monotonically increasing receivedSeq, ok := change[0].(float64) if ok { - assert.True(t, receivedSeq > lastReceivedSeq) + assert.Greater(t, receivedSeq, lastReceivedSeq) lastReceivedSeq = receivedSeq } else { nonIntegerSequenceReceived = true @@ -523,7 +532,7 @@ func TestBlipSubChangesDocIDFilter(t *testing.T) { response.SetBody(emptyResponseValBytes) } - } + }) // Increment waitgroup to account for the expected 'caught up' nil changes entry. receivedChangesWg.Add(1) @@ -885,7 +894,7 @@ function(doc, oldDoc) { // Write a doc that grants access to itself for the active replication's user func TestContinuousChangesDynamicGrant(t *testing.T) { - base.SetUpTestLogging(t, base.LevelInfo, base.KeyHTTP, base.KeySync, base.KeySyncMsg, base.KeyChanges, base.KeyCache) + base.SetUpTestLogging(t, base.LevelInfo, base.KeyHTTP, base.KeySync, base.KeySyncMsg, base.KeyWebSocket, base.KeyChanges, base.KeyCache) // Initialize restTester here, so that we can use custom sync function, and later modify user syncFunction := ` function(doc, oldDoc) { @@ -916,7 +925,7 @@ function(doc, oldDoc) { var numbatchesReceived int32 nonIntegerSequenceReceived := false changeCount := 0 - bt.blipContext.HandlerForProfile["changes"] = func(request *blip.Message) { + bt.RegisterChangesHandler(func(request *blip.Message) { body, err := request.Body() require.NoError(t, err) @@ -928,7 +937,7 @@ function(doc, oldDoc) { // Expected changes body: [[1,"foo","1-abc"]] changeListReceived := [][]interface{}{} err = base.JSONUnmarshal(body, &changeListReceived) - assert.NoError(t, err, "Error unmarshalling changes received") + assert.NoError(t, err, "Error unmarshalling changes received: %s", body) for _, change := range changeListReceived { @@ -962,10 +971,10 @@ function(doc, oldDoc) { response.SetBody(responseValBytes) } - } + }) // -------- Rev handler callback -------- - bt.blipContext.HandlerForProfile["rev"] = func(request *blip.Message) { + bt.dispatcher.SetHandler("rev", blip.AsAsyncHandler(func(request *blip.Message) { defer revsFinishedWg.Done() body, err := request.Body() require.NoError(t, err) @@ -979,7 +988,7 @@ function(doc, oldDoc) { _, isRemoved := doc[db.BodyRemoved] assert.False(t, isRemoved) - } + })) // Send subChanges to subscribe to changes, which will cause the "changes" profile handler above to be called back subChangesRequest := bt.newRequest() @@ -1043,7 +1052,7 @@ function(doc, oldDoc) { var numbatchesReceived int32 nonIntegerSequenceReceived := false changeCount := 0 - bt.blipContext.HandlerForProfile["changes"] = func(request *blip.Message) { + bt.RegisterChangesHandler(func(request *blip.Message) { body, err := request.Body() require.NoError(t, err) @@ -1092,10 +1101,10 @@ function(doc, oldDoc) { response.SetBody(responseValBytes) } - } + }) // -------- Rev handler callback 
-------- - bt.blipContext.HandlerForProfile["rev"] = func(request *blip.Message) { + bt.dispatcher.SetHandler("rev", blip.AsAsyncHandler(func(request *blip.Message) { defer revsFinishedWg.Done() body, err := request.Body() require.NoError(t, err) @@ -1108,7 +1117,7 @@ function(doc, oldDoc) { _, isRemoved := doc[db.BodyRemoved] require.False(t, isRemoved, fmt.Sprintf("Document %v shouldn't be removed", request.Properties[db.RevMessageID])) - } + })) // Send subChanges to subscribe to changes, which will cause the "changes" profile handler above to be called back subChangesRequest := bt.newRequest() @@ -1790,6 +1799,8 @@ func TestMissingNoRev(t *testing.T) { defer rt.Close() ctx := rt.Context() + base.SetUpTestLogging(t, base.LevelInfo, base.KeyCRUD, base.KeySync, base.KeySyncMsg, base.KeyWebSocket) + bt, err := NewBlipTesterFromSpecWithRT(t, nil, rt) require.NoError(t, err, "Unexpected error creating BlipTester") defer bt.Close() @@ -2163,7 +2174,7 @@ func TestMultipleOutstandingChangesSubscriptions(t *testing.T) { bt := NewBlipTesterDefaultCollection(t) defer bt.Close() - bt.blipContext.HandlerForProfile["changes"] = func(request *blip.Message) { + bt.RegisterChangesHandler(func(request *blip.Message) { if !request.NoReply() { // Send an empty response to avoid the Sync: Invalid response to 'changes' message response := request.Response() @@ -2172,7 +2183,7 @@ func TestMultipleOutstandingChangesSubscriptions(t *testing.T) { assert.NoError(t, err, "Error marshalling response") response.SetBody(emptyResponseValBytes) } - } + }) pullStats := bt.restTester.GetDatabase().DbStats.CBLReplicationPull() require.EqualValues(t, 0, pullStats.NumPullReplTotalContinuous.Value()) @@ -2561,10 +2572,10 @@ func TestSendRevisionNoRevHandling(t *testing.T) { // Change noRev handler so it's known when a noRev is received recievedNoRevs := make(chan *blip.Message) - btc.pullReplication.bt.blipContext.HandlerForProfile[db.MessageNoRev] = func(msg *blip.Message) { + btc.pullReplication.bt.dispatcher.SetHandler(db.MessageNoRev, blip.AsAsyncHandler(func(msg *blip.Message) { fmt.Println("Received noRev", msg.Properties) recievedNoRevs <- msg - } + })) resp := rt.SendAdminRequest(http.MethodPut, "/{{.keyspace}}/"+docName, `{"foo":"bar"}`) RequireStatus(t, resp, http.StatusCreated) diff --git a/rest/blip_client_test.go b/rest/blip_client_test.go index cf4e2b554c..0303995486 100644 --- a/rest/blip_client_test.go +++ b/rest/blip_client_test.go @@ -97,7 +97,7 @@ func (btr *BlipTesterReplicator) initHandlers(btc *BlipTesterClient) { btr.replicationStats = db.NewBlipSyncStats() } - btr.bt.blipContext.HandlerForProfile[db.MessageProveAttachment] = func(msg *blip.Message) { + btr.bt.dispatcher.SetHandler(db.MessageProveAttachment, blip.AsAsyncHandler(func(msg *blip.Message) { btr.storeMessage(msg) nonce, err := msg.Body() @@ -126,9 +126,9 @@ func (btr *BlipTesterReplicator) initHandlers(btc *BlipTesterClient) { resp := msg.Response() resp.SetBody([]byte(proof)) btr.replicationStats.ProveAttachment.Add(1) - } + })) - btr.bt.blipContext.HandlerForProfile[db.MessageChanges] = func(msg *blip.Message) { + btr.bt.dispatcher.SetHandler(db.MessageChanges, blip.AsAsyncHandler(func(msg *blip.Message) { btr.storeMessage(msg) btcr := btc.getCollectionClientFromMessage(msg) @@ -217,13 +217,13 @@ func (btr *BlipTesterReplicator) initHandlers(btc *BlipTesterClient) { } response.SetBody(b) - } + })) - btr.bt.blipContext.HandlerForProfile[db.MessageProposeChanges] = func(msg *blip.Message) { + 
btr.bt.dispatcher.SetHandler(db.MessageProposeChanges, blip.AsAsyncHandler(func(msg *blip.Message) { btc.pullReplication.storeMessage(msg) - } + })) - btr.bt.blipContext.HandlerForProfile[db.MessageRev] = func(msg *blip.Message) { + btr.bt.dispatcher.SetHandler(db.MessageRev, blip.AsAsyncHandler(func(msg *blip.Message) { btc.pullReplication.storeMessage(msg) btcr := btc.getCollectionClientFromMessage(msg) @@ -439,9 +439,9 @@ func (btr *BlipTesterReplicator) initHandlers(btc *BlipTesterClient) { response := msg.Response() response.SetBody([]byte(`[]`)) } - } + })) - btr.bt.blipContext.HandlerForProfile[db.MessageGetAttachment] = func(msg *blip.Message) { + btr.bt.dispatcher.SetHandler(db.MessageGetAttachment, blip.AsAsyncHandler(func(msg *blip.Message) { btr.storeMessage(msg) digest, ok := msg.Properties[db.GetAttachmentDigest] @@ -459,17 +459,17 @@ func (btr *BlipTesterReplicator) initHandlers(btc *BlipTesterClient) { response := msg.Response() response.SetBody(attachment) btr.replicationStats.GetAttachment.Add(1) - } + })) - btr.bt.blipContext.HandlerForProfile[db.MessageNoRev] = func(msg *blip.Message) { + btr.bt.dispatcher.SetHandler(db.MessageNoRev, blip.AsAsyncHandler(func(msg *blip.Message) { // TODO: Support norev messages btr.storeMessage(msg) - } + })) - btr.bt.blipContext.DefaultHandler = func(msg *blip.Message) { + btr.bt.dispatcher.SetDefaultHandler(blip.AsAsyncHandler(func(msg *blip.Message) { btr.storeMessage(msg) base.PanicfCtx(context.TODO(), "Unknown profile: %s caught by client DefaultHandler - msg: %#v", msg.Profile(), msg) - } + })) } // saveAttachment takes a content-type, and base64 encoded data and stores the attachment on the client @@ -702,6 +702,8 @@ func (btcc *BlipTesterCollectionClient) StartOneshotPullRequestPlus() (err error return btcc.StartPullSince("false", "0", "false", "", "true") } +//////// HELPER FUNCTIONS: + // StartPullSince will begin a pull replication between the client and server with the given params. func (btc *BlipTesterCollectionClient) StartPullSince(continuous, since, activeOnly, channels, requestPlus string) (err error) { subChangesRequest := blip.NewRequest() diff --git a/rest/utilities_testing.go b/rest/utilities_testing.go index 1960d983e1..d8a586c881 100644 --- a/rest/utilities_testing.go +++ b/rest/utilities_testing.go @@ -1243,6 +1243,8 @@ type BlipTester struct { // with this websocket connection blipContext *blip.Context + dispatcher blip.ByProfileDispatcher + // The blip sender that can be used for sending messages over the websocket connection sender *blip.Sender @@ -1391,6 +1393,7 @@ func createBlipTesterWithSpec(tb testing.TB, spec BlipTesterSpec, rt *RestTester } // Ensure that errors get correctly surfaced in tests + bt.blipContext.RequestHandler = bt.dispatcher.Dispatch bt.blipContext.FatalErrorHandler = func(err error) { tb.Fatalf("BLIP fatal error: %v", err) } @@ -1558,6 +1561,17 @@ func (bt *BlipTester) SendRev(docId, docRev string, body []byte, properties blip } +// Registers a function as a handler for "changes" messages. Ensures that the messages are +// handled one at a time to avoid race conditions: some of the code assumes that each call sees +// sequences higher than the last, and some of them need the "null" caught-up to come last. 
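+// Typical usage, mirroring the test call sites updated in this change:
+//
+//	bt.RegisterChangesHandler(func(request *blip.Message) {
+//		// inspect the changes body; reply with an empty array when !request.NoReply()
+//	})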
+func (bt *BlipTester) RegisterChangesHandler(changesHandler blip.SynchronousHandler) { + throttle := blip.ThrottlingDispatcher{ + MaxConcurrency: 1, + Handler: blip.AsAsyncHandler(changesHandler), + } + bt.dispatcher.SetHandler("changes", throttle.Dispatch) +} + // GetUserPayload will take username, password, email, channels and roles you want to assign a user and create the appropriate payload for the _user endpoint func GetUserPayload(t testing.TB, username, password, email string, collection *db.DatabaseCollection, chans, roles []string) string { config := auth.PrincipalConfig{} @@ -1674,16 +1688,16 @@ func (bt *BlipTester) GetDocAtRev(requestedDocID, requestedDocRev string) (resul defer func() { // Clean up all profile handlers that are registered as part of this test - delete(bt.blipContext.HandlerForProfile, "changes") - delete(bt.blipContext.HandlerForProfile, "rev") + bt.dispatcher.SetHandler("changes", nil) + bt.dispatcher.SetHandler("rev", nil) }() // -------- Changes handler callback -------- - bt.blipContext.HandlerForProfile["changes"] = getChangesHandler(&changesFinishedWg, &revsFinishedWg) + bt.RegisterChangesHandler(getChangesHandler(&changesFinishedWg, &revsFinishedWg)) // -------- Rev handler callback -------- - bt.blipContext.HandlerForProfile["rev"] = func(request *blip.Message) { - + bt.dispatcher.SetHandler("rev", func(request *blip.Message, onComplete func()) { + defer onComplete() defer revsFinishedWg.Done() body, err := request.Body() if err != nil { @@ -1703,8 +1717,7 @@ func (bt *BlipTester) GetDocAtRev(requestedDocID, requestedDocRev string) (resul if docId == requestedDocID && docRev == requestedDocRev { resultDoc = doc } - - } + }) // Send subChanges to subscribe to changes, which will cause the "changes" profile handler above to be called back changesFinishedWg.Add(1) @@ -1741,7 +1754,7 @@ func (bt *BlipTester) SendRevWithAttachment(input SendRevWithAttachmentInput) (s defer func() { // Clean up all profile handlers that are registered as part of this test - delete(bt.blipContext.HandlerForProfile, "getAttachment") + bt.dispatcher.SetHandler("getAttachment", nil) }() // Create a doc with an attachment @@ -1772,14 +1785,14 @@ func (bt *BlipTester) SendRevWithAttachment(input SendRevWithAttachmentInput) (s getAttachmentWg := sync.WaitGroup{} - bt.blipContext.HandlerForProfile["getAttachment"] = func(request *blip.Message) { + bt.dispatcher.SetHandler("getAttachment", blip.AsAsyncHandler(func(request *blip.Message) { defer getAttachmentWg.Done() if request.Properties["digest"] != myAttachment.Digest { panic(fmt.Sprintf("Unexpected digest. Got: %v, expected: %v", request.Properties["digest"], myAttachment.Digest)) } response := request.Response() response.SetBody([]byte(input.attachmentBody)) - } + })) // Push a rev with an attachment. 
getAttachmentWg.Add(1) @@ -1827,7 +1840,7 @@ func (bt *BlipTester) GetChanges() (changes [][]interface{}) { defer func() { // Clean up all profile handlers that are registered as part of this test - delete(bt.blipContext.HandlerForProfile, "changes") // a handler for this profile is registered in SubscribeToChanges + bt.dispatcher.SetHandler("changes", nil) // a handler for this profile is registered in SubscribeToChanges }() collectedChanges := [][]interface{}{} @@ -1897,7 +1910,6 @@ func (bt *BlipTester) WaitForNumDocsViaChanges(numDocsExpected int) (docs map[st // It is basically a pull replication without the checkpointing // Warning: this can only be called from a single goroutine, given the fact it registers profile handlers. func (bt *BlipTester) PullDocs() (docs map[string]RestDocument) { - docs = map[string]RestDocument{} // Mutex to avoid write contention on docs while PullDocs is running (as rev messages may be processed concurrently) @@ -1907,17 +1919,16 @@ func (bt *BlipTester) PullDocs() (docs map[string]RestDocument) { defer func() { // Clean up all profile handlers that are registered as part of this test - delete(bt.blipContext.HandlerForProfile, "changes") - delete(bt.blipContext.HandlerForProfile, "rev") + bt.dispatcher.SetHandler("changes", nil) + bt.dispatcher.SetHandler("rev", nil) }() // -------- Changes handler callback -------- // When this test sends subChanges, Sync Gateway will send a changes request that must be handled - bt.blipContext.HandlerForProfile["changes"] = getChangesHandler(&changesFinishedWg, &revsFinishedWg) + bt.RegisterChangesHandler(getChangesHandler(&changesFinishedWg, &revsFinishedWg)) // -------- Rev handler callback -------- - bt.blipContext.HandlerForProfile["rev"] = func(request *blip.Message) { - + bt.dispatcher.SetHandler("rev", blip.AsAsyncHandler(func(request *blip.Message) { defer revsFinishedWg.Done() body, err := request.Body() if err != nil { @@ -1971,15 +1982,15 @@ func (bt *BlipTester) PullDocs() (docs map[string]RestDocument) { response.SetBody([]byte{}) // Empty response to indicate success } - } + })) // -------- Norev handler callback -------- - bt.blipContext.HandlerForProfile["norev"] = func(request *blip.Message) { + bt.dispatcher.SetHandler("norev", blip.AsAsyncHandler(func(request *blip.Message) { // If a norev is received, then don't bother waiting for one of the expected revisions, since it will never come. // The norev could be added to the returned docs map, but so far there is no need for that. The ability // to assert on the number of actually received revisions (which norevs won't affect) meets current test requirements. 
defer revsFinishedWg.Done() - } + })) // Send subChanges to subscribe to changes, which will cause the "changes" profile handler above to be called back changesFinishedWg.Add(1) @@ -2004,7 +2015,7 @@ func (bt *BlipTester) PullDocs() (docs map[string]RestDocument) { func (bt *BlipTester) SubscribeToChanges(continuous bool, changes chan<- *blip.Message) { // When this test sends subChanges, Sync Gateway will send a changes request that must be handled - bt.blipContext.HandlerForProfile["changes"] = func(request *blip.Message) { + bt.RegisterChangesHandler(func(request *blip.Message) { changes <- request @@ -2019,7 +2030,7 @@ func (bt *BlipTester) SubscribeToChanges(continuous bool, changes chan<- *blip.M response.SetBody(emptyResponseValBytes) } - } + }) // Send subChanges to subscribe to changes, which will cause the "changes" profile handler above to be called back subChangesRequest := blip.NewRequest() From a80a9dac99559e18bfacabdc4ac8e8c79adb76a8 Mon Sep 17 00:00:00 2001 From: Jens Alfke Date: Tue, 9 May 2023 14:24:08 -0700 Subject: [PATCH 30/42] Flow-control of outgoing 'rev' messages `blipRevSender` manages a list of `revToSend` structs identifying revisions to be sent as `rev` messages, and sends them at a limited rate so that the number being written to the socket and their total size don't exceed a limit. --- db/blip_handler.go | 47 +++++------ db/blip_rev_sender.go | 170 ++++++++++++++++++++++++++++++++++++++++ db/blip_sync_context.go | 151 ++++++++++++++++------------------- 3 files changed, 262 insertions(+), 106 deletions(-) create mode 100644 db/blip_rev_sender.go diff --git a/db/blip_handler.go b/db/blip_handler.go index f9cb63083a..aed8bc2c94 100644 --- a/db/blip_handler.go +++ b/db/blip_handler.go @@ -846,51 +846,54 @@ func (bh *blipHandler) handleProposeChanges(rq *blip.Message) error { // ////// DOCUMENTS: -func (bsc *BlipSyncContext) sendRevAsDelta(sender *blip.Sender, docID, revID string, deltaSrcRevID string, seq SequenceID, knownRevs map[string]bool, maxHistory int, handleChangesResponseCollection *DatabaseCollectionWithUser, collectionIdx *int) error { +// Returns false, nil if there is no error but the rev can't be sent as a delta. +func (bsc *BlipSyncContext) sendRevAsDelta(collection *DatabaseCollectionWithUser, r *revToSend, knownRevs map[string]bool, deltaSrcRevID string) (bool, error) { bsc.replicationStats.SendRevDeltaRequestedCount.Add(1) - revDelta, redactedRev, err := handleChangesResponseCollection.GetDelta(bsc.loggingCtx, docID, deltaSrcRevID, revID) + revDelta, redactedRev, err := collection.GetDelta(bsc.loggingCtx, r.docID, deltaSrcRevID, r.revID) if err == ErrForbidden { // nolint: gocritic // can't convert if/else if to switch since base.IsFleeceDeltaError is not switchable - return err + return false, err } else if base.IsFleeceDeltaError(err) { // Something went wrong in the diffing library. We want to know about this! - base.WarnfCtx(bsc.loggingCtx, "Falling back to full body replication. Error generating delta from %s to %s for key %s - err: %v", deltaSrcRevID, revID, base.UD(docID), err) - return bsc.sendRevision(sender, docID, revID, seq, knownRevs, maxHistory, handleChangesResponseCollection, collectionIdx) + base.WarnfCtx(bsc.loggingCtx, "Falling back to full body replication. Error generating delta from %s to %s for key %s - err: %v", deltaSrcRevID, r.revID, base.UD(r.docID), err) + return false, nil } else if err == base.ErrDeltaSourceIsTombstone { - base.TracefCtx(bsc.loggingCtx, base.KeySync, "Falling back to full body replication. 
Delta source %s is tombstone. Unable to generate delta to %s for key %s", deltaSrcRevID, revID, base.UD(docID)) - return bsc.sendRevision(sender, docID, revID, seq, knownRevs, maxHistory, handleChangesResponseCollection, collectionIdx) + base.TracefCtx(bsc.loggingCtx, base.KeySync, "Falling back to full body replication. Delta source %s is tombstone. Unable to generate delta to %s for key %s", deltaSrcRevID, r.revID, base.UD(r.docID)) + return false, nil } else if err != nil { - base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Falling back to full body replication. Couldn't get delta from %s to %s for key %s - err: %v", deltaSrcRevID, revID, base.UD(docID), err) - return bsc.sendRevision(sender, docID, revID, seq, knownRevs, maxHistory, handleChangesResponseCollection, collectionIdx) + base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Falling back to full body replication. Couldn't get delta from %s to %s for key %s - err: %v", deltaSrcRevID, r.revID, base.UD(r.docID), err) + return false, nil } if redactedRev != nil { - history := toHistory(redactedRev.History, knownRevs, maxHistory) - properties := blipRevMessageProperties(history, redactedRev.Deleted, seq) - return bsc.sendRevisionWithProperties(sender, docID, revID, collectionIdx, redactedRev.BodyBytes, nil, properties, seq, nil) + history := toHistory(redactedRev.History, knownRevs, r.maxHistory) + properties := blipRevMessageProperties(history, redactedRev.Deleted, r.seq) + return true, bsc.sendRevisionWithProperties(r, redactedRev.BodyBytes, nil, properties, nil) } if revDelta == nil { - base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Falling back to full body replication. Couldn't get delta from %s to %s for key %s", deltaSrcRevID, revID, base.UD(docID)) - return bsc.sendRevision(sender, docID, revID, seq, knownRevs, maxHistory, handleChangesResponseCollection, collectionIdx) + base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Falling back to full body replication. Couldn't get delta from %s to %s for key %s", deltaSrcRevID, r.revID, base.UD(r.docID)) + return false, nil } resendFullRevisionFunc := func() error { - base.InfofCtx(bsc.loggingCtx, base.KeySync, "Resending revision as full body. Peer couldn't process delta %s from %s to %s for key %s", base.UD(revDelta.DeltaBytes), deltaSrcRevID, revID, base.UD(docID)) - return bsc.sendRevision(sender, docID, revID, seq, knownRevs, maxHistory, handleChangesResponseCollection, collectionIdx) + base.InfofCtx(bsc.loggingCtx, base.KeySync, "Resending revision as full body. Peer couldn't process delta %s from %s to %s for key %s", base.UD(revDelta.DeltaBytes), deltaSrcRevID, r.revID, base.UD(r.docID)) + r.useDelta = false + bsc.revSender.addRevs([]*revToSend{r}) + return nil } - base.TracefCtx(bsc.loggingCtx, base.KeySync, "docID: %s - delta: %v", base.UD(docID), base.UD(string(revDelta.DeltaBytes))) - if err := bsc.sendDelta(sender, docID, collectionIdx, deltaSrcRevID, revDelta, seq, resendFullRevisionFunc); err != nil { - return err + base.TracefCtx(bsc.loggingCtx, base.KeySync, "r.docID: %s - delta: %v", base.UD(r.docID), base.UD(string(revDelta.DeltaBytes))) + if err := bsc.sendDelta(r, deltaSrcRevID, revDelta, resendFullRevisionFunc); err != nil { + return false, err } // We'll consider this one doc read for collection stats purposes, since GetDelta doesn't go through the normal getRev codepath. 
- handleChangesResponseCollection.collectionStats.NumDocReads.Add(1) - handleChangesResponseCollection.collectionStats.DocReadsBytes.Add(int64(len(revDelta.DeltaBytes))) + collection.collectionStats.NumDocReads.Add(1) + collection.collectionStats.DocReadsBytes.Add(int64(len(revDelta.DeltaBytes))) bsc.replicationStats.SendRevDeltaSentCount.Add(1) - return nil + return true, nil } func (bh *blipHandler) handleNoRev(rq *blip.Message) error { diff --git a/db/blip_rev_sender.go b/db/blip_rev_sender.go new file mode 100644 index 0000000000..3edb493087 --- /dev/null +++ b/db/blip_rev_sender.go @@ -0,0 +1,170 @@ +/* +Copyright 2023-Present Couchbase, Inc. + +Use of this software is governed by the Business Source License included in +the file licenses/BSL-Couchbase.txt. As of the Change Date specified in that +file, in accordance with the Business Source License, use of this software will +be governed by the Apache License, Version 2.0, included in the file +licenses/APL2.txt. +*/ + +package db + +import ( + "sync" + "sync/atomic" + "time" + + "github.com/couchbase/go-blip" + "github.com/couchbase/sync_gateway/base" +) + +// A queue that takes `revToSend` structs that define a revision to send to the peer, +// and sends those to BlipSyncContext's `sendRevision` and `sendRevAsDelta` methods at a rate +// that ensures only a limited number of outgoing in-memory BLIP "rev" messages are present at once. +type blipRevSender struct { + bsc *BlipSyncContext // The main sync object [const] + maxActiveCount int // Max number of messages I can be sending at once [const] + maxActiveBytes int64 // Max total size of messages I'm sending [const] + mutex sync.Mutex // Synchronizes access to queue,activeCount + queue []*revToSend // Ordered queue of revisions to be sent [synced] + activeCount int // Number of revs being fetched, processed, sent [synced] + sendingCountA int64 // Number of BLIP messages being sent to the socket [atomic] + sendingBytesA int64 // Total size of BLIP messages I'm sending [atomic] +} + +// Captures the information about a "rev" message to send. Queued by blipRevSender. +type revToSend struct { + seq SequenceID // Sequence + docID string // Document ID to send + revID string // Revision ID to send + knownRevs []any // RevIDs the client already has + maxHistory int // Max length of rev history to send + useDelta bool // If true, send as delta if possible + collectionIdx *int // Identifies which collection + sender *blip.Sender // BLIP sender + timestamp time.Time // When the 'changes' response was received + messageLen int // Length of BLIP message; must be filled in when message sent +} + +// Creates a new blipRevSender. +// - `maxActiveCount` is the maximum number of revisions that can be actively processed: +// fetched from the database, converted to 'rev' messages, and being written to the socket. +// - `maxActiveBytes` is the (approximate) maximum total size in bytes of those messages, +// or 0 for no size limit. +func newBlipRevSender(blipSyncContext *BlipSyncContext, maxActiveCount int, maxActiveBytes int64) *blipRevSender { + return &blipRevSender{ + bsc: blipSyncContext, + maxActiveCount: maxActiveCount, + maxActiveBytes: maxActiveBytes, + } +} + +// Queues revisions to send. +func (s *blipRevSender) addRevs(revs []*revToSend) { + s.mutex.Lock() + defer s.mutex.Unlock() + + s.queue = append(s.queue, revs...) + s._sendMore() +} + +// To be called by the BlipSyncContext when it's finished writing a 'rev' message to the socket. 
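+// It decrements activeCount and sendingCountA, subtracts the completed message's size from
+// sendingBytesA, and then calls _sendMore so queued revisions can start flowing again; the
+// bookkeeping runs on the shared thread pool under the sender's mutex.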
+func (s *blipRevSender) completedRev(rev *revToSend) { + messageLen := rev.messageLen + s.bsc.threadPool.Go(func() { + s.mutex.Lock() + defer s.mutex.Unlock() + + s.activeCount-- + atomic.AddInt64(&s.sendingCountA, int64(-1)) + atomic.AddInt64(&s.sendingBytesA, int64(-messageLen)) + s._sendMore() + }) +} + +func (s *blipRevSender) _sendMore() { + // Mutex must be locked when calling this! + + // Get the current total size, and estimate the size of a message: + curSendingCount := atomic.LoadInt64(&s.sendingCountA) + estSendingBytes := atomic.LoadInt64(&s.sendingBytesA) + var estMessageSize int64 = 4096 + if curSendingCount > 0 { + estMessageSize = estSendingBytes / curSendingCount + } + + n := 0 + for s.activeCount < s.maxActiveCount && len(s.queue) > 0 { + if s.maxActiveBytes > 0 && estSendingBytes+estMessageSize > s.maxActiveBytes { + // Stop if the byte count is too high + break + } + // Send the next revision (asynchronously): + next := s.queue[0] + s.queue = s.queue[1:] + s.activeCount++ + s.bsc.threadPool.Go(func() { s._sendNow(next) }) + estSendingBytes += estMessageSize + n++ + } + // if len(s.queue) > 0 { + // base.WarnfCtx(s.bsc.loggingCtx, "_sendMore: stopping after %d, at %d bytes (est), %d messages ... avg msg size is %d", n, estSendingBytes, s.activeCount, estMessageSize) + // } +} + +func (s *blipRevSender) _sendNow(rev *revToSend) { + // Sends a 'rev' message, or if that fails, sends a 'norev'; then updates stats. + if err := s._trySendNow(rev); err != nil { + if base.IsDocNotFoundError(err) { + // If rev isn't available, send a 'norev'. This is important for client bookkeeping. + err = s.bsc.sendNoRev(rev, err) + } + if err != nil { + base.ErrorfCtx(s.bsc.loggingCtx, "Error sending 'rev' over BLIP: %s", err) + if cb := s.bsc.fatalErrorCallback; cb != nil { + cb(err) + } + } + } + + atomic.AddInt64(&s.sendingCountA, int64(1)) + atomic.AddInt64(&s.sendingBytesA, int64(rev.messageLen)) + + latency := time.Since(rev.timestamp).Nanoseconds() + s.bsc.replicationStats.HandleChangesSendRevCount.Add(1) + s.bsc.replicationStats.HandleChangesSendRevLatency.Add(latency) +} + +func (s *blipRevSender) _trySendNow(rev *revToSend) error { + // Sends a 'rev' message or returns an error. (Subroutine of _sendNow.) + + // Convert knownRevs to a set of strings: + knownRevs := make(map[string]bool, len(rev.knownRevs)) + var deltaSrcRevID *string + for _, knownRev := range rev.knownRevs { + if revID, ok := knownRev.(string); ok { + knownRevs[revID] = true + if deltaSrcRevID == nil { + // The first element of the knownRevs array is the one to use as deltaSrc + deltaSrcRevID = &revID + } + } else { + base.ErrorfCtx(s.bsc.loggingCtx, "Invalid knownRevs in response to 'changes' message") + } + } + rev.knownRevs = nil // (no longer needed) + + collection, err := s.bsc.copyDatabaseCollectionWithUser(rev.collectionIdx) + if err != nil { + return err + } + if rev.useDelta && deltaSrcRevID != nil { + sent, err := s.bsc.sendRevAsDelta(collection, rev, knownRevs, *deltaSrcRevID) + if sent || err != nil { + return err + } + // if rev can't be sent as a delta, send it as a full revision... + } + return s.bsc.sendRevision(collection, rev, knownRevs) +} diff --git a/db/blip_sync_context.go b/db/blip_sync_context.go index 8ca5f0a01d..da9be4c989 100644 --- a/db/blip_sync_context.go +++ b/db/blip_sync_context.go @@ -38,6 +38,12 @@ const ( // Above this amount, the BLIP engine stops reading from the WebSocket, applying back-pressure // to the client and keeping memory usage down. 
BlipMaxIncomingBytesBeingDispatched = 100000 // bytes + + // Max number of outgoing revisions in memory being sent + BlipMaxRevsSending = 50 + + // Max total size (bytes) of outgoing revisions in memory being sent + BlipMaxRevsLengthSending = 100 * 1000 ) var ErrClosedBLIPSender = errors.New("use of closed BLIP sender") @@ -59,6 +65,7 @@ func NewBlipSyncContext(ctx context.Context, bc *blip.Context, db *Database, con bsc.replicationStats = NewBlipSyncStats() } bsc.stats.lastReportTime.Store(time.Now().UnixMilli()) + bsc.revSender = newBlipRevSender(bsc, BlipMaxRevsSending, BlipMaxRevsLengthSending) if u := db.User(); u != nil { bsc.userName = u.Name() @@ -130,7 +137,7 @@ type BlipSyncContext struct { readOnly bool collections *blipCollections // all collections handled by blipSyncContext, implicit or via GetCollections - + revSender *blipRevSender // schedules sending 'rev' messages stats blipSyncStats // internal structure to store stats } @@ -290,7 +297,7 @@ func (bsc *BlipSyncContext) _copyContextDatabase() *Database { } // Handles the response to a pushed "changes" message, i.e. the list of revisions the client wants -func (bsc *BlipSyncContext) handleChangesResponse(sender *blip.Sender, response *blip.Message, changeArray [][]interface{}, requestSent time.Time, handleChangesResponseDbCollection *DatabaseCollectionWithUser, collectionIdx *int) error { +func (bsc *BlipSyncContext) handleChangesResponse(sender *blip.Sender, response *blip.Message, changeArray [][]interface{}, requestSent time.Time, collectionIdx *int) error { defer func() { if panicked := recover(); panicked != nil { bsc.replicationStats.NumHandlersPanicked.Add(1) @@ -334,13 +341,9 @@ func (bsc *BlipSyncContext) handleChangesResponse(sender *blip.Sender, response base.TracefCtx(bsc.loggingCtx, base.KeySync, "Client didn't specify 'deltas' property in 'changes' response. useDeltas: %v", bsc.useDeltas) } - // Maps docID --> a map containing true for revIDs known to the client - knownRevsByDoc := make(map[string]map[string]bool, len(answer)) - // `answer` is an array where each item is either an array of known rev IDs, or a non-array // placeholder (probably 0). The item numbers match those of changeArray. 
- var revSendTimeLatency int64 - var revSendCount int64 + revsToSend := make([]*revToSend, 0, len(answer)) sentSeqs := make([]SequenceID, 0) alreadyKnownSeqs := make([]SequenceID, 0) @@ -353,49 +356,23 @@ func (bsc *BlipSyncContext) handleChangesResponse(sender *blip.Sender, response seq := changeArray[i][0].(SequenceID) docID := changeArray[i][1].(string) revID := changeArray[i][2].(string) - if knownRevsArray, ok := knownRevsArrayInterface.([]interface{}); ok { - deltaSrcRevID := "" - knownRevs := knownRevsByDoc[docID] - if knownRevs == nil { - knownRevs = make(map[string]bool, len(knownRevsArray)) - knownRevsByDoc[docID] = knownRevs - } - - // The first element of the knownRevsArray returned from CBL is the parent revision to use as deltaSrc - if bsc.useDeltas && len(knownRevsArray) > 0 { - if revID, ok := knownRevsArray[0].(string); ok { - deltaSrcRevID = revID - } - } - - for _, rev := range knownRevsArray { - if revID, ok := rev.(string); ok { - knownRevs[revID] = true - } else { - base.ErrorfCtx(bsc.loggingCtx, "Invalid response to 'changes' message") - return nil - } - } - - var err error - if deltaSrcRevID != "" { - err = bsc.sendRevAsDelta(sender, docID, revID, deltaSrcRevID, seq, knownRevs, maxHistory, handleChangesResponseDbCollection, collectionIdx) - } else { - err = bsc.sendRevision(sender, docID, revID, seq, knownRevs, maxHistory, handleChangesResponseDbCollection, collectionIdx) - } - if err != nil { - return err - } - - revSendTimeLatency += time.Since(changesResponseReceived).Nanoseconds() - revSendCount++ - + revsToSend = append(revsToSend, &revToSend{ + seq: seq, + docID: docID, + revID: revID, + knownRevs: knownRevsArray, + maxHistory: maxHistory, + useDelta: bsc.useDeltas, + collectionIdx: collectionIdx, + sender: sender, + timestamp: changesResponseReceived, + }) if collectionCtx.sgr2PushAddExpectedSeqsCallback != nil { sentSeqs = append(sentSeqs, seq) } } else { - base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Peer didn't want revision %s / %s (seq:%v)", base.UD(docID), revID, seq) + base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Peer didn't want revision %s / %s (rev.seq:%v)", base.UD(docID), revID, seq) if collectionCtx.sgr2PushAlreadyKnownSeqsCallback != nil { alreadyKnownSeqs = append(alreadyKnownSeqs, seq) } @@ -406,27 +383,29 @@ func (bsc *BlipSyncContext) handleChangesResponse(sender *blip.Sender, response collectionCtx.sgr2PushAlreadyKnownSeqsCallback(alreadyKnownSeqs...) } - if revSendCount > 0 { + if len(revsToSend) > 0 { + bsc.revSender.addRevs(revsToSend) if collectionCtx.sgr2PushAddExpectedSeqsCallback != nil { collectionCtx.sgr2PushAddExpectedSeqsCallback(sentSeqs...) } - - bsc.replicationStats.HandleChangesSendRevCount.Add(revSendCount) - bsc.replicationStats.HandleChangesSendRevLatency.Add(revSendTimeLatency) bsc.replicationStats.HandleChangesSendRevTime.Add(time.Since(changesResponseReceived).Nanoseconds()) } return nil } -// Pushes a revision body to the client -func (bsc *BlipSyncContext) sendRevisionWithProperties(sender *blip.Sender, docID string, revID string, collectionIdx *int, - bodyBytes []byte, attMeta []AttachmentStorageMeta, properties blip.Properties, seq SequenceID, resendFullRevisionFunc func() error) error { +// Pushes a revision body to the client. Returns length of body in bytes. 
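The rewritten loop above only repackages each wanted change into a revToSend; decoding the peer's known-rev lists now happens later, in blipRevSender._trySendNow. As a reminder of the payload shape described in the earlier comment (each entry is either an array of revIDs the client already has, with the first one doubling as the delta source, or a non-array placeholder meaning the revision isn't wanted), here is a small standalone sketch of that decoding. The sample body and printed messages are invented, and the real code works with SequenceID/revToSend rather than plain strings.

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Hypothetical 'changes' response body: entry 0 lists revs the client
	// already has for the first change (1-abc becomes the delta source),
	// entry 1 is a placeholder meaning the second revision isn't wanted.
	body := []byte(`[["1-abc","1-def"], 0]`)

	var answer []interface{}
	if err := json.Unmarshal(body, &answer); err != nil {
		panic(err)
	}

	for i, entry := range answer {
		knownRevsArray, ok := entry.([]interface{})
		if !ok {
			fmt.Printf("entry %d: peer doesn't want this revision\n", i)
			continue
		}
		knownRevs := make(map[string]bool, len(knownRevsArray))
		var deltaSrcRevID string
		for _, rev := range knownRevsArray {
			if revID, ok := rev.(string); ok {
				knownRevs[revID] = true
				if deltaSrcRevID == "" {
					deltaSrcRevID = revID // first element is used as deltaSrc
				}
			}
		}
		fmt.Printf("entry %d: send rev (deltaSrc=%q, %d known revs)\n", i, deltaSrcRevID, len(knownRevs))
	}
}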
+func (bsc *BlipSyncContext) sendRevisionWithProperties(r *revToSend, + bodyBytes []byte, attMeta []AttachmentStorageMeta, properties blip.Properties, resendFullRevisionFunc func() error) error { + + docID := r.docID + revID := r.revID + seq := r.seq outrq := NewRevMessage() outrq.SetID(docID) outrq.SetRev(revID) - outrq.SetCollection(collectionIdx) + outrq.SetCollection(r.collectionIdx) if bsc.sendRevNoConflicts { outrq.SetNoConflicts(true) } @@ -435,7 +414,7 @@ func (bsc *BlipSyncContext) sendRevisionWithProperties(sender *blip.Sender, docI outrq.SetProperties(properties) outrq.SetJSONBodyAsBytes(bodyBytes) - + r.messageLen = len(bodyBytes) // Update read stats if messageBody, err := outrq.Body(); err == nil { bsc.replicationStats.SendRevBytes.Add(int64(len(messageBody))) @@ -443,7 +422,7 @@ func (bsc *BlipSyncContext) sendRevisionWithProperties(sender *blip.Sender, docI base.TracefCtx(bsc.loggingCtx, base.KeySync, "Sending revision %s/%s, body:%s, properties: %v, attDigests: %v", base.UD(docID), revID, base.UD(string(bodyBytes)), base.UD(properties), attMeta) - collectionCtx, err := bsc.collections.get(collectionIdx) + collectionCtx, err := bsc.collections.get(r.collectionIdx) if err != nil { return err } @@ -459,8 +438,10 @@ func (bsc *BlipSyncContext) sendRevisionWithProperties(sender *blip.Sender, docI outrq.SetNoReply(true) } + outrq.OnSent(func() { bsc.revSender.completedRev(r) }) + // send the rev - if !bsc.sendBLIPMessage(sender, outrq.Message) { + if !bsc.sendBLIPMessage(r.sender, outrq.Message) { bsc.removeAllowedAttachments(docID, attMeta, activeSubprotocol) return ErrClosedBLIPSender } @@ -569,14 +550,14 @@ func (bsc *BlipSyncContext) setUseDeltas(clientCanUseDeltas bool) { } } -func (bsc *BlipSyncContext) sendDelta(sender *blip.Sender, docID string, collectionIdx *int, deltaSrcRevID string, revDelta *RevisionDelta, seq SequenceID, resendFullRevisionFunc func() error) error { +func (bsc *BlipSyncContext) sendDelta(r *revToSend, deltaSrcRevID string, revDelta *RevisionDelta, resendFullRevisionFunc func() error) error { - properties := blipRevMessageProperties(revDelta.RevisionHistory, revDelta.ToDeleted, seq) + properties := blipRevMessageProperties(revDelta.RevisionHistory, revDelta.ToDeleted, r.seq) properties[RevMessageDeltaSrc] = deltaSrcRevID - base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Sending rev %q %s as delta. DeltaSrc:%s", base.UD(docID), revDelta.ToRevID, deltaSrcRevID) - return bsc.sendRevisionWithProperties(sender, docID, revDelta.ToRevID, collectionIdx, revDelta.DeltaBytes, revDelta.AttachmentStorageMeta, - properties, seq, resendFullRevisionFunc) + base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Sending rev %q %s as delta. 
DeltaSrc:%s", base.UD(r.docID), revDelta.ToRevID, deltaSrcRevID) + return bsc.sendRevisionWithProperties(r, revDelta.DeltaBytes, revDelta.AttachmentStorageMeta, + properties, resendFullRevisionFunc) } // sendBLIPMessage is a simple wrapper around all sent BLIP messages @@ -589,17 +570,17 @@ func (bsc *BlipSyncContext) sendBLIPMessage(sender *blip.Sender, msg *blip.Messa return ok } -func (bsc *BlipSyncContext) sendNoRev(sender *blip.Sender, docID, revID string, collectionIdx *int, seq SequenceID, err error) error { - base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Sending norev %q %s due to unavailable revision: %v", base.UD(docID), revID, err) +func (bsc *BlipSyncContext) sendNoRev(r *revToSend, err error) error { + base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Sending norev %q %s due to unavailable revision: %v", base.UD(r.docID), r.revID, err) noRevRq := NewNoRevMessage() - noRevRq.SetId(docID) - noRevRq.SetRev(revID) - noRevRq.SetCollection(collectionIdx) + noRevRq.SetId(r.docID) + noRevRq.SetRev(r.revID) + noRevRq.SetCollection(r.collectionIdx) if bsc.blipContext.ActiveSubprotocol() == BlipCBMobileReplicationV2 && bsc.clientType == BLIPClientTypeSGR2 { - noRevRq.SetSeq(seq) + noRevRq.SetSeq(r.seq) } else { - noRevRq.SetSequence(seq) + noRevRq.SetSequence(r.seq) } status, reason := base.ErrorAsHTTPStatus(err) @@ -609,32 +590,34 @@ func (bsc *BlipSyncContext) sendNoRev(sender *blip.Sender, docID, revID string, noRevRq.SetReason(reason) noRevRq.SetNoReply(true) - if !bsc.sendBLIPMessage(sender, noRevRq.Message) { + noRevRq.OnSent(func() { bsc.revSender.completedRev(r) }) + if !bsc.sendBLIPMessage(r.sender, noRevRq.Message) { return ErrClosedBLIPSender } - collectionCtx, err := bsc.collections.get(collectionIdx) + collectionCtx, err := bsc.collections.get(r.collectionIdx) if err != nil { return err } if collectionCtx.sgr2PushProcessedSeqCallback != nil { - collectionCtx.sgr2PushProcessedSeqCallback(seq) + collectionCtx.sgr2PushProcessedSeqCallback(r.seq) } return nil } // Pushes a revision body to the client -func (bsc *BlipSyncContext) sendRevision(sender *blip.Sender, docID, revID string, seq SequenceID, knownRevs map[string]bool, maxHistory int, handleChangesResponseCollection *DatabaseCollectionWithUser, collectionIdx *int) error { - rev, err := handleChangesResponseCollection.GetRev(bsc.loggingCtx, docID, revID, true, nil) - if base.IsDocNotFoundError(err) { - return bsc.sendNoRev(sender, docID, revID, collectionIdx, seq, err) - } else if err != nil { - return fmt.Errorf("failed to GetRev for doc %s with rev %s: %w", base.UD(docID).Redact(), base.MD(revID).Redact(), err) +func (bsc *BlipSyncContext) sendRevision(collection *DatabaseCollectionWithUser, r *revToSend, knownRevs map[string]bool) error { + rev, err := collection.GetRev(bsc.loggingCtx, r.docID, r.revID, true, nil) + if err != nil { + if !base.IsDocNotFoundError(err) { + err = fmt.Errorf("failed to GetRev for doc %s with rev %s: %w", base.UD(r.docID).Redact(), base.MD(r.revID).Redact(), err) + } + return err } - base.TracefCtx(bsc.loggingCtx, base.KeySync, "sendRevision, rev attachments for %s/%s are %v", base.UD(docID), revID, base.UD(rev.Attachments)) + base.TracefCtx(bsc.loggingCtx, base.KeySync, "sendRevision, rev attachments for %s/%s are %v", base.UD(r.docID), r.revID, base.UD(rev.Attachments)) attachmentStorageMeta := ToAttachmentStorageMeta(rev.Attachments) var bodyBytes []byte if base.IsEnterpriseEdition() { @@ -651,7 +634,7 @@ func (bsc *BlipSyncContext) sendRevision(sender *blip.Sender, docID, revID strin } else { 
body, err := rev.Body() if err != nil { - return bsc.sendNoRev(sender, docID, revID, collectionIdx, seq, err) + return err } // Still need to stamp _attachments into BLIP messages @@ -662,16 +645,16 @@ func (bsc *BlipSyncContext) sendRevision(sender *blip.Sender, docID, revID strin bodyBytes, err = base.JSONMarshalCanonical(body) if err != nil { - return bsc.sendNoRev(sender, docID, revID, collectionIdx, seq, err) + return err } } - history := toHistory(rev.History, knownRevs, maxHistory) - properties := blipRevMessageProperties(history, rev.Deleted, seq) + history := toHistory(rev.History, knownRevs, r.maxHistory) + properties := blipRevMessageProperties(history, rev.Deleted, r.seq) if base.LogDebugEnabled(base.KeySync) { - base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Sending rev %q %s based on %d known, digests: %v", base.UD(docID), revID, len(knownRevs), digests(attachmentStorageMeta)) + base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Sending rev %q %s based on %d known, digests: %v", base.UD(r.docID), r.revID, len(r.knownRevs), digests(attachmentStorageMeta)) } - return bsc.sendRevisionWithProperties(sender, docID, revID, collectionIdx, bodyBytes, attachmentStorageMeta, properties, seq, nil) + return bsc.sendRevisionWithProperties(r, bodyBytes, attachmentStorageMeta, properties, nil) } // digests returns a slice of digest extracted from the given attachment meta. From 2d5b448aa49918a67132a0c1614639563cae1719 Mon Sep 17 00:00:00 2001 From: Jens Alfke Date: Wed, 10 May 2023 12:19:57 -0700 Subject: [PATCH 31/42] Fixed tests that copy blip.Message structs The struct has a Mutex in it now so it shouldn't be copied. --- rest/blip_api_crud_test.go | 4 ++-- rest/blip_client_test.go | 8 +++----- rest/revocation_test.go | 12 ++++++------ rest/utilities_testing.go | 4 ++-- 4 files changed, 13 insertions(+), 15 deletions(-) diff --git a/rest/blip_api_crud_test.go b/rest/blip_api_crud_test.go index 0b5133c7d6..2fc8d2ba60 100644 --- a/rest/blip_api_crud_test.go +++ b/rest/blip_api_crud_test.go @@ -2033,7 +2033,7 @@ func TestRemovedMessageWithAlternateAccess(t *testing.T) { messages := btc.pullReplication.GetMessages() var highestMsgSeq uint32 - var highestSeqMsg blip.Message + var highestSeqMsg *blip.Message // Grab most recent changes message for _, message := range messages { messageBody, err := message.Body() @@ -2135,7 +2135,7 @@ func TestRemovedMessageWithAlternateAccessAndChannelFilteredReplication(t *testi messages := btc.pullReplication.GetMessages() var highestMsgSeq uint32 - var highestSeqMsg blip.Message + var highestSeqMsg *blip.Message // Grab most recent changes message for _, message := range messages { messageBody, err := message.Body() diff --git a/rest/blip_client_test.go b/rest/blip_client_test.go index 0303995486..448a188994 100644 --- a/rest/blip_client_test.go +++ b/rest/blip_client_test.go @@ -1053,15 +1053,13 @@ func (btr *BlipTesterReplicator) GetMessage(serialNumber blip.MessageNumber) (ms } // GetMessages returns a copy of all messages stored in the Client keyed by serial number -func (btr *BlipTesterReplicator) GetMessages() map[blip.MessageNumber]blip.Message { +func (btr *BlipTesterReplicator) GetMessages() map[blip.MessageNumber]*blip.Message { btr.messagesLock.RLock() defer btr.messagesLock.RUnlock() - messages := make(map[blip.MessageNumber]blip.Message, len(btr.messages)) + messages := make(map[blip.MessageNumber]*blip.Message, len(btr.messages)) for k, v := range btr.messages { - // Read the body before copying, since it might be read asynchronously - _, _ = 
v.Body() - messages[k] = *v + messages[k] = v.Clone() } return messages diff --git a/rest/revocation_test.go b/rest/revocation_test.go index bf0e50e2b5..32ce7959b3 100644 --- a/rest/revocation_test.go +++ b/rest/revocation_test.go @@ -159,22 +159,22 @@ func InitScenario(t *testing.T, rtConfig *RestTesterConfig) (ChannelRevocationTe defaultSyncFn := ` function (doc, oldDoc){ - if (doc._id === 'userRoles'){ + if (doc._id === 'userRoles'){ for (var key in doc.roles){ role(key, doc.roles[key]); } } - if (doc._id === 'roleChannels'){ + if (doc._id === 'roleChannels'){ for (var key in doc.channels){ access(key, doc.channels[key]); } } - if (doc._id === 'userChannels'){ + if (doc._id === 'userChannels'){ for (var key in doc.channels){ access(key, doc.channels[key]); } } - if (doc._id.indexOf("doc") >= 0){ + if (doc._id.indexOf("doc") >= 0){ channel(doc.channels); } }` @@ -1442,7 +1442,7 @@ func TestRevocationWithUserXattrs(t *testing.T) { access(key, meta.xattrs.channelInfo.userChannels[key]); } } - if (doc._id.indexOf("doc") >= 0){ + if (doc._id.indexOf("doc") >= 0){ channel(doc.channels); } }`, @@ -2328,7 +2328,7 @@ func TestRevocationNoRev(t *testing.T) { messages := btc.pullReplication.GetMessages() var highestMsgSeq uint32 - var highestSeqMsg blip.Message + var highestSeqMsg *blip.Message // Grab most recent changes message for _, message := range messages { messageBody, err := message.Body() diff --git a/rest/utilities_testing.go b/rest/utilities_testing.go index d8a586c881..e743b7934e 100644 --- a/rest/utilities_testing.go +++ b/rest/utilities_testing.go @@ -1254,7 +1254,7 @@ type BlipTester struct { } // Close the bliptester -func (bt BlipTester) Close() { +func (bt *BlipTester) Close() { bt.sender.Close() if !bt.avoidRestTesterClose { bt.restTester.Close() @@ -1262,7 +1262,7 @@ func (bt BlipTester) Close() { } // Returns database context for blipTester (assumes underlying rest tester is based on a single db - returns first it finds) -func (bt BlipTester) DatabaseContext() *db.DatabaseContext { +func (bt *BlipTester) DatabaseContext() *db.DatabaseContext { dbs := bt.restTester.ServerContext().AllDatabases() for _, database := range dbs { return database From c471697712b527815b7ca8ed58904d5903014086 Mon Sep 17 00:00:00 2001 From: Tor Colvin Date: Tue, 30 May 2023 18:57:23 -0400 Subject: [PATCH 32/42] CBG-2977 allow DELETE on a broken DB config (#6260) --- base/leaky_bucket.go | 18 ++++ base/util_testing.go | 16 +++ db/database.go | 26 ++--- db/database_test.go | 10 +- rest/api.go | 2 +- rest/config.go | 8 +- rest/handler.go | 74 +++++++++----- rest/rest_tester_cluster_test.go | 4 +- rest/routing.go | 4 +- rest/server_context.go | 95 +++++++++++------ rest/server_context_test.go | 33 ++++-- rest/serverless_test.go | 2 +- rest/upgradetest/remove_collection_test.go | 112 +++++++++++++++++++++ rest/utilities_testing.go | 20 ++-- 14 files changed, 323 insertions(+), 101 deletions(-) create mode 100644 rest/upgradetest/remove_collection_test.go diff --git a/base/leaky_bucket.go b/base/leaky_bucket.go index 0fc2c0aec9..cf9e257e86 100644 --- a/base/leaky_bucket.go +++ b/base/leaky_bucket.go @@ -11,6 +11,7 @@ package base import ( "expvar" + "fmt" "math" "time" @@ -27,6 +28,7 @@ type LeakyBucket struct { } var _ sgbucket.BucketStore = &LeakyBucket{} +var _ sgbucket.DynamicDataStoreBucket = &LeakyBucket{} func NewLeakyBucket(bucket Bucket, config LeakyBucketConfig) *LeakyBucket { return &LeakyBucket{ @@ -94,6 +96,22 @@ func (b *LeakyBucket) GetUnderlyingBucket() Bucket { return b.bucket } +func (b 
*LeakyBucket) CreateDataStore(name sgbucket.DataStoreName) error { + dynamicDataStore, ok := b.GetUnderlyingBucket().(sgbucket.DynamicDataStoreBucket) + if !ok { + return fmt.Errorf("Bucket %T doesn't support dynamic collection creation", b.GetUnderlyingBucket()) + } + return dynamicDataStore.CreateDataStore(name) +} + +func (b *LeakyBucket) DropDataStore(name sgbucket.DataStoreName) error { + dynamicDataStore, ok := b.GetUnderlyingBucket().(sgbucket.DynamicDataStoreBucket) + if !ok { + return fmt.Errorf("Bucket %T doesn't support dynamic collection creation", b.GetUnderlyingBucket()) + } + return dynamicDataStore.DropDataStore(name) +} + // The config object that controls the LeakyBucket behavior type LeakyBucketConfig struct { // Incr() fails N times before finally succeeding diff --git a/base/util_testing.go b/base/util_testing.go index 806da93c07..44501772d7 100644 --- a/base/util_testing.go +++ b/base/util_testing.go @@ -174,6 +174,22 @@ func (b *TestBucket) GetMetadataStore() sgbucket.DataStore { return b.Bucket.DefaultDataStore() } +func (b *TestBucket) CreateDataStore(name sgbucket.DataStoreName) error { + dynamicDataStore, ok := b.Bucket.(sgbucket.DynamicDataStoreBucket) + if !ok { + return fmt.Errorf("Bucket %T doesn't support dynamic collection creation", b.Bucket) + } + return dynamicDataStore.CreateDataStore(name) +} + +func (b *TestBucket) DropDataStore(name sgbucket.DataStoreName) error { + dynamicDataStore, ok := b.GetUnderlyingBucket().(sgbucket.DynamicDataStoreBucket) + if !ok { + return fmt.Errorf("Bucket %T doesn't support dynamic collection creation", b.GetUnderlyingBucket()) + } + return dynamicDataStore.DropDataStore(name) +} + // GetDefaultDataStore returns the default DataStore. This is likely never actually wanted over GetSingleDataStore, so is left commented until absolutely required. // func (b *TestBucket) GetDefaultDataStore() sgbucket.DataStore { // b.t.Logf("Using default collection - Are you sure you want this instead of GetSingleDataStore() ?") diff --git a/db/database.go b/db/database.go index fcd4f21908..bf8495de39 100644 --- a/db/database.go +++ b/db/database.go @@ -310,17 +310,15 @@ func connectToBucketErrorHandling(ctx context.Context, spec base.BucketSpec, got return false, nil } -type OpenBucketFn func(ctx context.Context, spec base.BucketSpec) (base.Bucket, error) +type OpenBucketFn func(context.Context, base.BucketSpec, bool) (base.Bucket, error) -// connectToBucketFailFast opens a Couchbase connect and return a specific bucket without retrying on failure. -func connectToBucketFailFast(ctx context.Context, spec base.BucketSpec) (bucket base.Bucket, err error) { - bucket, err = base.GetBucket(spec) - _, err = connectToBucketErrorHandling(ctx, spec, err) - return bucket, err -} - -// connectToBucket opens a Couchbase connection and return a specific bucket. -func connectToBucket(ctx context.Context, spec base.BucketSpec) (base.Bucket, error) { +// ConnectToBucket opens a Couchbase connection and return a specific bucket. If failFast is set, fail immediately if the bucket doesn't exist, otherwise retry waiting for bucket to exist. 
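The CreateDataStore/DropDataStore additions above follow the same shape in both LeakyBucket and TestBucket: the wrapper forwards the call only when the wrapped bucket actually implements sgbucket.DynamicDataStoreBucket, and returns an error otherwise. Below is a generic sketch of that forward-if-supported pattern; the interface and type names are invented and it is not tied to the sgbucket API.

package main

import (
	"errors"
	"fmt"
)

// store is the capability every bucket has; dynamicStore is the optional one
// that the wrapper only forwards when the wrapped value supports it.
type store interface{ Name() string }

type dynamicStore interface {
	CreateDataStore(name string) error
}

type realStore struct{}

func (realStore) Name() string { return "real" }
func (realStore) CreateDataStore(name string) error {
	fmt.Println("created", name)
	return nil
}

// wrapper decorates another store (as LeakyBucket decorates a Bucket) and
// forwards the optional capability via a type assertion.
type wrapper struct{ inner store }

func (w wrapper) Name() string { return w.inner.Name() }

func (w wrapper) CreateDataStore(name string) error {
	dyn, ok := w.inner.(dynamicStore)
	if !ok {
		return errors.New("wrapped store doesn't support dynamic data store creation")
	}
	return dyn.CreateDataStore(name)
}

func main() {
	w := wrapper{inner: realStore{}}
	if err := w.CreateDataStore("scope1.collection1"); err != nil {
		fmt.Println(err)
	}
}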
+func ConnectToBucket(ctx context.Context, spec base.BucketSpec, failFast bool) (base.Bucket, error) { + if failFast { + bucket, err := base.GetBucket(spec) + _, err = connectToBucketErrorHandling(ctx, spec, err) + return bucket, err + } // start a retry loop to connect to the bucket backing off double the delay each time worker := func() (bool, error, interface{}) { @@ -342,14 +340,6 @@ func connectToBucket(ctx context.Context, spec base.BucketSpec) (base.Bucket, er return ibucket.(base.Bucket), nil } -// GetConnectToBucketFn returns a different OpenBucketFn to connect to the bucket depending on the value of failFast -func GetConnectToBucketFn(failFast bool) OpenBucketFn { - if failFast { - return connectToBucketFailFast - } - return connectToBucket -} - // Returns Couchbase Server Cluster UUID on a timeout. If running against walrus, do return an empty string. func getServerUUID(ctx context.Context, bucket base.Bucket) (string, error) { gocbV2Bucket, err := base.AsGocbV2Bucket(bucket) diff --git a/db/database_test.go b/db/database_test.go index 983b412466..51600e642a 100644 --- a/db/database_test.go +++ b/db/database_test.go @@ -1851,9 +1851,10 @@ func BenchmarkDatabase(b *testing.B) { for i := 0; i < b.N; i++ { ctx := base.TestCtx(b) - bucket, _ := connectToBucket(ctx, base.BucketSpec{ + bucket, _ := ConnectToBucket(ctx, base.BucketSpec{ Server: base.UnitTestUrl(), - BucketName: fmt.Sprintf("b-%d", i)}) + BucketName: fmt.Sprintf("b-%d", i)}, + true) dbCtx, _ := NewDatabaseContext(ctx, "db", bucket, false, DatabaseContextOptions{}) db, _ := CreateDatabase(dbCtx) collection := GetSingleDatabaseCollectionWithUser(b, db) @@ -1869,9 +1870,10 @@ func BenchmarkPut(b *testing.B) { base.DisableTestLogging(b) ctx := base.TestCtx(b) - bucket, _ := connectToBucket(ctx, base.BucketSpec{ + bucket, _ := ConnectToBucket(ctx, base.BucketSpec{ Server: base.UnitTestUrl(), - BucketName: "Bucket"}) + BucketName: "Bucket"}, + true) context, _ := NewDatabaseContext(ctx, "db", bucket, false, DatabaseContextOptions{}) db, _ := CreateDatabase(context) collection := GetSingleDatabaseCollectionWithUser(b, db) diff --git a/rest/api.go b/rest/api.go index 21884eab03..a340946003 100644 --- a/rest/api.go +++ b/rest/api.go @@ -218,7 +218,7 @@ func (h *handler) handleFlush() error { } // Manually re-open a temporary bucket connection just for flushing purposes - tempBucketForFlush, err := db.GetConnectToBucketFn(false)(h.ctx(), spec) + tempBucketForFlush, err := db.ConnectToBucket(h.ctx(), spec, false) if err != nil { return err } diff --git a/rest/config.go b/rest/config.go index 848bee10aa..2c06550b5d 100644 --- a/rest/config.go +++ b/rest/config.go @@ -1267,7 +1267,8 @@ func SetupServerContext(ctx context.Context, config *StartupConfig, persistentCo sc := NewServerContext(ctx, config, persistentConfig) if !base.ServerIsWalrus(config.Bootstrap.Server) { - if err := sc.initializeCouchbaseServerConnections(ctx); err != nil { + failFast := false + if err := sc.initializeCouchbaseServerConnections(ctx, failFast); err != nil { return nil, err } } @@ -1481,6 +1482,9 @@ func (sc *ServerContext) bucketNameFromDbName(dbName string) (bucketName string, return dbc.Bucket.GetName(), true } + if sc.BootstrapContext.Connection == nil { + return "", false + } // To search for database with the specified name, need to iterate over all buckets: // - look for dbName-scoped config file // - fetch default config file (backward compatibility, check internal DB name) @@ -1611,7 +1615,7 @@ func (sc *ServerContext) FetchConfigs(ctx 
context.Context, isInitialStartup bool // _applyConfigs takes a map of dbName->DatabaseConfig and loads them into the ServerContext where necessary. func (sc *ServerContext) _applyConfigs(ctx context.Context, dbNameConfigs map[string]DatabaseConfig, isInitialStartup bool) (count int) { for dbName, cnf := range dbNameConfigs { - applied, err := sc._applyConfig(base.NewNonCancelCtx(), cnf, false, isInitialStartup) + applied, err := sc._applyConfig(base.NewNonCancelCtx(), cnf, true, isInitialStartup) if err != nil { base.ErrorfCtx(ctx, "Couldn't apply config for database %q: %v", base.MD(dbName), err) continue diff --git a/rest/handler.go b/rest/handler.go index 3f30b4bd85..7e8edc0017 100644 --- a/rest/handler.go +++ b/rest/handler.go @@ -1,4 +1,4 @@ -// Copyright 2012-Present Couchbase, Inc. +/// Copyright 2012-Present Couchbase, Inc. // // Use of this software is governed by the Business Source License included // in the file licenses/BSL-Couchbase.txt. As of the Change Date specified @@ -100,7 +100,8 @@ type handler struct { serialNumber uint64 formattedSerialNumber string loggedDuration bool - runOffline bool + runOffline bool // allows running on an offline database + allowNilDBContext bool // allow acceess to a database based only on name, looking up in metadata registry queryValues url.Values // Copy of results of rq.URL.Query() permissionsResults map[string]bool authScopeFunc authScopeFunc @@ -123,8 +124,7 @@ type handlerMethod func(*handler) error // Creates an http.Handler that will run a handler with the given method func makeHandler(server *ServerContext, privs handlerPrivs, accessPermissions []Permission, responsePermissions []Permission, method handlerMethod) http.Handler { return http.HandlerFunc(func(r http.ResponseWriter, rq *http.Request) { - runOffline := false - h := newHandler(server, privs, r, rq, runOffline) + h := newHandler(server, privs, r, rq, handlerOptions{}) err := h.invoke(method, accessPermissions, responsePermissions) h.writeError(err) h.logDuration(true) @@ -134,8 +134,24 @@ func makeHandler(server *ServerContext, privs handlerPrivs, accessPermissions [] // Creates an http.Handler that will run a handler with the given method even if the target DB is offline func makeOfflineHandler(server *ServerContext, privs handlerPrivs, accessPermissions []Permission, responsePermissions []Permission, method handlerMethod) http.Handler { return http.HandlerFunc(func(r http.ResponseWriter, rq *http.Request) { - runOffline := true - h := newHandler(server, privs, r, rq, runOffline) + options := handlerOptions{ + runOffline: true, + } + h := newHandler(server, privs, r, rq, options) + err := h.invoke(method, accessPermissions, responsePermissions) + h.writeError(err) + h.logDuration(true) + }) +} + +// makeMetadataDBOfflineHandler creates an http.Handler that will run a handler with the given method even if the target DB is not able to be instantiated +func makeMetadataDBOfflineHandler(server *ServerContext, privs handlerPrivs, accessPermissions []Permission, responsePermissions []Permission, method handlerMethod) http.Handler { + return http.HandlerFunc(func(r http.ResponseWriter, rq *http.Request) { + options := handlerOptions{ + runOffline: true, + allowNilDBContext: true, + } + h := newHandler(server, privs, r, rq, options) err := h.invoke(method, accessPermissions, responsePermissions) h.writeError(err) h.logDuration(true) @@ -146,8 +162,7 @@ func makeOfflineHandler(server *ServerContext, privs handlerPrivs, accessPermiss // given the endpoint payload returns an auth 
scope. func makeHandlerSpecificAuthScope(server *ServerContext, privs handlerPrivs, accessPermissions []Permission, responsePermissions []Permission, method handlerMethod, dbAuthStringFunc func([]byte) (string, error)) http.Handler { return http.HandlerFunc(func(r http.ResponseWriter, rq *http.Request) { - runOffline := false - h := newHandler(server, privs, r, rq, runOffline) + h := newHandler(server, privs, r, rq, handlerOptions{}) h.authScopeFunc = dbAuthStringFunc err := h.invoke(method, accessPermissions, responsePermissions) h.writeError(err) @@ -155,16 +170,22 @@ func makeHandlerSpecificAuthScope(server *ServerContext, privs handlerPrivs, acc }) } -func newHandler(server *ServerContext, privs handlerPrivs, r http.ResponseWriter, rq *http.Request, runOffline bool) *handler { +type handlerOptions struct { + runOffline bool // if true, allow handler to run when a database is offline + allowNilDBContext bool // if true, allow a db-scoped handler to be invoked with a nil dbContext in cases where the database config exists but has an error preventing dbContext initialization" +} + +func newHandler(server *ServerContext, privs handlerPrivs, r http.ResponseWriter, rq *http.Request, options handlerOptions) *handler { h := &handler{ - server: server, - privs: privs, - rq: rq, - response: r, - status: http.StatusOK, - serialNumber: atomic.AddUint64(&lastSerialNum, 1), - startTime: time.Now(), - runOffline: runOffline, + server: server, + privs: privs, + rq: rq, + response: r, + status: http.StatusOK, + serialNumber: atomic.AddUint64(&lastSerialNum, 1), + startTime: time.Now(), + runOffline: options.runOffline, + allowNilDBContext: options.allowNilDBContext, } // initialize h.rqCtx @@ -300,13 +321,14 @@ func (h *handler) validateAndWriteHeaders(method handlerMethod, accessPermission var dbContext *db.DatabaseContext + var bucketName string + // look up the database context: if keyspaceDb != "" { h.addDatabaseLogContext(keyspaceDb) var err error if dbContext, err = h.server.GetActiveDatabase(keyspaceDb); err != nil { if err == base.ErrNotFound { - if shouldCheckAdminAuth { // Check if authenticated before attempting to get inactive database authorized, err := h.checkAdminAuthenticationOnly() @@ -317,10 +339,11 @@ func (h *handler) validateAndWriteHeaders(method handlerMethod, accessPermission return ErrInvalidLogin } } - dbContext, err = h.server.GetInactiveDatabase(h.ctx(), keyspaceDb) + var dbConfigFound bool + dbContext, dbConfigFound, err = h.server.GetInactiveDatabase(h.ctx(), keyspaceDb) if err != nil { if httpError, ok := err.(*base.HTTPError); ok && httpError.Status == http.StatusNotFound { - if shouldCheckAdminAuth { + if shouldCheckAdminAuth && (!h.allowNilDBContext || !dbConfigFound) { return base.HTTPErrorf(http.StatusForbidden, "") } else if h.privs == regularPrivs || h.privs == publicPrivs { if !h.providedAuthCredentials() { @@ -330,8 +353,11 @@ func (h *handler) validateAndWriteHeaders(method handlerMethod, accessPermission return ErrInvalidLogin } } - base.InfofCtx(h.ctx(), base.KeyHTTP, "Error trying to get db %s: %v", base.MD(keyspaceDb), err) - return err + if !h.allowNilDBContext || !dbConfigFound { + base.InfofCtx(h.ctx(), base.KeyHTTP, "Error trying to get db %s: %v", base.MD(keyspaceDb), err) + return err + } + bucketName, _ = h.server.bucketNameFromDbName(keyspaceDb) } } else { return err @@ -398,7 +424,6 @@ func (h *handler) validateAndWriteHeaders(method handlerMethod, accessPermission } } } - if shouldCheckAdminAuth { // If server is walrus but auth is enabled we 
should just kick the user out as invalid as we have nothing to // validate credentials against @@ -425,13 +450,12 @@ func (h *handler) validateAndWriteHeaders(method handlerMethod, accessPermission authScope = dbContext.Bucket.GetName() } else { managementEndpoints, httpClient, err = h.server.ObtainManagementEndpointsAndHTTPClient() - authScope = "" + authScope = bucketName } if err != nil { base.WarnfCtx(h.ctx(), "An error occurred whilst obtaining management endpoints: %v", err) return base.HTTPErrorf(http.StatusInternalServerError, "") } - if h.authScopeFunc != nil { body, err := h.readBody() if err != nil { diff --git a/rest/rest_tester_cluster_test.go b/rest/rest_tester_cluster_test.go index fa57e00e57..5addaf4991 100644 --- a/rest/rest_tester_cluster_test.go +++ b/rest/rest_tester_cluster_test.go @@ -100,9 +100,9 @@ func NewRestTesterCluster(t *testing.T, config *RestTesterClusterConfig) *RestTe // Set group ID for each RestTester from cluster if config.rtConfig == nil { - config.rtConfig = &RestTesterConfig{groupID: config.groupID} + config.rtConfig = &RestTesterConfig{GroupID: config.groupID} } else { - config.rtConfig.groupID = config.groupID + config.rtConfig.GroupID = config.groupID } // only persistent mode is supported for a RestTesterCluster config.rtConfig.PersistentConfig = true diff --git a/rest/routing.go b/rest/routing.go index 79a511b219..895c534643 100644 --- a/rest/routing.go +++ b/rest/routing.go @@ -327,7 +327,7 @@ func CreateAdminRouter(sc *ServerContext) *mux.Router { r.Handle("/{newdb:"+dbRegex+"}/", makeHandlerSpecificAuthScope(sc, adminPrivs, []Permission{PermCreateDb}, nil, (*handler).handleCreateDB, getAuthScopeHandleCreateDB)).Methods("PUT") r.Handle("/{db:"+dbRegex+"}/", - makeOfflineHandler(sc, adminPrivs, []Permission{PermDeleteDb}, nil, (*handler).handleDeleteDB)).Methods("DELETE") + makeMetadataDBOfflineHandler(sc, adminPrivs, []Permission{PermDeleteDb}, nil, (*handler).handleDeleteDB)).Methods("DELETE") r.Handle("/_all_dbs", makeHandler(sc, adminPrivs, []Permission{PermDevOps}, nil, (*handler).handleAllDbs)).Methods("GET", "HEAD") @@ -364,7 +364,7 @@ func wrapRouter(sc *ServerContext, privs handlerPrivs, router *mux.Router) http. router.ServeHTTP(response, rq) } else { // Log the request - h := newHandler(sc, privs, response, rq, false) + h := newHandler(sc, privs, response, rq, handlerOptions{}) h.logRequestLine() // Inject CORS if enabled and requested and not admin port diff --git a/rest/server_context.go b/rest/server_context.go index d02d00ef32..3a88cc5f47 100644 --- a/rest/server_context.go +++ b/rest/server_context.go @@ -93,10 +93,19 @@ type bootstrapContext struct { doneChan chan struct{} // doneChan is closed when the bootstrap polling goroutine finishes. 
} +type getOrAddDatabaseConfigOptions struct { + failFast bool // if set, a failure to connect to a bucket of collection will immediately fail + useExisting bool // if true, return an existing DatabaseContext vs return an error + connectToBucketFn db.OpenBucketFn // supply a custom function for buckets, used for testing only +} + func (sc *ServerContext) CreateLocalDatabase(ctx context.Context, dbs DbConfigMap) error { for _, dbConfig := range dbs { dbc := dbConfig.ToDatabaseConfig() - _, err := sc._getOrAddDatabaseFromConfig(ctx, *dbc, false, db.GetConnectToBucketFn(false)) + _, err := sc._getOrAddDatabaseFromConfig(ctx, *dbc, getOrAddDatabaseConfigOptions{ + useExisting: false, + failFast: false, + }) if err != nil { return err } @@ -227,7 +236,8 @@ func (sc *ServerContext) Close(ctx context.Context) { func (sc *ServerContext) GetDatabase(ctx context.Context, name string) (*db.DatabaseContext, error) { dbc, err := sc.GetActiveDatabase(name) if err == base.ErrNotFound { - return sc.GetInactiveDatabase(ctx, name) + dbc, _, err := sc.GetInactiveDatabase(ctx, name) + return dbc, err } return dbc, err } @@ -248,35 +258,35 @@ func (sc *ServerContext) GetActiveDatabase(name string) (*db.DatabaseContext, er // GetInactiveDatabase attempts to load the database and return it's DatabaseContext. It will first attempt to unsuspend the // database, and if that fails, try to load the database from the buckets. -// This should be used if GetActiveDatabase fails. -func (sc *ServerContext) GetInactiveDatabase(ctx context.Context, name string) (*db.DatabaseContext, error) { +// This should be used if GetActiveDatabase fails. Turns the database context, a variable to say if the config exists, and an error. +func (sc *ServerContext) GetInactiveDatabase(ctx context.Context, name string) (*db.DatabaseContext, bool, error) { dbc, err := sc.unsuspendDatabase(ctx, name) if err != nil && err != base.ErrNotFound && err != ErrSuspendingDisallowed { - return nil, err + return nil, false, err } else if err == nil { - return dbc, nil + return dbc, true, nil } + var dbConfigFound bool // database not loaded, fallback to fetching it from cluster if sc.BootstrapContext.Connection != nil { - var found bool if sc.Config.IsServerless() { - found, _ = sc.fetchAndLoadDatabaseSince(ctx, name, sc.Config.Unsupported.Serverless.MinConfigFetchInterval) + dbConfigFound, _ = sc.fetchAndLoadDatabaseSince(ctx, name, sc.Config.Unsupported.Serverless.MinConfigFetchInterval) } else { - found, _ = sc.fetchAndLoadDatabase(base.NewNonCancelCtx(), name) + dbConfigFound, _ = sc.fetchAndLoadDatabase(base.NewNonCancelCtx(), name) } - if found { + if dbConfigFound { sc.lock.RLock() defer sc.lock.RUnlock() dbc := sc.databases_[name] if dbc != nil { - return dbc, nil + return dbc, dbConfigFound, nil } } } - return nil, base.HTTPErrorf(http.StatusNotFound, "no such database %q", name) + return nil, dbConfigFound, base.HTTPErrorf(http.StatusNotFound, "no such database %q", name) } func (sc *ServerContext) GetDbConfig(name string) *DbConfig { @@ -356,7 +366,9 @@ func (sc *ServerContext) PostUpgrade(ctx context.Context, preview bool) (postUpg func (sc *ServerContext) _reloadDatabase(ctx context.Context, reloadDbName string, failFast bool) (*db.DatabaseContext, error) { sc._unloadDatabase(ctx, reloadDbName) config := sc.dbConfigs[reloadDbName] - return sc._getOrAddDatabaseFromConfig(ctx, config.DatabaseConfig, true, db.GetConnectToBucketFn(failFast)) + return sc._getOrAddDatabaseFromConfig(ctx, config.DatabaseConfig, getOrAddDatabaseConfigOptions{ + 
useExisting: true, + failFast: failFast}) } // Removes and re-adds a database to the ServerContext. @@ -377,18 +389,21 @@ func (sc *ServerContext) ReloadDatabaseWithConfig(nonContextStruct base.NonCance func (sc *ServerContext) _reloadDatabaseWithConfig(ctx context.Context, config DatabaseConfig, failFast bool) error { sc._removeDatabase(ctx, config.Name) - _, err := sc._getOrAddDatabaseFromConfig(ctx, config, false, db.GetConnectToBucketFn(failFast)) + _, err := sc._getOrAddDatabaseFromConfig(ctx, config, getOrAddDatabaseConfigOptions{ + useExisting: false, + failFast: failFast, + }) return err } // Adds a database to the ServerContext. Attempts a read after it gets the write // lock to see if it's already been added by another process. If so, returns either the // existing DatabaseContext or an error based on the useExisting flag. -func (sc *ServerContext) getOrAddDatabaseFromConfig(ctx context.Context, config DatabaseConfig, useExisting bool, openBucketFn db.OpenBucketFn) (*db.DatabaseContext, error) { +func (sc *ServerContext) getOrAddDatabaseFromConfig(ctx context.Context, config DatabaseConfig, options getOrAddDatabaseConfigOptions) (*db.DatabaseContext, error) { // Obtain write lock during add database, to avoid race condition when creating based on ConfigServer sc.lock.Lock() defer sc.lock.Unlock() - return sc._getOrAddDatabaseFromConfig(ctx, config, useExisting, openBucketFn) + return sc._getOrAddDatabaseFromConfig(ctx, config, options) } func GetBucketSpec(ctx context.Context, config *DatabaseConfig, serverConfig *StartupConfig) (spec base.BucketSpec, err error) { @@ -432,8 +447,7 @@ func GetBucketSpec(ctx context.Context, config *DatabaseConfig, serverConfig *St // lock to see if it's already been added by another process. If so, returns either the // existing DatabaseContext or an error based on the useExisting flag. // Pass in a bucketFromBucketSpecFn to replace the default ConnectToBucket function. 
This will cause the failFast argument to be ignored -func (sc *ServerContext) _getOrAddDatabaseFromConfig(ctx context.Context, config DatabaseConfig, useExisting bool, openBucketFn db.OpenBucketFn) (*db.DatabaseContext, error) { - +func (sc *ServerContext) _getOrAddDatabaseFromConfig(ctx context.Context, config DatabaseConfig, options getOrAddDatabaseConfigOptions) (*db.DatabaseContext, error) { // Generate bucket spec and validate whether db already exists spec, err := GetBucketSpec(ctx, &config, sc.Config) if err != nil { @@ -467,7 +481,7 @@ func (sc *ServerContext) _getOrAddDatabaseFromConfig(ctx context.Context, config } if sc.databases_[dbName] != nil { - if useExisting { + if options.useExisting { return sc.databases_[dbName], nil } else { return nil, base.HTTPErrorf(http.StatusPreconditionFailed, // what CouchDB returns @@ -482,11 +496,18 @@ func (sc *ServerContext) _getOrAddDatabaseFromConfig(ctx context.Context, config // Connect to bucket base.InfofCtx(ctx, base.KeyAll, "Opening db /%s as bucket %q, pool %q, server <%s>", base.MD(dbName), base.MD(spec.BucketName), base.SD(base.DefaultPool), base.SD(spec.Server)) - bucket, err := openBucketFn(ctx, spec) + + // the connectToBucketFn is used for testing seam + var bucket base.Bucket + if options.connectToBucketFn != nil { + // the connectToBucketFn is used for testing seam + bucket, err = options.connectToBucketFn(ctx, spec, options.failFast) + } else { + bucket, err = db.ConnectToBucket(ctx, spec, options.failFast) + } if err != nil { return nil, err } - // If using a walrus bucket, force use of views useViews := base.BoolDefault(config.UseViews, false) if !useViews && spec.IsWalrusBucket() { @@ -558,15 +579,20 @@ func (sc *ServerContext) _getOrAddDatabaseFromConfig(ctx context.Context, config for collectionName, _ := range scopeConfig.Collections { var dataStore sgbucket.DataStore - waitForCollection := func() (bool, error, interface{}) { + var err error + if options.failFast { dataStore, err = bucket.NamedDataStore(base.ScopeAndCollectionName{Scope: scopeName, Collection: collectionName}) - return err != nil, err, nil - } + } else { + waitForCollection := func() (bool, error, interface{}) { + dataStore, err = bucket.NamedDataStore(base.ScopeAndCollectionName{Scope: scopeName, Collection: collectionName}) + return err != nil, err, nil + } - err, _ := base.RetryLoop( - fmt.Sprintf("waiting for %s.%s.%s to exist", base.MD(bucket.GetName()), base.MD(scopeName), base.MD(collectionName)), - waitForCollection, - base.CreateMaxDoublingSleeperFunc(30, 10, 1000)) + err, _ = base.RetryLoop( + fmt.Sprintf("waiting for %s.%s.%s to exist", base.MD(bucket.GetName()), base.MD(scopeName), base.MD(collectionName)), + waitForCollection, + base.CreateMaxDoublingSleeperFunc(30, 10, 1000)) + } if err != nil { return nil, fmt.Errorf("error attempting to create/update database: %w", err) } @@ -1224,13 +1250,15 @@ func (sc *ServerContext) initEventHandlers(ctx context.Context, dbcontext *db.Da // Adds a database to the ServerContext given its configuration. If an existing config is found // for the name, returns an error. 
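When failFast is not set, the code above waits for each named collection using a worker closure that returns (shouldRetry, err, value) plus a doubling sleeper. A simplified, self-contained stand-in for that base.RetryLoop/CreateMaxDoublingSleeperFunc combination is sketched below; the helper name, attempt count, and delays are invented and only approximate the real backoff behaviour.

package main

import (
	"errors"
	"fmt"
	"time"
)

// worker has the same shape as the closure passed to base.RetryLoop above:
// it reports whether to retry, the last error, and an optional value.
type worker func() (retry bool, err error, value interface{})

// retryLoop is a simplified stand-in for a retry loop with a doubling sleeper:
// the delay doubles on each attempt up to a cap, for at most maxAttempts tries.
func retryLoop(desc string, w worker, maxAttempts int, initial, max time.Duration) (interface{}, error) {
	delay := initial
	var lastErr error
	for attempt := 1; attempt <= maxAttempts; attempt++ {
		retry, err, value := w()
		if !retry {
			return value, err
		}
		lastErr = err
		time.Sleep(delay)
		if delay *= 2; delay > max {
			delay = max
		}
	}
	return nil, fmt.Errorf("%s: giving up after %d attempts: %w", desc, maxAttempts, lastErr)
}

func main() {
	attempts := 0
	// Pretend the collection only becomes available on the third check.
	waitForCollection := func() (bool, error, interface{}) {
		attempts++
		if attempts < 3 {
			return true, errors.New("collection not found"), nil
		}
		return false, nil, "scope1.collection1"
	}
	ds, err := retryLoop("waiting for collection", waitForCollection, 10, 10*time.Millisecond, 500*time.Millisecond)
	fmt.Println(ds, err)
}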
func (sc *ServerContext) AddDatabaseFromConfig(ctx context.Context, config DatabaseConfig) (*db.DatabaseContext, error) { - return sc.getOrAddDatabaseFromConfig(ctx, config, false, db.GetConnectToBucketFn(false)) + failFast := false + return sc.getOrAddDatabaseFromConfig(ctx, config, getOrAddDatabaseConfigOptions{useExisting: false, failFast: failFast}) } // AddDatabaseFromConfigFailFast adds a database to the ServerContext given its configuration and fails fast. // If an existing config is found for the name, returns an error. func (sc *ServerContext) AddDatabaseFromConfigFailFast(nonContextStruct base.NonCancellableContext, config DatabaseConfig) (*db.DatabaseContext, error) { - return sc.getOrAddDatabaseFromConfig(nonContextStruct.Ctx, config, false, db.GetConnectToBucketFn(true)) + failFast := true + return sc.getOrAddDatabaseFromConfig(nonContextStruct.Ctx, config, getOrAddDatabaseConfigOptions{useExisting: false, failFast: failFast}) } func (sc *ServerContext) processEventHandlersForEvent(ctx context.Context, events []*EventConfig, eventType db.EventType, dbcontext *db.DatabaseContext) error { @@ -1356,7 +1384,10 @@ func (sc *ServerContext) _unsuspendDatabase(ctx context.Context, dbName string) return nil, fmt.Errorf("unsuspending db %q failed due to an error while trying to retrieve latest config from bucket %q: %w", base.MD(dbName).Redact(), base.MD(bucket).Redact(), err) } dbConfig.cfgCas = cas - dbCtx, err = sc._getOrAddDatabaseFromConfig(ctx, dbConfig.DatabaseConfig, false, db.GetConnectToBucketFn(false)) + failFast := false + dbCtx, err = sc._getOrAddDatabaseFromConfig(ctx, dbConfig.DatabaseConfig, getOrAddDatabaseConfigOptions{ + useExisting: false, + failFast: failFast}) if err != nil { return nil, err } @@ -1850,7 +1881,7 @@ func (sc *ServerContext) Database(ctx context.Context, name string) *db.Database return db } -func (sc *ServerContext) initializeCouchbaseServerConnections(ctx context.Context) error { +func (sc *ServerContext) initializeCouchbaseServerConnections(ctx context.Context, failFast bool) error { base.InfofCtx(ctx, base.KeyAll, "Initializing server connections") defer func() { base.InfofCtx(ctx, base.KeyAll, "Finished initializing server connections") diff --git a/rest/server_context_test.go b/rest/server_context_test.go index 03a1224b6e..210c4e9129 100644 --- a/rest/server_context_test.go +++ b/rest/server_context_test.go @@ -22,7 +22,6 @@ import ( "time" "github.com/couchbase/sync_gateway/auth" - "github.com/couchbase/sync_gateway/db" "github.com/couchbase/gocbcore/v10/connstr" sgbucket "github.com/couchbase/sg-bucket" @@ -168,7 +167,7 @@ func TestGetOrAddDatabaseFromConfig(t *testing.T) { // Get or add database name from config without valid database name; throws 400 Illegal database name error dbConfig := DbConfig{OldRevExpirySeconds: &oldRevExpirySeconds, LocalDocExpirySecs: &localDocExpirySecs} - dbContext, err := serverContext._getOrAddDatabaseFromConfig(ctx, DatabaseConfig{DbConfig: dbConfig}, false, db.GetConnectToBucketFn(false)) + dbContext, err := serverContext._getOrAddDatabaseFromConfig(ctx, DatabaseConfig{DbConfig: dbConfig}, getOrAddDatabaseConfigOptions{useExisting: false, failFast: false}) assert.Nil(t, dbContext, "Can't create database context without a valid database name") assert.Error(t, err, "It should throw 400 Illegal database name") assert.Contains(t, err.Error(), strconv.Itoa(http.StatusBadRequest)) @@ -187,7 +186,10 @@ func TestGetOrAddDatabaseFromConfig(t *testing.T) { BucketConfig: BucketConfig{Server: &server, Bucket: 
&bucketName}, } - dbContext, err = serverContext._getOrAddDatabaseFromConfig(ctx, DatabaseConfig{DbConfig: dbConfig}, false, db.GetConnectToBucketFn(false)) + dbContext, err = serverContext._getOrAddDatabaseFromConfig(ctx, DatabaseConfig{DbConfig: dbConfig}, getOrAddDatabaseConfigOptions{ + failFast: false, + useExisting: false, + }) assert.Nil(t, dbContext, "Can't create database context from config with unrecognized value for import_docs") assert.Error(t, err, "It should throw Unrecognized value for import_docs") @@ -214,14 +216,22 @@ func TestGetOrAddDatabaseFromConfig(t *testing.T) { AutoImport: false, } - dbContext, err = serverContext._getOrAddDatabaseFromConfig(ctx, DatabaseConfig{DbConfig: dbConfig}, false, db.GetConnectToBucketFn(false)) + dbContext, err = serverContext._getOrAddDatabaseFromConfig(ctx, DatabaseConfig{DbConfig: dbConfig}, getOrAddDatabaseConfigOptions{ + failFast: false, + useExisting: false, + }) assert.Nil(t, dbContext, "Can't create database context with duplicate database name") assert.Error(t, err, "It should throw 412 Duplicate database names") assert.Contains(t, err.Error(), strconv.Itoa(http.StatusPreconditionFailed)) // Get or add database from config with duplicate database name and useExisting as true // Existing database context should be returned - dbContext, err = serverContext._getOrAddDatabaseFromConfig(ctx, DatabaseConfig{DbConfig: dbConfig}, true, db.GetConnectToBucketFn(false)) + dbContext, err = serverContext._getOrAddDatabaseFromConfig(ctx, DatabaseConfig{DbConfig: dbConfig}, + getOrAddDatabaseConfigOptions{ + failFast: false, + useExisting: true, + }) + assert.NoError(t, err, "No error while trying to get the existing database name") assert.Equal(t, server, dbContext.BucketSpec.Server) assert.Equal(t, bucketName, dbContext.BucketSpec.BucketName) @@ -615,7 +625,12 @@ func TestServerContextSetupCollectionsSupport(t *testing.T) { }, }, } - _, err := serverContext._getOrAddDatabaseFromConfig(ctx, DatabaseConfig{DbConfig: dbConfig}, false, db.GetConnectToBucketFn(true)) + _, err := serverContext._getOrAddDatabaseFromConfig(ctx, DatabaseConfig{DbConfig: dbConfig}, + getOrAddDatabaseConfigOptions{ + failFast: false, + useExisting: false, + }) + require.ErrorIs(t, err, errCollectionsUnsupported) } @@ -790,7 +805,11 @@ func TestDisableScopesInLegacyConfig(t *testing.T) { } dbConfig.Scopes = GetCollectionsConfigWithSyncFn(t, bucket, nil, 1) } - dbContext, err := serverContext._getOrAddDatabaseFromConfig(ctx, DatabaseConfig{DbConfig: dbConfig}, false, db.GetConnectToBucketFn(false)) + dbContext, err := serverContext._getOrAddDatabaseFromConfig(ctx, DatabaseConfig{DbConfig: dbConfig}, + getOrAddDatabaseConfigOptions{ + failFast: false, + useExisting: false, + }) if persistentConfig || scopes == false { require.NoError(t, err) require.NotNil(t, dbContext) diff --git a/rest/serverless_test.go b/rest/serverless_test.go index cfd0c49376..23ab48c109 100644 --- a/rest/serverless_test.go +++ b/rest/serverless_test.go @@ -51,7 +51,7 @@ func TestServerlessPollBuckets(t *testing.T) { assert.Empty(t, configs) // Create a database - rt2 := NewRestTester(t, &RestTesterConfig{CustomTestBucket: tb1.NoCloseClone(), PersistentConfig: true, groupID: &sc.Config.Bootstrap.ConfigGroupID}) + rt2 := NewRestTester(t, &RestTesterConfig{CustomTestBucket: tb1.NoCloseClone(), PersistentConfig: true, GroupID: &sc.Config.Bootstrap.ConfigGroupID}) defer rt2.Close() // Create a new db on the RT to confirm fetch won't retrieve it (due to bucket not being in BucketCredentials) resp := 
rt2.SendAdminRequest(http.MethodPut, "/db/", fmt.Sprintf(`{ diff --git a/rest/upgradetest/remove_collection_test.go b/rest/upgradetest/remove_collection_test.go new file mode 100644 index 0000000000..ca090396ae --- /dev/null +++ b/rest/upgradetest/remove_collection_test.go @@ -0,0 +1,112 @@ +// Copyright 2023-Present Couchbase, Inc. +// +// Use of this software is governed by the Business Source License included +// in the file licenses/BSL-Couchbase.txt. As of the Change Date specified +// in that file, in accordance with the Business Source License, use of this +// software will be governed by the Apache License, Version 2.0, included in +// the file licenses/APL2.txt. + +package upgradetest + +import ( + "fmt" + "net/http" + "testing" + + "github.com/couchbase/sync_gateway/base" + "github.com/couchbase/sync_gateway/rest" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestRemoveCollection tests when a collection has been removed from CBS, and the server is restarted. We should be able to modify or delete the database. +func TestRemoveCollection(t *testing.T) { + if base.UnitTestUrlIsWalrus() { + t.Skip("test relies on bootstrap connection and needs CBS") + } + base.TestRequiresCollections(t) + base.RequireNumTestBuckets(t, 2) + numCollections := 2 + bucket := base.GetPersistentTestBucket(t) + defer bucket.Close() + base.RequireNumTestDataStores(t, numCollections) + rtConfig := &rest.RestTesterConfig{ + CustomTestBucket: bucket.NoCloseClone(), + PersistentConfig: true, + GroupID: base.StringPtr(t.Name()), + AdminInterfaceAuthentication: true, + } + rt := rest.NewRestTesterMultipleCollections(t, rtConfig, 2) + + dbConfig := rt.NewDbConfig() + dbConfig.Scopes = rest.GetCollectionsConfig(t, rt.TestBucket, numCollections) + + dbName := "removecollectiondb" + + dbcJSON, err := base.JSONMarshal(dbConfig) + require.NoError(t, err) + resp := rt.SendAdminRequestWithAuth(http.MethodPut, "/"+dbName+"/", string(dbcJSON), base.TestClusterUsername(), base.TestClusterPassword()) + rest.RequireStatus(t, resp, http.StatusCreated) + + dataStores := rt.TestBucket.GetNonDefaultDatastoreNames() + deletedDataStore := dataStores[1] + + defer func() { + assert.NoError(t, bucket.CreateDataStore(deletedDataStore)) + + }() + // drop a data store + require.NoError(t, rt.TestBucket.DropDataStore(deletedDataStore)) + require.Len(t, rt.TestBucket.GetNonDefaultDatastoreNames(), len(dataStores)-1) + + rt.Close() + rtConfig = &rest.RestTesterConfig{ + CustomTestBucket: bucket.NoCloseClone(), + PersistentConfig: true, + GroupID: base.StringPtr(t.Name()), + AdminInterfaceAuthentication: true, + } + + rt = rest.NewRestTesterMultipleCollections(t, rtConfig, 2) + defer rt.Close() + + bucket2Role := rest.RouteRole{ + RoleName: rest.MobileSyncGatewayRole.RoleName, + DatabaseScoped: true, + } + if base.TestsUseServerCE() { + bucket2Role = rest.RouteRole{ + RoleName: rest.BucketFullAccessRole.RoleName, + DatabaseScoped: true, + } + } + + eps, httpClient, err := rt.ServerContext().ObtainManagementEndpointsAndHTTPClient() + require.NoError(t, err) + + altBucket := base.GetTestBucket(t) + defer altBucket.Close() + const password = "password2" + rest.MakeUser(t, httpClient, eps[0], bucket2Role.RoleName, password, []string{fmt.Sprintf("%s[%s]", bucket2Role.RoleName, altBucket.GetName())}) + defer rest.DeleteUser(t, httpClient, eps[0], bucket2Role.RoleName) + + delete(dbConfig.Scopes[deletedDataStore.ScopeName()].Collections, deletedDataStore.CollectionName()) + + dbcJSON, err = 
base.JSONMarshal(dbConfig) + require.NoError(t, err) + + resp = rt.SendAdminRequestWithAuth(http.MethodPost, "/"+dbName+"/", string(dbcJSON), base.TestClusterUsername(), base.TestClusterPassword()) + rest.RequireStatus(t, resp, http.StatusForbidden) + + // wrong RBAC user + resp = rt.SendAdminRequestWithAuth(http.MethodDelete, "/"+dbName+"/", "", bucket2Role.RoleName, password) + rest.RequireStatus(t, resp, http.StatusForbidden) + + // bad credentials + resp = rt.SendAdminRequestWithAuth(http.MethodDelete, "/"+dbName+"/", "", "baduser", "badpassword") + rest.RequireStatus(t, resp, http.StatusUnauthorized) + + resp = rt.SendAdminRequestWithAuth(http.MethodDelete, "/"+dbName+"/", "", base.TestClusterUsername(), base.TestClusterPassword()) + rest.RequireStatus(t, resp, http.StatusOK) + +} diff --git a/rest/utilities_testing.go b/rest/utilities_testing.go index 1960d983e1..a63710aa2c 100644 --- a/rest/utilities_testing.go +++ b/rest/utilities_testing.go @@ -66,7 +66,7 @@ type RestTesterConfig struct { enableAdminAuthPermissionsCheck bool useTLSServer bool // If true, TLS will be required for communications with CBS. Default: false PersistentConfig bool - groupID *string + GroupID *string serverless bool // Runs SG in serverless mode. Must be used in conjunction with persistent config collectionConfig collectionConfiguration numCollections int @@ -227,8 +227,8 @@ func (rt *RestTester) Bucket() base.Bucket { } } - if rt.RestTesterConfig.groupID != nil { - sc.Bootstrap.ConfigGroupID = *rt.RestTesterConfig.groupID + if rt.RestTesterConfig.GroupID != nil { + sc.Bootstrap.ConfigGroupID = *rt.RestTesterConfig.GroupID } else if rt.RestTesterConfig.PersistentConfig { // If running in persistent config mode, the database has to be manually created. If the db name is the same as a // past tests db name, a db already exists error could happen if the past tests bucket is still flushing. Prevent this @@ -270,7 +270,7 @@ func (rt *RestTester) Bucket() base.Bucket { rt.TestBucket.BucketSpec.TLSSkipVerify = base.TestTLSSkipVerify() - if err := rt.RestTesterServerContext.initializeCouchbaseServerConnections(ctx); err != nil { + if err := rt.RestTesterServerContext.initializeCouchbaseServerConnections(ctx, true); err != nil { panic("Couldn't initialize Couchbase Server connection: " + err.Error()) } } @@ -1181,9 +1181,14 @@ func (s *SlowResponseRecorder) Write(buf []byte) (int, error) { // AddDatabaseFromConfigWithBucket adds a database to the ServerContext and sets a specific bucket on the database context. // If an existing config is found for the name, returns an error. 
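One more note on the options plumbing in this patch: getOrAddDatabaseConfigOptions carries a connectToBucketFn precisely so tests (such as AddDatabaseFromConfigWithBucket just below) can inject a ready-made bucket instead of opening a real connection. A minimal sketch of that function-field test seam follows, using invented names and types rather than the real BucketSpec/Bucket ones.

package main

import "fmt"

// openFn mirrors the role of OpenBucketFn/connectToBucketFn in the patch:
// production code uses the real opener, tests swap in a stub.
type openFn func(name string, failFast bool) (string, error)

type options struct {
	failFast bool
	open     openFn // test seam; nil means "use the real opener"
}

func realOpen(name string, failFast bool) (string, error) {
	return "real bucket " + name, nil
}

func getBucket(name string, opts options) (string, error) {
	var open openFn = realOpen
	if opts.open != nil {
		open = opts.open
	}
	return open(name, opts.failFast)
}

func main() {
	// Production path: no override supplied.
	b, _ := getBucket("travel-sample", options{})
	fmt.Println(b)

	// Test path: inject a stub that returns a canned bucket.
	stub := func(string, bool) (string, error) { return "fake bucket", nil }
	b, _ = getBucket("travel-sample", options{open: stub})
	fmt.Println(b)
}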
func (sc *ServerContext) AddDatabaseFromConfigWithBucket(ctx context.Context, tb testing.TB, config DatabaseConfig, bucket base.Bucket) (*db.DatabaseContext, error) { - return sc.getOrAddDatabaseFromConfig(ctx, config, false, func(ctx context.Context, spec base.BucketSpec) (base.Bucket, error) { - return bucket, nil - }) + options := getOrAddDatabaseConfigOptions{ + useExisting: false, + failFast: false, + connectToBucketFn: func(_ context.Context, spec base.BucketSpec, _ bool) (base.Bucket, error) { + return bucket, nil + }, + } + return sc.getOrAddDatabaseFromConfig(ctx, config, options) } // The parameters used to create a BlipTester @@ -2457,6 +2462,7 @@ func (rt *RestTester) GetChangesOneShot(t testing.TB, keyspace string, since int } func (rt *RestTester) NewDbConfig() DbConfig { + // make sure bucket has been initialized config := DbConfig{ BucketConfig: BucketConfig{ Bucket: base.StringPtr(rt.Bucket().GetName()), From c7b78bfa81edd97ad4086fdc2e15428b05358666 Mon Sep 17 00:00:00 2001 From: Tor Colvin Date: Wed, 31 May 2023 05:26:45 -0400 Subject: [PATCH 33/42] Make tests pass with default collections/views (#6267) --- rest/api_test.go | 8 ++++++-- rest/cors_test.go | 16 ++++++++++++---- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/rest/api_test.go b/rest/api_test.go index 4feb418d0b..04a3c78dcc 100644 --- a/rest/api_test.go +++ b/rest/api_test.go @@ -260,8 +260,12 @@ func TestCORSOrigin(t *testing.T) { response := rt.SendRequestWithHeaders(method, "/{{.keyspace}}/", "", reqHeaders) assert.Equal(t, tc.headerOutput, response.Header().Get("Access-Control-Allow-Origin")) if method == http.MethodGet { - RequireStatus(t, response, http.StatusBadRequest) - require.Contains(t, response.Body.String(), invalidDatabaseName) + if base.TestsUseNamedCollections() { + RequireStatus(t, response, http.StatusBadRequest) + require.Contains(t, response.Body.String(), invalidDatabaseName) + } else { // CBG-2978, should not be different from GSI/collections + RequireStatus(t, response, http.StatusUnauthorized) + } } else { RequireStatus(t, response, http.StatusNoContent) diff --git a/rest/cors_test.go b/rest/cors_test.go index 5c46d660eb..506669e9d1 100644 --- a/rest/cors_test.go +++ b/rest/cors_test.go @@ -44,8 +44,12 @@ func TestCORSDynamicSet(t *testing.T) { response := rt.SendRequestWithHeaders(method, "/{{.keyspace}}/", "", reqHeaders) require.Equal(t, "http://example.com", response.Header().Get("Access-Control-Allow-Origin")) if method == http.MethodGet { - RequireStatus(t, response, http.StatusBadRequest) - require.Contains(t, response.Body.String(), invalidDatabaseName) + if base.TestsUseNamedCollections() { + RequireStatus(t, response, http.StatusBadRequest) + require.Contains(t, response.Body.String(), invalidDatabaseName) + } else { // CBG-2978, should not be different from GSI/collections + RequireStatus(t, response, http.StatusUnauthorized) + } } else { RequireStatus(t, response, http.StatusNoContent) } @@ -92,8 +96,12 @@ func TestCORSDynamicSet(t *testing.T) { response := rt.SendRequestWithHeaders(method, "/{{.keyspace}}/", "", reqHeaders) if method == http.MethodGet { require.Equal(t, "http://example.com", response.Header().Get("Access-Control-Allow-Origin")) - RequireStatus(t, response, http.StatusBadRequest) - require.Contains(t, response.Body.String(), invalidDatabaseName) + if base.TestsUseNamedCollections() { + RequireStatus(t, response, http.StatusBadRequest) + require.Contains(t, response.Body.String(), invalidDatabaseName) + } else { // CBG-2978, should not be 
different from GSI/collections + RequireStatus(t, response, http.StatusUnauthorized) + } } else { // information leak: the options request knows about the database and knows it doesn't match require.Equal(t, "", response.Header().Get("Access-Control-Allow-Origin")) From 2940cb164c7dcd2cb9aa01c8f425411f92f56b30 Mon Sep 17 00:00:00 2001 From: Gregory Newman-Smith <109068393+gregns1@users.noreply.github.com> Date: Wed, 31 May 2023 15:02:30 +0100 Subject: [PATCH 34/42] CBG-3043: pick up cbgt fix for panic in import feed (#6270) --- go.mod | 16 ++++++++-------- go.sum | 32 ++++++++++++++++---------------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/go.mod b/go.mod index d13419c0d2..6fa234c045 100644 --- a/go.mod +++ b/go.mod @@ -5,13 +5,13 @@ go 1.19 require ( github.com/bhoriuchi/graphql-go-tools v1.0.0 github.com/coreos/go-oidc v2.2.1+incompatible - github.com/couchbase/cbgt v1.3.2-0.20230405201040-c0ede9925e46 + github.com/couchbase/cbgt v1.3.4 github.com/couchbase/clog v0.1.0 github.com/couchbase/go-blip v0.0.0-20230510201532-fcadc404bd41 github.com/couchbase/go-couchbase v0.1.1 github.com/couchbase/gocb/v2 v2.6.2 - github.com/couchbase/gocbcore/v10 v10.2.3-0.20230412164057-d9c465de8911 - github.com/couchbase/gomemcached v0.1.4 + github.com/couchbase/gocbcore/v10 v10.2.4-0.20230511103754-8dd1a95f5f33 + github.com/couchbase/gomemcached v0.2.1 github.com/couchbase/goutils v0.1.2 github.com/couchbase/sg-bucket v0.0.0-20230113211151-ac6a75f57046 github.com/couchbaselabs/go-fleecedelta v0.0.0-20200408160354-2ed3f45fde8f @@ -31,9 +31,9 @@ require ( github.com/samuel/go-metrics v0.0.0-20150819231912-7ccf3e0e1fb1 github.com/shirou/gopsutil v3.21.11+incompatible github.com/stretchr/testify v1.8.2 - golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa + golang.org/x/crypto v0.7.0 golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e - golang.org/x/net v0.0.0-20220919232410-f2f64ebce3c1 + golang.org/x/net v0.8.0 golang.org/x/oauth2 v0.0.0-20220718184931-c8730f7fcb92 gopkg.in/couchbaselabs/gocbconnstr.v1 v1.0.4 gopkg.in/square/go-jose.v2 v2.6.0 @@ -46,7 +46,7 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.1.2 // indirect github.com/couchbase/blance v0.1.3 // indirect - github.com/couchbase/cbauth v0.1.9 // indirect + github.com/couchbase/cbauth v0.1.10 // indirect github.com/couchbase/tools-common v0.0.0-20220810163003-4c3c185822d4 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/go-ole/go-ole v1.2.6 // indirect @@ -68,8 +68,8 @@ require ( github.com/stretchr/objx v0.5.0 // indirect github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a // indirect github.com/yusufpapurcu/wmi v1.2.2 // indirect - golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10 // indirect - golang.org/x/text v0.4.0 // indirect + golang.org/x/sys v0.6.0 // indirect + golang.org/x/text v0.8.0 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/protobuf v1.28.0 // indirect gopkg.in/couchbase/gocb.v1 v1.6.7 // indirect diff --git a/go.sum b/go.sum index f8dfff29f7..cfd74028c3 100644 --- a/go.sum +++ b/go.sum @@ -63,10 +63,10 @@ github.com/coreos/go-oidc v2.2.1+incompatible h1:mh48q/BqXqgjVHpy2ZY7WnWAbenxRjs github.com/coreos/go-oidc v2.2.1+incompatible/go.mod h1:CgnwVTmzoESiwO9qyAFEMiHoZ1nMCKZlZ9V6mm3/LKc= github.com/couchbase/blance v0.1.3 h1:CJCirD3+N02Z0w/ybZTqqSJa9XMbsCZO9jHxCEAPQqE= github.com/couchbase/blance v0.1.3/go.mod h1:2Sa/nsJSieN/r3T9LsrUYWeQ015qDsuHybhz4F4JcHU= -github.com/couchbase/cbauth v0.1.9 
h1:eco/KAIyEIeLNcEXFG6BTsCLPTVAI28lu9FH91ehvM4= -github.com/couchbase/cbauth v0.1.9/go.mod h1:overPK2NvYkkZBWX0eqmuHB82nhhs8rt7A4wi5u7c2g= -github.com/couchbase/cbgt v1.3.2-0.20230405201040-c0ede9925e46 h1:tRxeXfSHBzAq6mtc9NCXLy+BfI3SFMdQZFH3rYarw5M= -github.com/couchbase/cbgt v1.3.2-0.20230405201040-c0ede9925e46/go.mod h1:tJF3TUUO3ZDBU15auN1gNsIVY3Oo+jj46zIXH4RBxk4= +github.com/couchbase/cbauth v0.1.10 h1:ixJGG9mAgL1HnWKt2eKmJ8gJa0rkaUURtDPDak7Dcts= +github.com/couchbase/cbauth v0.1.10/go.mod h1:YHG+5rUI6GoLLlIViT9IfD0nwqEebBcl66TQ4pDFDw4= +github.com/couchbase/cbgt v1.3.4 h1:T5Wi4TOxRRYSYyhgl9R5mwxpQ8wo7pwTkzbomj9dBVM= +github.com/couchbase/cbgt v1.3.4/go.mod h1:PIJ8U/25mrhsn4Dc3eK9iLXZ98jX1i2YuGH2Od+JVv4= github.com/couchbase/clog v0.1.0 h1:4Kh/YHkhRjMCbdQuvRVsm39XZh4FtL1d8fAwJsHrEPY= github.com/couchbase/clog v0.1.0/go.mod h1:7tzUpEOsE+fgU81yfcjy5N1H6XtbVC8SgOz/3mCjmd4= github.com/couchbase/go-blip v0.0.0-20230510201532-fcadc404bd41 h1:pjBwvGjhloggITOU9Fqg4yQ/lbZJUHnz8OsYUUczQDw= @@ -76,10 +76,10 @@ github.com/couchbase/go-couchbase v0.1.1/go.mod h1:+/bddYDxXsf9qt0xpDUtRR47A2Gja github.com/couchbase/gocb/v2 v2.6.2 h1:sZg0+3GiYW7OT53ENEGnkkQMXhVuJ1qOJplvZDlM5Xk= github.com/couchbase/gocb/v2 v2.6.2/go.mod h1:baRw5pIpzHil7q39M2zm+bon+ZgjgNAilkKI813zPiE= github.com/couchbase/gocbcore/v10 v10.2.2/go.mod h1:lYQIIk+tzoMcwtwU5GzPbDdqEkwkH3isI2rkSpfL0oM= -github.com/couchbase/gocbcore/v10 v10.2.3-0.20230412164057-d9c465de8911 h1:w9pxVkd0o0Fzk6QgtZGaPj1g9Bt7sNO1eLm2rIteQTA= -github.com/couchbase/gocbcore/v10 v10.2.3-0.20230412164057-d9c465de8911/go.mod h1:lYQIIk+tzoMcwtwU5GzPbDdqEkwkH3isI2rkSpfL0oM= -github.com/couchbase/gomemcached v0.1.4 h1:5n5wmr4dBu+X7XteP8QHP5S9inK9MBjNpN9b7WSQfuA= -github.com/couchbase/gomemcached v0.1.4/go.mod h1:mxliKQxOv84gQ0bJWbI+w9Wxdpt9HjDvgW9MjCym5Vo= +github.com/couchbase/gocbcore/v10 v10.2.4-0.20230511103754-8dd1a95f5f33 h1:l6O5889o5cJBwWt1gws5q5dzi/slDba/KvGNiWY9ieQ= +github.com/couchbase/gocbcore/v10 v10.2.4-0.20230511103754-8dd1a95f5f33/go.mod h1:lYQIIk+tzoMcwtwU5GzPbDdqEkwkH3isI2rkSpfL0oM= +github.com/couchbase/gomemcached v0.2.1 h1:lDONROGbklo8pOt4Sr4eV436PVEaKDr3o9gUlhv9I2U= +github.com/couchbase/gomemcached v0.2.1/go.mod h1:mxliKQxOv84gQ0bJWbI+w9Wxdpt9HjDvgW9MjCym5Vo= github.com/couchbase/goutils v0.1.2 h1:gWr8B6XNWPIhfalHNog3qQKfGiYyh4K4VhO3P2o9BCs= github.com/couchbase/goutils v0.1.2/go.mod h1:h89Ek/tiOxxqjz30nPPlwZdQbdB8BwgnuBxeoUe/ViE= github.com/couchbase/sg-bucket v0.0.0-20230113211151-ac6a75f57046 h1:fyGdhMTONSnC9Sqhc0f9KXhT5Pmpst7obe3Tg92xQsk= @@ -333,8 +333,8 @@ golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa h1:zuSxTR4o9y82ebqCUJYNGJbGPo6sKVl54f/TVDObg1c= -golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.7.0 h1:AvwMYaRytfdeVt3u6mLaxYtErKYjxA2OXjJ1HHq6t3A= +golang.org/x/crypto v0.7.0/go.mod h1:pYwdfH91IfpZVANVyUOhSIPZaFoJGxTFbZhFTx+dXZU= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -397,8 +397,8 @@ golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81R golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= -golang.org/x/net v0.0.0-20220919232410-f2f64ebce3c1 h1:TWZxd/th7FbRSMret2MVQdlI8uT49QEtwZdvJrxjEHU= -golang.org/x/net v0.0.0-20220919232410-f2f64ebce3c1/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk= +golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ= +golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -459,8 +459,8 @@ golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10 h1:WIoqL4EROvwiPdUtaip4VcDdpZ4kha7wBWZrbVKCIZg= -golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -470,8 +470,8 @@ golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.4.0 h1:BrVqGRd7+k1DiOgtnFvAkoQEWQvBc25ouMJM6429SFg= -golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68= +golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= From 7dff1d172880c6d3fab1441531d3cbafc0a616c1 Mon Sep 17 00:00:00 2001 From: Gregory Newman-Smith <109068393+gregns1@users.noreply.github.com> Date: Thu, 1 Jun 2023 14:23:55 +0100 Subject: [PATCH 35/42] CBG-3028: fixes for failing CE tests (#6279) * 
CBG-3028: fixes for failing CE tests * remove print line * updates off comment --- rest/config_database.go | 6 +++++- rest/replicatortest/replicator_test.go | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/rest/config_database.go b/rest/config_database.go index 335192a0e0..8d10f75ac2 100644 --- a/rest/config_database.go +++ b/rest/config_database.go @@ -90,6 +90,10 @@ func MergeDatabaseConfigWithDefaults(sc *StartupConfig, dbConfig *DbConfig) (*Db // to provide defaults to include_runtime config endpoints. // Note that this does not include unsupported options func DefaultDbConfig(sc *StartupConfig) *DbConfig { + var partitions *uint16 + if base.IsEnterpriseEdition() { + partitions = base.Uint16Ptr(base.GetDefaultImportPartitions(sc.IsServerless())) + } dbConfig := DbConfig{ BucketConfig: BucketConfig{}, Name: "", @@ -98,7 +102,7 @@ func DefaultDbConfig(sc *StartupConfig) *DbConfig { Roles: nil, RevsLimit: nil, // Set this below struct AutoImport: base.BoolPtr(base.DefaultAutoImport), - ImportPartitions: base.Uint16Ptr(base.GetDefaultImportPartitions(sc.IsServerless())), + ImportPartitions: partitions, ImportFilter: nil, ImportBackupOldRev: base.BoolPtr(false), EventHandlers: nil, diff --git a/rest/replicatortest/replicator_test.go b/rest/replicatortest/replicator_test.go index e829fa1f61..c5410b17f5 100644 --- a/rest/replicatortest/replicator_test.go +++ b/rest/replicatortest/replicator_test.go @@ -1901,8 +1901,8 @@ func TestDBReplicationStatsTeardown(t *testing.T) { // If CE, recreate the replication if !base.IsEnterpriseEdition() { - rt.CreateReplication("repl1", db2Url.String(), db.ActiveReplicatorTypePush, nil, true, db.ConflictResolverDefault) - rt.WaitForReplicationStatus("repl1", db.ReplicationStateRunning) + rt.CreateReplicationForDB("{{.db1}}", "repl1", db2Url.String(), db.ActiveReplicatorTypePush, nil, true, db.ConflictResolverDefault) + rt.WaitForReplicationStatusForDB("{{.db1}}", "repl1", db.ReplicationStateRunning) } // Wait for second document to replicate to confirm replication restart From 76f92e08564bd3255a4af9835914c5ef1701be8a Mon Sep 17 00:00:00 2001 From: Tor Colvin Date: Thu, 1 Jun 2023 10:15:27 -0400 Subject: [PATCH 36/42] CBG-2857 Remove unambiguous timeouts from triggering cbcollections (#6280) --- jenkins-integration-build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jenkins-integration-build.sh b/jenkins-integration-build.sh index 509d0f52ee..6504d673dd 100755 --- a/jenkins-integration-build.sh +++ b/jenkins-integration-build.sh @@ -149,7 +149,7 @@ if [ "${PIPESTATUS[0]}" -ne "0" ]; then # If test exit code is not 0 (failed) fi # Collect CBS logs if server error occurred -if [ "${SG_CBCOLLECT_ALWAYS:-}" == "true" ] || grep -a -q "server logs for details\|Timed out after 1m0s waiting for a bucket to become available\|unambiguous timeout" "${INT_LOG_FILE_NAME}.out.raw"; then +if [ "${SG_CBCOLLECT_ALWAYS:-}" == "true" ] || grep -a -q "server logs for details\|Timed out after 1m0s waiting for a bucket to become available" "${INT_LOG_FILE_NAME}.out.raw"; then docker exec -t couchbase /opt/couchbase/bin/cbcollect_info /workspace/cbcollect.zip fi From b01ed47ed1d848d4cae15c76817b611b5e6a64d1 Mon Sep 17 00:00:00 2001 From: Gregory Newman-Smith <109068393+gregns1@users.noreply.github.com> Date: Thu, 1 Jun 2023 15:41:17 +0100 Subject: [PATCH 37/42] CBG-2793: attachment compaction code erroneously sets failOnRollback (#6258) * CBG-2793: attachment compaction code erroneously sets failOnRollback * spelling errors and test 
logging removal + lint error * updates for comments and try fix race condition * linter error fix * changes for jenkins * updates to add handling for cleanup phase * remove logging line * fix race in Jenkins * updates to fix collections issue * updates based off comment * updates * updates to address complexity comments --- base/dcp_client.go | 16 +-- base/dcp_client_metadata.go | 11 +++ base/dcp_client_test.go | 41 +++++--- db/attachment_compaction.go | 49 +++++----- db/attachment_compaction_test.go | 97 +++++++++++++++++-- db/background_mgr_attachment_compaction.go | 84 +++++++++++++++- .../attachment_compaction_api_test.go | 63 ++++++++++++ 7 files changed, 308 insertions(+), 53 deletions(-) diff --git a/base/dcp_client.go b/base/dcp_client.go index a973e7dd38..d57cf465c7 100644 --- a/base/dcp_client.go +++ b/base/dcp_client.go @@ -26,7 +26,7 @@ import ( const openStreamTimeout = 30 * time.Second const openRetryCount = uint32(10) -const defaultNumWorkers = 8 +const DefaultNumWorkers = 8 // DCP buffer size if we are running in serverless const DefaultDCPBufferServerless = 1 * 1024 * 1024 @@ -37,7 +37,7 @@ const infiniteOpenStreamRetries = uint32(math.MaxUint32) type endStreamCallbackFunc func(e endStreamEvent) -var errVbUUIDMismatch = errors.New("VbUUID mismatch when failOnRollback set") +var ErrVbUUIDMismatch = errors.New("VbUUID mismatch when failOnRollback set") type DCPClient struct { ID string // unique ID for DCPClient - used for DCP stream name, must be unique @@ -81,7 +81,7 @@ type DCPClientOptions struct { func NewDCPClient(ID string, callback sgbucket.FeedEventCallbackFunc, options DCPClientOptions, bucket *GocbV2Bucket) (*DCPClient, error) { - numWorkers := defaultNumWorkers + numWorkers := DefaultNumWorkers if options.NumWorkers > 0 { numWorkers = options.NumWorkers } @@ -447,7 +447,7 @@ func (dc *DCPClient) openStream(vbID uint16, maxRetries uint32) error { case errors.As(openStreamErr, &rollbackErr): if dc.failOnRollback { InfofCtx(logCtx, KeyDCP, "Open stream for vbID %d failed due to rollback or range error, closing client based on failOnRollback=true", vbID) - return fmt.Errorf("%s, failOnRollback requested", openStreamErr) + return fmt.Errorf("%w, failOnRollback requested", openStreamErr) } InfofCtx(logCtx, KeyDCP, "Open stream for vbID %d failed due to rollback or range error, will roll back metadata and retry: %v", vbID, openStreamErr) @@ -456,7 +456,7 @@ func (dc *DCPClient) openStream(vbID uint16, maxRetries uint32) error { err := fmt.Errorf("Invalid metadata out of range for vbID %d, err: %v metadata %+v, shutting down agent", vbID, openStreamErr, dc.metadata.GetMeta(vbID)) WarnfCtx(logCtx, "%s", err) return err - case errors.Is(openStreamErr, errVbUUIDMismatch): + case errors.Is(openStreamErr, ErrVbUUIDMismatch): WarnfCtx(logCtx, "Closing Stream for vbID: %d, %s", vbID, openStreamErr) return openStreamErr case errors.Is(openStreamErr, gocbcore.ErrShutdown): @@ -556,7 +556,7 @@ func (dc *DCPClient) verifyFailoverLog(vbID uint16, f []gocbcore.FailoverEntry) currentVbUUID := getLatestVbUUID(f) // if previousVbUUID hasn't been set yet (is zero), don't treat as rollback. 
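// A mismatch is reported as ErrVbUUIDMismatch, exported by this change so that callers such as the
// attachment compaction manager can detect the rollback with errors.Is and purge their persisted DCP checkpoints.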
if previousMeta.VbUUID != currentVbUUID { - return errVbUUIDMismatch + return ErrVbUUIDMismatch } } return nil @@ -657,3 +657,7 @@ func getLatestVbUUID(failoverLog []gocbcore.FailoverEntry) (vbUUID gocbcore.VbUU entry := failoverLog[len(failoverLog)-1] return entry.VbUUID } + +func (dc *DCPClient) GetMetadataKeyPrefix() string { + return dc.metadata.GetKeyPrefix() +} diff --git a/base/dcp_client_metadata.go b/base/dcp_client_metadata.go index 4137421ba1..8a5542bac6 100644 --- a/base/dcp_client_metadata.go +++ b/base/dcp_client_metadata.go @@ -62,6 +62,9 @@ type DCPMetadataStore interface { // Purge removes all metadata associated with the metadata store from the bucket. It does not remove the // in-memory metadata. Purge(numWorkers int) + + // GetKeyPrefix will retrieve the key prefix used for metadata persistence + GetKeyPrefix() string } type dcpMetadataBase struct { @@ -155,6 +158,10 @@ func (md *DCPMetadataMem) Purge(numWorkers int) { return } +func (md *DCPMetadataMem) GetKeyPrefix() string { + return "" +} + // Reset sets metadata sequences to zero, but maintains vbucket UUID and failover entries. Used for scenarios // that want to restart a feed from zero, but detect failover func (md *DCPMetadata) Reset() { @@ -261,6 +268,10 @@ func (m *DCPMetadataCS) Purge(numWorkers int) { } } +func (m *DCPMetadataCS) GetKeyPrefix() string { + return m.keyPrefix +} + func (m *DCPMetadataCS) getMetadataKey(workerID int) string { return fmt.Sprintf("%s%d", m.keyPrefix, workerID) } diff --git a/base/dcp_client_test.go b/base/dcp_client_test.go index 87994ad828..15647522e8 100644 --- a/base/dcp_client_test.go +++ b/base/dcp_client_test.go @@ -341,7 +341,7 @@ func TestContinuousDCPRollback(t *testing.T) { counterCallback := func(event sgbucket.FeedEvent) bool { if bytes.HasPrefix(event.Key, []byte(t.Name())) { atomic.AddUint64(&mutationCount, 1) - if atomic.LoadUint64(&mutationCount) == uint64(1000) { + if atomic.LoadUint64(&mutationCount) == uint64(10000) { c <- true } } @@ -374,14 +374,11 @@ func TestContinuousDCPRollback(t *testing.T) { dcpClient, err := NewDCPClient(feedID, counterCallback, dcpClientOpts, gocbv2Bucket) require.NoError(t, err) - // function to force the rollback of some vBuckets - dcpClient.forceRollbackvBucket(vbUUID) - _, startErr := dcpClient.Start() require.NoError(t, startErr) // Add documents - const numDocs = 1000 + const numDocs = 10000 updatedBody := map[string]interface{}{"foo": "bar"} for i := 0; i < numDocs; i++ { key := fmt.Sprintf("%s_%d", t.Name(), i) @@ -393,17 +390,38 @@ func TestContinuousDCPRollback(t *testing.T) { select { case <-c: mutationCount := atomic.LoadUint64(&mutationCount) - require.Equal(t, uint64(1000), mutationCount) + require.Equal(t, uint64(10000), mutationCount) case <-timeout: t.Fatalf("timeout on client reached") } + // new dcp client to simulate a rollback + dcpClientOpts = DCPClientOptions{ + InitialMetadata: dcpClient.GetMetadata(), + FailOnRollback: false, + OneShot: false, + CollectionIDs: collectionIDs, + CheckpointPrefix: DefaultMetadataKeys.DCPCheckpointPrefix(t.Name()), + MetadataStoreType: DCPMetadataStoreInMemory, + } + require.NoError(t, dcpClient.Close()) + + dcpClient1, err := NewDCPClient(feedID, counterCallback, dcpClientOpts, gocbv2Bucket) + require.NoError(t, err) + // function to force the rollback of some vBuckets + dcpClient1.forceRollbackvBucket(vbUUID) + + _, startErr = dcpClient1.Start() + require.NoError(t, startErr) + // Assert that the number of vBuckets active are the same as the total number of vBuckets on the 
client. // In continuous rollback the streams should not close after they're finished. - numVBuckets := len(dcpClient.activeVbuckets) - require.Equal(t, dcpClient.numVbuckets, uint16(numVBuckets)) + numVBuckets := len(dcpClient1.activeVbuckets) + require.Equal(t, dcpClient1.numVbuckets, uint16(numVBuckets)) - require.NoError(t, dcpClient.Close()) + defer func() { + assert.NoError(t, dcpClient1.Close()) + }() } @@ -412,13 +430,12 @@ func TestContinuousDCPRollback(t *testing.T) { func (dc *DCPClient) forceRollbackvBucket(uuid gocbcore.VbUUID) { metadata := make([]DCPMetadata, dc.numVbuckets) for i := uint16(0); i < dc.numVbuckets; i++ { + // rollback roughly half the vBuckets if i%2 == 0 { metadata[i] = dc.metadata.GetMeta(i) metadata[i].VbUUID = uuid - } else { - metadata[i] = dc.metadata.GetMeta(i) + dc.metadata.SetMeta(i, metadata[i]) } - dc.metadata.SetMeta(i, metadata[i]) } } diff --git a/db/attachment_compaction.go b/db/attachment_compaction.go index d0e565d0ec..cd5fb914bf 100644 --- a/db/attachment_compaction.go +++ b/db/attachment_compaction.go @@ -29,7 +29,7 @@ const ( CleanupPhase = "cleanup" ) -func attachmentCompactMarkPhase(ctx context.Context, dataStore base.DataStore, collectionID uint32, db *Database, compactionID string, terminator *base.SafeTerminator, markedAttachmentCount *base.AtomicInt) (count int64, vbUUIDs []uint64, err error) { +func attachmentCompactMarkPhase(ctx context.Context, dataStore base.DataStore, collectionID uint32, db *Database, compactionID string, terminator *base.SafeTerminator, markedAttachmentCount *base.AtomicInt) (count int64, vbUUIDs []uint64, checkpointPrefix string, err error) { base.InfofCtx(ctx, base.KeyAll, "Starting first phase of attachment compaction (mark phase) with compactionID: %q", compactionID) compactionLoggingID := "Compaction Mark: " + compactionID @@ -131,32 +131,30 @@ func attachmentCompactMarkPhase(ctx context.Context, dataStore base.DataStore, c clientOptions, err := getCompactionDCPClientOptions(collectionID, db.Options.GroupID, db.MetadataKeys.DCPCheckpointPrefix(db.Options.GroupID)) if err != nil { - return 0, nil, err + return 0, nil, "", err } base.InfofCtx(ctx, base.KeyAll, "[%s] Starting DCP feed for mark phase of attachment compaction", compactionLoggingID) - dcpFeedKey := generateCompactionDCPStreamName(compactionID, MarkPhase) - if err != nil { - return 0, nil, err - } + dcpFeedKey := GenerateCompactionDCPStreamName(compactionID, MarkPhase) bucket, err := base.AsGocbV2Bucket(db.Bucket) if err != nil { - return 0, nil, err + return 0, nil, "", err } dcpClient, err := base.NewDCPClient(dcpFeedKey, callback, *clientOptions, bucket) if err != nil { base.WarnfCtx(ctx, "[%s] Failed to create attachment compaction DCP client! %v", compactionLoggingID, err) - return 0, nil, err + return 0, nil, "", err } + metadataKeyPrefix := dcpClient.GetMetadataKeyPrefix() doneChan, err := dcpClient.Start() if err != nil { base.WarnfCtx(ctx, "[%s] Failed to start attachment compaction DCP feed! %v", compactionLoggingID, err) _ = dcpClient.Close() - return 0, nil, err + return 0, nil, metadataKeyPrefix, err } base.DebugfCtx(ctx, base.KeyAll, "[%s] DCP feed started.", compactionLoggingID) @@ -165,27 +163,27 @@ func attachmentCompactMarkPhase(ctx context.Context, dataStore base.DataStore, c base.InfofCtx(ctx, base.KeyAll, "[%s] Mark phase of attachment compaction completed. 
Marked %d attachments", compactionLoggingID, markedAttachmentCount.Value()) err = dcpClient.Close() if markProcessFailureErr != nil { - return markedAttachmentCount.Value(), nil, markProcessFailureErr + return markedAttachmentCount.Value(), nil, metadataKeyPrefix, markProcessFailureErr } case <-terminator.Done(): base.DebugfCtx(ctx, base.KeyAll, "[%s] Terminator closed. Stopping mark phase.", compactionLoggingID) err = dcpClient.Close() if markProcessFailureErr != nil { - return markedAttachmentCount.Value(), nil, markProcessFailureErr + return markedAttachmentCount.Value(), nil, metadataKeyPrefix, markProcessFailureErr } if err != nil { - return markedAttachmentCount.Value(), base.GetVBUUIDs(dcpClient.GetMetadata()), err + return markedAttachmentCount.Value(), base.GetVBUUIDs(dcpClient.GetMetadata()), metadataKeyPrefix, err } err = <-doneChan if err != nil { - return markedAttachmentCount.Value(), base.GetVBUUIDs(dcpClient.GetMetadata()), err + return markedAttachmentCount.Value(), base.GetVBUUIDs(dcpClient.GetMetadata()), metadataKeyPrefix, err } base.InfofCtx(ctx, base.KeyAll, "[%s] Mark phase of attachment compaction was terminated. Marked %d attachments", compactionLoggingID, markedAttachmentCount.Value()) } - return markedAttachmentCount.Value(), base.GetVBUUIDs(dcpClient.GetMetadata()), err + return markedAttachmentCount.Value(), base.GetVBUUIDs(dcpClient.GetMetadata()), metadataKeyPrefix, err } // AttachmentsMetaMap struct is a very minimal struct to unmarshal into when getting attachments from bodies @@ -363,7 +361,7 @@ func attachmentCompactSweepPhase(ctx context.Context, dataStore base.DataStore, } clientOptions.InitialMetadata = base.BuildDCPMetadataSliceFromVBUUIDs(vbUUIDs) - dcpFeedKey := generateCompactionDCPStreamName(compactionID, SweepPhase) + dcpFeedKey := GenerateCompactionDCPStreamName(compactionID, SweepPhase) bucket, err := base.AsGocbV2Bucket(db.Bucket) if err != nil { @@ -408,7 +406,7 @@ func attachmentCompactSweepPhase(ctx context.Context, dataStore base.DataStore, return purgedAttachmentCount.Value(), err } -func attachmentCompactCleanupPhase(ctx context.Context, dataStore base.DataStore, collectionID uint32, db *Database, compactionID string, vbUUIDs []uint64, terminator *base.SafeTerminator) error { +func attachmentCompactCleanupPhase(ctx context.Context, dataStore base.DataStore, collectionID uint32, db *Database, compactionID string, vbUUIDs []uint64, terminator *base.SafeTerminator) (string, error) { base.InfofCtx(ctx, base.KeyAll, "Starting third phase of attachment compaction (cleanup phase) with compactionID: %q", compactionID) compactionLoggingID := "Compaction Cleanup: " + compactionID @@ -495,31 +493,32 @@ func attachmentCompactCleanupPhase(ctx context.Context, dataStore base.DataStore clientOptions, err := getCompactionDCPClientOptions(collectionID, db.Options.GroupID, db.MetadataKeys.DCPCheckpointPrefix(db.Options.GroupID)) if err != nil { - return err + return "", err } clientOptions.InitialMetadata = base.BuildDCPMetadataSliceFromVBUUIDs(vbUUIDs) base.InfofCtx(ctx, base.KeyAll, "[%s] Starting DCP feed for cleanup phase of attachment compaction", compactionLoggingID) - dcpFeedKey := generateCompactionDCPStreamName(compactionID, CleanupPhase) + dcpFeedKey := GenerateCompactionDCPStreamName(compactionID, CleanupPhase) bucket, err := base.AsGocbV2Bucket(db.Bucket) if err != nil { - return err + return "", err } dcpClient, err := base.NewDCPClient(dcpFeedKey, callback, *clientOptions, bucket) if err != nil { base.WarnfCtx(ctx, "[%s] Failed to create 
attachment compaction DCP client! %v", compactionLoggingID, err) - return err + return "", err } + metadataKeyPrefix := dcpClient.GetMetadataKeyPrefix() doneChan, err := dcpClient.Start() if err != nil { base.WarnfCtx(ctx, "[%s] Failed to start attachment compaction DCP feed! %v", compactionLoggingID, err) // simplify close in CBG-2234 _ = dcpClient.Close() - return err + return metadataKeyPrefix, err } select { @@ -532,18 +531,18 @@ func attachmentCompactCleanupPhase(ctx context.Context, dataStore base.DataStore err = dcpClient.Close() if err != nil { base.WarnfCtx(ctx, "[%s] Failed to close attachment compaction DCP client! %v", compactionLoggingID, err) - return err + return metadataKeyPrefix, err } err = <-doneChan if err != nil { - return err + return metadataKeyPrefix, err } base.InfofCtx(ctx, base.KeyAll, "[%s] Cleanup phase of attachment compaction was terminated", compactionLoggingID) } - return err + return metadataKeyPrefix, err } // getCompactionIDSubDocPath is just a tiny helper func that just concatenates the subdoc path we're using to store @@ -566,7 +565,7 @@ func getCompactionDCPClientOptions(collectionID uint32, groupID string, prefix s } -func generateCompactionDCPStreamName(compactionID, compactionAction string) string { +func GenerateCompactionDCPStreamName(compactionID, compactionAction string) string { return fmt.Sprintf( "sg-%v:att_compaction:%v_%v", base.ProductAPIVersion, diff --git a/db/attachment_compaction_test.go b/db/attachment_compaction_test.go index 17a00bc56a..be96483a22 100644 --- a/db/attachment_compaction_test.go +++ b/db/attachment_compaction_test.go @@ -18,6 +18,7 @@ import ( "testing" "time" + "github.com/couchbase/gocbcore/v10" "github.com/couchbase/sync_gateway/base" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -60,7 +61,7 @@ func TestAttachmentMark(t *testing.T) { attKeys = append(attKeys, createDocWithInBodyAttachment(t, ctx, "inBodyDoc", []byte(`{}`), "attForInBodyRef", []byte(`{"val": "inBodyAtt"}`), databaseCollection)) terminator := base.NewSafeTerminator() - attachmentsMarked, _, err := attachmentCompactMarkPhase(ctx, dataStore, collectionID, testDb, t.Name(), terminator, &base.AtomicInt{}) + attachmentsMarked, _, _, err := attachmentCompactMarkPhase(ctx, dataStore, collectionID, testDb, t.Name(), terminator, &base.AtomicInt{}) assert.NoError(t, err) assert.Equal(t, int64(13), attachmentsMarked) @@ -196,7 +197,7 @@ func TestAttachmentCleanup(t *testing.T) { } terminator := base.NewSafeTerminator() - err := attachmentCompactCleanupPhase(ctx, dataStore, collectionID, testDb, t.Name(), nil, terminator) + _, err := attachmentCompactCleanupPhase(ctx, dataStore, collectionID, testDb, t.Name(), nil, terminator) assert.NoError(t, err) for _, docID := range singleMarkedAttIDs { @@ -234,6 +235,90 @@ func TestAttachmentCleanup(t *testing.T) { } +func TestAttachmentCleanupRollback(t *testing.T) { + if base.UnitTestUrlIsWalrus() { + t.Skip("This test only works against Couchbase Server since it requires DCP") + } + base.SetUpTestLogging(t, base.LevelInfo, base.KeyAll) + dbcOptions := DatabaseContextOptions{ + Scopes: GetScopesOptionsDefaultCollectionOnly(t), + } + testDb, ctx := SetupTestDBWithOptions(t, dbcOptions) + defer testDb.Close(ctx) + + var garbageVBUUID gocbcore.VbUUID = 1234 + collection := GetSingleDatabaseCollection(t, testDb.DatabaseContext) + dataStore := collection.dataStore + collectionID := collection.GetCollectionID() + + makeMarkedDoc := func(docid string, compactID string) { + err := 
dataStore.SetRaw(docid, 0, nil, []byte("{}")) + assert.NoError(t, err) + _, err = dataStore.SetXattr(docid, getCompactionIDSubDocPath(compactID), []byte(strconv.Itoa(int(time.Now().Unix())))) + assert.NoError(t, err) + } + + // create some marked attachments + singleMarkedAttIDs := make([]string, 0, 100) + for i := 0; i < 100; i++ { + docID := fmt.Sprintf("%s%s%d", base.AttPrefix, "marked", i) + makeMarkedDoc(docID, t.Name()) + singleMarkedAttIDs = append(singleMarkedAttIDs, docID) + } + + // assert there are marked attachments to clean up + for _, docID := range singleMarkedAttIDs { + var xattr map[string]interface{} + _, err := dataStore.GetXattr(docID, base.AttachmentCompactionXattrName, &xattr) + assert.NoError(t, err) + } + + bucket, err := base.AsGocbV2Bucket(testDb.Bucket) + require.NoError(t, err) + dcpFeedKey := GenerateCompactionDCPStreamName(t.Name(), CleanupPhase) + clientOptions, err := getCompactionDCPClientOptions(collectionID, testDb.Options.GroupID, testDb.MetadataKeys.DCPCheckpointPrefix(testDb.Options.GroupID)) + require.NoError(t, err) + dcpClient, err := base.NewDCPClient(dcpFeedKey, nil, *clientOptions, bucket) + require.NoError(t, err) + + // alter dcp metadata to feed into the compaction manager + vbUUID := base.GetVBUUIDs(dcpClient.GetMetadata()) + vbUUID[0] = uint64(garbageVBUUID) + + metadataKeys := base.NewMetadataKeys(testDb.Options.MetadataID) + testDb.AttachmentCompactionManager = NewAttachmentCompactionManager(dataStore, metadataKeys) + manager := AttachmentCompactionManager{CompactID: t.Name(), Phase: CleanupPhase, VBUUIDs: vbUUID} + testDb.AttachmentCompactionManager.Process = &manager + + terminator := base.NewSafeTerminator() + err = testDb.AttachmentCompactionManager.Process.Run(ctx, map[string]interface{}{"database": testDb}, testDb.AttachmentCompactionManager.UpdateStatusClusterAware, terminator) + require.NoError(t, err) + + err = WaitForConditionWithOptions(func() bool { + var status AttachmentManagerResponse + rawStatus, err := testDb.AttachmentCompactionManager.GetStatus() + assert.NoError(t, err) + err = base.JSONUnmarshal(rawStatus, &status) + require.NoError(t, err) + + if status.State == BackgroundProcessStateCompleted { + return true + } + + return false + }, 100, 1000) + require.NoError(t, err) + + // assert that the marked attachments have been "cleaned up" + for _, docID := range singleMarkedAttIDs { + var xattr map[string]interface{} + _, err := dataStore.GetXattr(docID, base.AttachmentCompactionXattrName, &xattr) + assert.Error(t, err) + assert.True(t, errors.Is(err, base.ErrXattrNotFound)) + } + +} + func TestAttachmentMarkAndSweepAndCleanup(t *testing.T) { base.SetUpTestLogging(t, base.LevelDebug, base.KeyAll) if base.UnitTestUrlIsWalrus() { @@ -271,7 +356,7 @@ func TestAttachmentMarkAndSweepAndCleanup(t *testing.T) { } terminator := base.NewSafeTerminator() - attachmentsMarked, vbUUIDS, err := attachmentCompactMarkPhase(ctx, dataStore, collectionID, testDb, t.Name(), terminator, &base.AtomicInt{}) + attachmentsMarked, vbUUIDS, _, err := attachmentCompactMarkPhase(ctx, dataStore, collectionID, testDb, t.Name(), terminator, &base.AtomicInt{}) assert.NoError(t, err) assert.Equal(t, int64(10), attachmentsMarked) @@ -293,7 +378,7 @@ func TestAttachmentMarkAndSweepAndCleanup(t *testing.T) { } } - err = attachmentCompactCleanupPhase(ctx, dataStore, collectionID, testDb, t.Name(), vbUUIDS, terminator) + _, err = attachmentCompactCleanupPhase(ctx, dataStore, collectionID, testDb, t.Name(), vbUUIDS, terminator) assert.NoError(t, err) for _, 
attDocKey := range attKeys { @@ -620,7 +705,7 @@ func TestAttachmentDifferentVBUUIDsBetweenPhases(t *testing.T) { // Run mark phase as usual terminator := base.NewSafeTerminator() - _, vbUUIDs, err := attachmentCompactMarkPhase(ctx, dataStore, collectionID, testDB, t.Name(), terminator, &base.AtomicInt{}) + _, vbUUIDs, _, err := attachmentCompactMarkPhase(ctx, dataStore, collectionID, testDB, t.Name(), terminator, &base.AtomicInt{}) assert.NoError(t, err) // Manually modify a vbUUID and ensure the Sweep phase errors @@ -891,7 +976,7 @@ func TestAttachmentCompactIncorrectStat(t *testing.T) { stat := &base.AtomicInt{} count := int64(0) go func() { - attachmentCount, _, err := attachmentCompactMarkPhase(ctx, dataStore, collectionID, testDb, "mark", terminator, stat) + attachmentCount, _, _, err := attachmentCompactMarkPhase(ctx, dataStore, collectionID, testDb, "mark", terminator, stat) atomic.StoreInt64(&count, attachmentCount) require.NoError(t, err) }() diff --git a/db/background_mgr_attachment_compaction.go b/db/background_mgr_attachment_compaction.go index d085eac459..de28b8a0e8 100644 --- a/db/background_mgr_attachment_compaction.go +++ b/db/background_mgr_attachment_compaction.go @@ -10,9 +10,11 @@ package db import ( "context" + "errors" "sync" "time" + "github.com/couchbase/gocbcore/v10" "github.com/couchbase/sync_gateway/base" "github.com/google/uuid" ) @@ -100,6 +102,23 @@ func (a *AttachmentCompactionManager) Init(ctx context.Context, options map[stri return newRunInit() } +func (a *AttachmentCompactionManager) PurgeDCPMetadata(ctx context.Context, datastore base.DataStore, database *Database, metadataKeyPrefix string) error { + + bucket, err := base.AsGocbV2Bucket(database.Bucket) + if err != nil { + return err + } + numVbuckets, err := bucket.GetMaxVbno() + if err != nil { + return err + } + + metadata := base.NewDCPMetadataCS(datastore, numVbuckets, base.DefaultNumWorkers, metadataKeyPrefix) + base.InfofCtx(ctx, base.KeyDCP, "purging persisted dcp metadata for attachment compaction run %s", a.CompactID) + metadata.Purge(base.DefaultNumWorkers) + return nil +} + func (a *AttachmentCompactionManager) Run(ctx context.Context, options map[string]interface{}, persistClusterStatusCallback updateStatusCallbackFunc, terminator *base.SafeTerminator) error { database := options["database"].(*Database) @@ -110,6 +129,7 @@ func (a *AttachmentCompactionManager) Run(ctx context.Context, options map[strin // but we'll consider that a follow-up enhancement to point this compaction operation at arbitrary collections. dataStore := database.Bucket.DefaultDataStore() collectionID := base.DefaultCollectionID + var metadataKeyPrefix string persistClusterStatus := func() { err := persistClusterStatusCallback() @@ -120,15 +140,29 @@ func (a *AttachmentCompactionManager) Run(ctx context.Context, options map[strin defer persistClusterStatus() + var rollbackErr gocbcore.DCPRollbackError + // Need to check the current phase in the event we are resuming - No need to run mark again if we got as far as // cleanup last time... 
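// The mark and cleanup phases below run inside a RetryLoop: when the DCP feed reports a rollback
// (gocbcore.DCPRollbackError or base.ErrVbUUIDMismatch), handleAttachmentCompactionRollbackError purges the
// persisted checkpoint metadata and the phase is retried instead of failing the whole compaction run.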
var err error switch a.Phase { case "mark", "": a.SetPhase("mark") - persistClusterStatus() - _, a.VBUUIDs, err = attachmentCompactMarkPhase(ctx, dataStore, collectionID, database, a.CompactID, terminator, &a.MarkedAttachments) + worker := func() (shouldRetry bool, err error, value interface{}) { + persistClusterStatus() + _, a.VBUUIDs, metadataKeyPrefix, err = attachmentCompactMarkPhase(ctx, dataStore, collectionID, database, a.CompactID, terminator, &a.MarkedAttachments) + if err != nil { + shouldRetry, err = a.handleAttachmentCompactionRollbackError(ctx, options, dataStore, database, err, MarkPhase, metadataKeyPrefix) + } + return shouldRetry, err, nil + } + // retry loop for handling a rollback during mark phase of compaction process + err, _ = base.RetryLoop("attachmentCompactMarkPhase", worker, base.CreateMaxDoublingSleeperFunc(25, 100, 10000)) if err != nil || terminator.IsClosed() { + if errors.As(err, &rollbackErr) || errors.Is(err, base.ErrVbUUIDMismatch) { + // log warning to show we hit max number of retries + base.WarnfCtx(ctx, "maximum retry attempts reached on mark phase: %v", err) + } return err } fallthrough @@ -142,9 +176,21 @@ func (a *AttachmentCompactionManager) Run(ctx context.Context, options map[strin fallthrough case "cleanup": a.SetPhase("cleanup") - persistClusterStatus() - err := attachmentCompactCleanupPhase(ctx, dataStore, collectionID, database, a.CompactID, a.VBUUIDs, terminator) + worker := func() (shouldRetry bool, err error, value interface{}) { + persistClusterStatus() + metadataKeyPrefix, err = attachmentCompactCleanupPhase(ctx, dataStore, collectionID, database, a.CompactID, a.VBUUIDs, terminator) + if err != nil { + shouldRetry, err = a.handleAttachmentCompactionRollbackError(ctx, options, dataStore, database, err, CleanupPhase, metadataKeyPrefix) + } + return shouldRetry, err, nil + } + // retry loop for handling a rollback during mark phase of compaction process + err, _ = base.RetryLoop("attachmentCompactCleanupPhase", worker, base.CreateMaxDoublingSleeperFunc(25, 100, 10000)) if err != nil || terminator.IsClosed() { + if errors.As(err, &rollbackErr) || errors.Is(err, base.ErrVbUUIDMismatch) { + // log warning to show we hit max number of retries + base.WarnfCtx(ctx, "maximum retry attempts reached on cleanup phase: %v", err) + } return err } } @@ -153,6 +199,36 @@ func (a *AttachmentCompactionManager) Run(ctx context.Context, options map[strin return nil } +func (a *AttachmentCompactionManager) handleAttachmentCompactionRollbackError(ctx context.Context, options map[string]interface{}, dataStore base.DataStore, database *Database, err error, phase, keyPrefix string) (bool, error) { + var rollbackErr gocbcore.DCPRollbackError + if errors.As(err, &rollbackErr) || errors.Is(err, base.ErrVbUUIDMismatch) { + base.InfofCtx(ctx, base.KeyDCP, "rollback indicated on %s phase of attachment compaction, resetting the task", phase) + // to rollback any phase for attachment compaction we need to purge all persisted dcp metadata + err = a.PurgeDCPMetadata(ctx, dataStore, database, keyPrefix) + if err != nil { + base.WarnfCtx(ctx, "error occurred during purging of dcp metadata: %w", err) + return false, err + } + if phase == MarkPhase { + // initialise new compaction run as we want to start the phase mark again in event of rollback + err = a.Init(ctx, options, nil) + if err != nil { + base.WarnfCtx(ctx, "error on initialization of new run after rollback has been indicated, %w", err) + return false, err + } + } else { + // we only handle rollback for mark and 
cleanup so if we call here it will be for cleanup phase + // we need to clear the vbUUID's on the manager for cleanup phase otherwise we will end up in loop of constant rollback + // as these are used for the initial metadata on the client + a.VBUUIDs = nil + } + // we should try again if it is rollback error + return true, nil + } + // if error isn't rollback then assume it's not recoverable + return false, err +} + func (a *AttachmentCompactionManager) SetPhase(phase string) { a.lock.Lock() defer a.lock.Unlock() diff --git a/rest/attachmentcompactiontest/attachment_compaction_api_test.go b/rest/attachmentcompactiontest/attachment_compaction_api_test.go index 8cc82ba89e..39efe0c24e 100644 --- a/rest/attachmentcompactiontest/attachment_compaction_api_test.go +++ b/rest/attachmentcompactiontest/attachment_compaction_api_test.go @@ -15,6 +15,7 @@ import ( "testing" "time" + "github.com/couchbase/gocbcore/v10" "github.com/couchbase/sync_gateway/base" "github.com/couchbase/sync_gateway/db" "github.com/couchbase/sync_gateway/rest" @@ -415,3 +416,65 @@ func TestAttachmentCompactionAbort(t *testing.T) { status := rt.WaitForAttachmentCompactionStatus(t, db.BackgroundProcessStateStopped) assert.Equal(t, int64(0), status.PurgedAttachments) } + +func TestAttachmentCompactionMarkPhaseRollback(t *testing.T) { + if base.UnitTestUrlIsWalrus() { + t.Skip("This test only works against Couchbase Server") + } + var garbageVBUUID gocbcore.VbUUID = 1234 + base.SetUpTestLogging(t, base.LevelInfo, base.KeyAll) + + rt := rest.NewRestTesterDefaultCollection(t, nil) + defer rt.Close() + dataStore := rt.GetSingleDataStore() + + // Create some 'unmarked' attachments + makeUnmarkedDoc := func(docid string) { + err := dataStore.SetRaw(docid, 0, nil, []byte("{}")) + require.NoError(t, err) + } + + for i := 0; i < 1000; i++ { + docID := fmt.Sprintf("%s%s%d", base.AttPrefix, "unmarked", i) + makeUnmarkedDoc(docID) + } + + // kick off compaction and wait for "mark" phase to begin + resp := rt.SendAdminRequest("POST", "/{{.db}}/_compact?type=attachment", "") + rest.RequireStatus(t, resp, http.StatusOK) + _ = rt.WaitForAttachmentCompactionStatus(t, db.BackgroundProcessStateRunning) + + // immediately stop the compaction process (we just need the status data to be persisted to the bucket) + resp = rt.SendAdminRequest("POST", "/{{.db}}/_compact?type=attachment&action=stop", "") + rest.RequireStatus(t, resp, http.StatusOK) + stat := rt.WaitForAttachmentCompactionStatus(t, db.BackgroundProcessStateStopped) + require.Equal(t, db.MarkPhase, stat.Phase) + + // alter persisted dcp metadata from the first run to force a rollback + name := db.GenerateCompactionDCPStreamName(stat.CompactID, "mark") + checkpointPrefix := fmt.Sprintf("%s:%v", "_sync:dcp_ck:", name) + + meta := base.NewDCPMetadataCS(dataStore, 1024, 8, checkpointPrefix) + vbMeta := meta.GetMeta(0) + vbMeta.VbUUID = garbageVBUUID + meta.SetMeta(0, vbMeta) + meta.Persist(0, []uint16{0}) + + // kick off a new run attempting to start it again (should force into rollback handling) + resp = rt.SendAdminRequest("POST", "/{{.db}}/_compact?type=attachment&action=start", "") + rest.RequireStatus(t, resp, http.StatusOK) + _ = rt.WaitForAttachmentCompactionStatus(t, db.BackgroundProcessStateCompleted) + + // Validate results of recovered attachment compaction process + resp = rt.SendAdminRequest("GET", "/{{.db}}/_compact?type=attachment", "") + rest.RequireStatus(t, resp, http.StatusOK) + + // validate that the compaction process actually recovered from rollback by checking stats + 
var response db.AttachmentManagerResponse + err := base.JSONUnmarshal(resp.BodyBytes(), &response) + require.NoError(t, err) + require.Equal(t, db.BackgroundProcessStateCompleted, response.State) + require.Equal(t, int64(0), response.MarkedAttachments) + require.Equal(t, int64(1000), response.PurgedAttachments) + +} From a62cd6319de1362cb572552cc76ffae8efcc52d4 Mon Sep 17 00:00:00 2001 From: Tor Colvin Date: Mon, 5 Jun 2023 06:05:23 -0400 Subject: [PATCH 38/42] Make test pass if there are buckets that are non test buckets (#6285) --- base/bootstrap_test.go | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/base/bootstrap_test.go b/base/bootstrap_test.go index c5512e6609..368b224501 100644 --- a/base/bootstrap_test.go +++ b/base/bootstrap_test.go @@ -9,6 +9,7 @@ package base import ( + "strings" "sync" "testing" @@ -59,7 +60,14 @@ func TestBootstrapRefCounting(t *testing.T) { buckets, err := cluster.GetConfigBuckets() require.NoError(t, err) - require.Len(t, buckets, tbpNumBuckets()) + var testBuckets []string + for _, bucket := range buckets { + if strings.HasPrefix(bucket, tbpBucketNamePrefix) { + testBuckets = append(testBuckets, bucket) + } + + } + require.Len(t, testBuckets, tbpNumBuckets()) // GetConfigBuckets doesn't cache connections, it uses cluster connection to determine number of buckets require.Len(t, cluster.cachedBucketConnections.buckets, 0) @@ -73,11 +81,11 @@ func TestBootstrapRefCounting(t *testing.T) { } primeBucketConnectionCache(buckets) - require.Len(t, cluster.cachedBucketConnections.buckets, tbpNumBuckets()) + require.Len(t, cluster.cachedBucketConnections.buckets, len(buckets)) // call removeOutdatedBuckets as no-op cluster.cachedBucketConnections.removeOutdatedBuckets(SetOf(buckets...)) - require.Len(t, cluster.cachedBucketConnections.buckets, tbpNumBuckets()) + require.Len(t, cluster.cachedBucketConnections.buckets, len(buckets)) // call removeOutdatedBuckets to remove all cached buckets, call multiple times to make sure idempotent for i := 0; i < 3; i++ { @@ -86,7 +94,7 @@ func TestBootstrapRefCounting(t *testing.T) { } primeBucketConnectionCache(buckets) - require.Len(t, cluster.cachedBucketConnections.buckets, tbpNumBuckets()) + require.Len(t, cluster.cachedBucketConnections.buckets, len(buckets)) // make sure that you can still use an active connection while the bucket has been removed wg := sync.WaitGroup{} From 3b3201bd44047d6a56d1abc39c2a49eed288559b Mon Sep 17 00:00:00 2001 From: Jens Alfke Date: Mon, 5 Jun 2023 11:39:53 -0700 Subject: [PATCH 39/42] Tiny whitespace lint fix --- db/blip_sync_context.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/db/blip_sync_context.go b/db/blip_sync_context.go index da9be4c989..d09173b5e1 100644 --- a/db/blip_sync_context.go +++ b/db/blip_sync_context.go @@ -137,8 +137,8 @@ type BlipSyncContext struct { readOnly bool collections *blipCollections // all collections handled by blipSyncContext, implicit or via GetCollections - revSender *blipRevSender // schedules sending 'rev' messages - stats blipSyncStats // internal structure to store stats + revSender *blipRevSender // schedules sending 'rev' messages + stats blipSyncStats // internal structure to store stats } // blipSyncStats has support structures to support reporting stats at regular interval From d0be0f8aa08c463f4d75cd13a45eb9b5ac0e6e88 Mon Sep 17 00:00:00 2001 From: Gregory Newman-Smith <109068393+gregns1@users.noreply.github.com> Date: Tue, 6 Jun 2023 12:58:39 +0100 Subject: [PATCH 40/42] CBG-3022: 
Replicator will not reconnect when max_back_off != 0 (#6287) --- db/active_replicator.go | 2 +- db/active_replicator_common.go | 4 +- db/active_replicator_config.go | 2 +- db/active_replicator_pull.go | 2 +- db/active_replicator_push.go | 2 +- db/sg_replicate_cfg.go | 6 + db/util_testing.go | 9 ++ rest/replicatortest/replicator_test.go | 145 +++++++++++++++++++++++-- rest/utilities_testing_resttester.go | 8 ++ 9 files changed, 167 insertions(+), 13 deletions(-) diff --git a/db/active_replicator.go b/db/active_replicator.go index 39e7e86cb2..17147f0207 100644 --- a/db/active_replicator.go +++ b/db/active_replicator.go @@ -214,7 +214,7 @@ func connect(arc *activeReplicatorCommon, idSuffix string) (blipSender *blip.Sen blipContext.WebsocketPingInterval = arc.config.WebsocketPingInterval blipContext.OnExitCallback = func() { // fall into a reconnect loop only if the connection is unexpectedly closed. - if arc.ctx.Err() == nil && arc.config.TotalReconnectTimeout != 0 { + if arc.ctx.Err() == nil { go arc.reconnectLoop() } } diff --git a/db/active_replicator_common.go b/db/active_replicator_common.go index 884e7a39ce..2ff157c07f 100644 --- a/db/active_replicator_common.go +++ b/db/active_replicator_common.go @@ -153,7 +153,9 @@ func (a *activeReplicatorCommon) reconnectLoop() { // if a reconnect timeout is set, we'll wrap the existing so both can stop the retry loop var deadlineCancel context.CancelFunc - ctx, deadlineCancel = context.WithDeadline(ctx, time.Now().Add(a.config.TotalReconnectTimeout)) + if a.config.TotalReconnectTimeout != 0 { + ctx, deadlineCancel = context.WithDeadline(ctx, time.Now().Add(a.config.TotalReconnectTimeout)) + } sleeperFunc := base.SleeperFuncCtx( base.CreateIndefiniteMaxDoublingSleeperFunc( diff --git a/db/active_replicator_config.go b/db/active_replicator_config.go index 31789dde1f..2cf61b50d6 100644 --- a/db/active_replicator_config.go +++ b/db/active_replicator_config.go @@ -80,7 +80,7 @@ type ActiveReplicatorConfig struct { InitialReconnectInterval time.Duration // MaxReconnectInterval is the maximum amount of time to wait between exponential backoff reconnect attempts. MaxReconnectInterval time.Duration - // TotalReconnectTimeout, if non-zero, is the amount of time to wait before giving up trying to reconnect. Zero disables reconnect entirely. + // TotalReconnectTimeout, if non-zero, is the amount of time to wait before giving up trying to reconnect. Zero value will retry indefinitely. TotalReconnectTimeout time.Duration // CollectionsEnabled can be set to replicate one or more named collections, rather than just the default collection. 
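With this change a zero TotalReconnectTimeout means the reconnect loop retries indefinitely; the value itself is derived from the replication definition's max_backoff_time, which is handled outside this hunk. A minimal sketch of that mapping, consistent with the expectations asserted in TestReplicatorReconnectBehaviour later in this patch (the function and parameter names are illustrative, not the sg_replicate_cfg implementation):

package sketch

import "time"

// reconnectSettings sketches how a replication's max_backoff_time (in minutes) could map onto
// ActiveReplicatorConfig.TotalReconnectTimeout and MaxReconnectInterval. Illustrative only.
func reconnectSettings(maxBackoffSpecified bool, maxBackoffMinutes int) (totalReconnectTimeout, maxReconnectInterval time.Duration) {
	maxReconnectInterval = 5 * time.Minute // default cap between reconnect attempts
	totalReconnectTimeout = 0              // zero: keep retrying indefinitely
	switch {
	case maxBackoffSpecified && maxBackoffMinutes == 0:
		// an explicit 0 keeps the old behaviour of giving up after ten minutes
		totalReconnectTimeout = 10 * time.Minute
	case maxBackoffSpecified:
		maxReconnectInterval = time.Duration(maxBackoffMinutes) * time.Minute
	}
	return totalReconnectTimeout, maxReconnectInterval
}

Either way, the reconnect loop only applies a context deadline when TotalReconnectTimeout is non-zero, as shown in the active_replicator_common.go hunk above.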
diff --git a/db/active_replicator_pull.go b/db/active_replicator_pull.go index 9d355ab773..8e4ab5ec3e 100644 --- a/db/active_replicator_pull.go +++ b/db/active_replicator_pull.go @@ -59,7 +59,7 @@ func (apr *ActivePullReplicator) Start(ctx context.Context) error { base.WarnfCtx(apr.ctx, "Couldn't connect: %v", err) if errors.Is(err, fatalReplicatorConnectError) { base.WarnfCtx(apr.ctx, "Stopping replication connection attempt") - } else if apr.config.TotalReconnectTimeout != 0 { + } else { base.InfofCtx(apr.ctx, base.KeyReplicate, "Attempting to reconnect in background: %v", err) apr.reconnectActive.Set(true) go apr.reconnectLoop() diff --git a/db/active_replicator_push.go b/db/active_replicator_push.go index 58e50bc774..98b6e5eb04 100644 --- a/db/active_replicator_push.go +++ b/db/active_replicator_push.go @@ -63,7 +63,7 @@ func (apr *ActivePushReplicator) Start(ctx context.Context) error { base.WarnfCtx(apr.ctx, "Couldn't connect: %s", err) if errors.Is(err, fatalReplicatorConnectError) { base.WarnfCtx(apr.ctx, "Stopping replication connection attempt") - } else if apr.config.TotalReconnectTimeout != 0 { + } else { base.InfofCtx(apr.ctx, base.KeyReplicate, "Attempting to reconnect in background: %v", err) apr.reconnectActive.Set(true) go apr.reconnectLoop() diff --git a/db/sg_replicate_cfg.go b/db/sg_replicate_cfg.go index 37164152fb..952628ecd6 100644 --- a/db/sg_replicate_cfg.go +++ b/db/sg_replicate_cfg.go @@ -1204,6 +1204,12 @@ func (m *sgReplicateManager) GetNumberActiveReplicators() int { return len(m.activeReplicators) } +func (m *sgReplicateManager) GetActiveReplicator(name string) *ActiveReplicator { + m.activeReplicatorsLock.Lock() + defer m.activeReplicatorsLock.Unlock() + return m.activeReplicators[name] +} + // RebalanceReplications distributes the set of defined replications across the set of available nodes func (c *SGRCluster) RebalanceReplications() { diff --git a/db/util_testing.go b/db/util_testing.go index 63f6814d08..7ed3e4b79b 100644 --- a/db/util_testing.go +++ b/db/util_testing.go @@ -17,6 +17,7 @@ import ( "testing" "time" + "github.com/couchbase/go-blip" sgbucket "github.com/couchbase/sg-bucket" "github.com/couchbase/sync_gateway/auth" "github.com/couchbase/sync_gateway/base" @@ -619,3 +620,11 @@ func AllocateTestSequence(database *DatabaseContext) (uint64, error) { func ReleaseTestSequence(database *DatabaseContext, sequence uint64) error { return database.sequences.releaseSequence(sequence) } + +func (a *ActiveReplicator) GetActiveReplicatorConfig() *ActiveReplicatorConfig { + return a.config +} + +func (apr *ActivePullReplicator) GetBlipSender() *blip.Sender { + return apr.blipSender +} diff --git a/rest/replicatortest/replicator_test.go b/rest/replicatortest/replicator_test.go index c5410b17f5..4b54fe3032 100644 --- a/rest/replicatortest/replicator_test.go +++ b/rest/replicatortest/replicator_test.go @@ -572,7 +572,7 @@ func TestStopServerlessConnectionLimitingDuringReplications(t *testing.T) { // assert it enter error state replicationID = t.Name() + "2" rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, true, db.ConflictResolverDefault) - rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateError) + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateReconnecting) // change limit to 0 (turning limiting off) and assert that the replications currently running continue as normal and reject any new ones being added resp = rt2.SendAdminRequest(http.MethodPut, "/_config", `{"max_concurrent_replications" : 
diff --git a/rest/replicatortest/replicator_test.go b/rest/replicatortest/replicator_test.go
index c5410b17f5..4b54fe3032 100644
--- a/rest/replicatortest/replicator_test.go
+++ b/rest/replicatortest/replicator_test.go
@@ -572,7 +572,7 @@ func TestStopServerlessConnectionLimitingDuringReplications(t *testing.T) {
     // assert it enter error state
     replicationID = t.Name() + "2"
     rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, true, db.ConflictResolverDefault)
-    rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateError)
+    rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateReconnecting)

     // change limit to 0 (turning limiting off) and assert that the replications currently running continue as normal and reject any new ones being added
     resp = rt2.SendAdminRequest(http.MethodPut, "/_config", `{"max_concurrent_replications" : 0}`)
@@ -655,11 +655,7 @@ func TestServerlessConnectionLimitingContinuous(t *testing.T) {
     // assert it enter error state
     replicationID = t.Name() + "2"
     rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, true, db.ConflictResolverDefault)
-    rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateError)
-
-    // assert on stats
-    dbstats := rt2.GetDatabase().DbStats
-    assert.Equal(t, int64(2), dbstats.DatabaseStats.NumReplicationsRejectedLimit.Value())
+    rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateReconnecting)

     // change limit to 1 and assert that the replications currently running continue as normal and reject any new ones being added
     resp = rt2.SendAdminRequest(http.MethodPut, "/_config", `{"max_concurrent_replications" : 1}`)
@@ -670,7 +666,7 @@ func TestServerlessConnectionLimitingContinuous(t *testing.T) {
     // assert we still can't create a new replication
     replicationID = t.Name() + "3"
     rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, true, db.ConflictResolverDefault)
-    rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateError)
+    rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateReconnecting)

     // stop one of the replicators currently running
     resp = rt1.SendAdminRequest(http.MethodPut, "/{{.db}}/_replicationStatus/"+t.Name()+"1?action=stop", "")
@@ -682,7 +678,7 @@ func TestServerlessConnectionLimitingContinuous(t *testing.T) {
     // assert we still can't create new replication (new limit is 1)
     replicationID = t.Name() + "4"
     rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, true, db.ConflictResolverDefault)
-    rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateError)
+    rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateReconnecting)

 }
@@ -2335,6 +2331,139 @@ func TestActiveReplicatorPullSkippedSequence(t *testing.T) {
     assert.Equal(t, int64(0), dbstats.ProcessedSequenceLenPostCleanup.Value())
 }

+// TestReplicatorReconnectBehaviour tests the configuration values that control replicator reconnection behaviour
+func TestReplicatorReconnectBehaviour(t *testing.T) {
+    base.RequireNumTestBuckets(t, 2)
+
+    testCases := []struct {
+        name                 string
+        maxBackoff           int
+        specified            bool
+        reconnectTimeout     time.Duration
+        maxReconnectInterval time.Duration
+    }{
+        {
+            name:                 "maxbackoff 0",
+            specified:            true,
+            maxBackoff:           0,
+            reconnectTimeout:     10 * time.Minute,
+            maxReconnectInterval: 5 * time.Minute,
+        },
+        {
+            name:                 "max backoff not specified",
+            specified:            false,
+            reconnectTimeout:     0 * time.Minute,
+            maxReconnectInterval: 5 * time.Minute,
+        },
+        {
+            name:                 "maxbackoff 1",
+            specified:            true,
+            maxBackoff:           1,
+            reconnectTimeout:     0 * time.Minute,
+            maxReconnectInterval: 1 * time.Minute,
+        },
+    }
+    for _, test := range testCases {
+        t.Run(test.name, func(t *testing.T) {
+            activeRT, _, remoteURL, teardown := rest.SetupSGRPeers(t)
+            defer teardown()
+            var resp *rest.TestResponse
+
+            if test.specified {
+                resp = activeRT.SendAdminRequest(http.MethodPut, "/{{.db}}/_replication/replication1", fmt.Sprintf(`{
+                    "replication_id": "replication1", "remote": "%s", "direction": "pull",
+                    "collections_enabled": %t, "continuous": true, "max_backoff_time": %d}`, remoteURL, base.TestsUseNamedCollections(), test.maxBackoff))
+                rest.RequireStatus(t, resp, http.StatusCreated)
+            } else {
+                resp = activeRT.SendAdminRequest(http.MethodPut, "/{{.db}}/_replication/replication1", fmt.Sprintf(`{
+                    "replication_id": "replication1", "remote": "%s", "direction": "pull",
+                    "collections_enabled": %t, "continuous": true}`, remoteURL, base.TestsUseNamedCollections()))
+                rest.RequireStatus(t, resp, http.StatusCreated)
+            }
+            activeRT.WaitForReplicationStatus("replication1", db.ReplicationStateRunning)
+            activeRT.WaitForActiveReplicatorInitialization(1)
+
+            activeReplicator := activeRT.GetDatabase().SGReplicateMgr.GetActiveReplicator("replication1")
+            config := activeReplicator.GetActiveReplicatorConfig()
+
+            assert.Equal(t, test.reconnectTimeout, config.TotalReconnectTimeout)
+            assert.Equal(t, test.maxReconnectInterval, config.MaxReconnectInterval)
+        })
+    }
+
+}
+
"direction": "pull", + "collections_enabled": %t, "continuous": true}`, remoteURL, base.TestsUseNamedCollections())) + rest.RequireStatus(t, resp, http.StatusCreated) + } + activeRT.WaitForReplicationStatus("replication1", db.ReplicationStateRunning) + activeRT.WaitForActiveReplicatorInitialization(1) + + activeReplicator := activeRT.GetDatabase().SGReplicateMgr.GetActiveReplicator("replication1") + config := activeReplicator.GetActiveReplicatorConfig() + + assert.Equal(t, test.reconnectTimeout, config.TotalReconnectTimeout) + assert.Equal(t, test.maxReconnectInterval, config.MaxReconnectInterval) + }) + } + +} + +// TestReconnectReplicator: +// - Starts 2 RestTesters, one active, and one remote. +// - creates a pull replication from remote to active rest tester +// - kills the blip sender to simulate a disconnect that was not initiated by the user +// - asserts the replicator enters a reconnecting state and eventually enters a running state again +// - puts some docs on the remote rest tester and assert the replicator pulls these docs to prove reconnect was successful +func TestReconnectReplicator(t *testing.T) { + base.RequireNumTestBuckets(t, 2) + base.SetUpTestLogging(t, base.LevelInfo, base.KeyAll) + + testCases := []struct { + name string + maxBackoff int + specified bool + }{ + { + name: "maxbackoff 0", + specified: true, + maxBackoff: 0, + }, + { + name: "max backoff not specified", + specified: false, + }, + { + name: "maxbackoff 1", + specified: true, + maxBackoff: 1, + }, + } + for _, test := range testCases { + t.Run(test.name, func(t *testing.T) { + activeRT, remoteRT, remoteURL, teardown := rest.SetupSGRPeers(t) + defer teardown() + var resp *rest.TestResponse + const replicationName = "replication1" + + if test.specified { + resp = activeRT.SendAdminRequest(http.MethodPut, "/{{.db}}/_replication/replication1", fmt.Sprintf(`{ + "replication_id": "%s", "remote": "%s", "direction": "pull", + "collections_enabled": %t, "continuous": true, "max_backoff_time": %d}`, replicationName, remoteURL, base.TestsUseNamedCollections(), test.maxBackoff)) + rest.RequireStatus(t, resp, http.StatusCreated) + } else { + resp = activeRT.SendAdminRequest(http.MethodPut, "/{{.db}}/_replication/replication1", fmt.Sprintf(`{ + "replication_id": "%s", "remote": "%s", "direction": "pull", + "collections_enabled": %t, "continuous": true}`, replicationName, remoteURL, base.TestsUseNamedCollections())) + rest.RequireStatus(t, resp, http.StatusCreated) + } + activeRT.WaitForReplicationStatus("replication1", db.ReplicationStateRunning) + + activeRT.WaitForActiveReplicatorInitialization(1) + ar := activeRT.GetDatabase().SGReplicateMgr.GetActiveReplicator("replication1") + // race between stopping the blip sender here and the initialization of it on the replicator so need this assertion in here to avoid panic + activeRT.WaitForPullBlipSenderInitialisation(replicationName) + ar.Pull.GetBlipSender().Stop() + + activeRT.WaitForReplicationStatus(replicationName, db.ReplicationStateReconnecting) + + activeRT.WaitForReplicationStatus(replicationName, db.ReplicationStateRunning) + + for i := 0; i < 10; i++ { + response := remoteRT.SendAdminRequest(http.MethodPut, "/{{.keyspace}}/"+fmt.Sprint(i), `{"source": "remote"}`) + rest.RequireStatus(t, response, http.StatusCreated) + } + _, err := activeRT.WaitForChanges(10, "/{{.keyspace}}/_changes", "", true) + require.NoError(t, err) + }) + } + +} + // TestActiveReplicatorPullAttachments: // - Starts 2 RestTesters, one active, and one passive. 
diff --git a/rest/utilities_testing_resttester.go b/rest/utilities_testing_resttester.go
index 6f1aee79a1..08b52c4fc3 100644
--- a/rest/utilities_testing_resttester.go
+++ b/rest/utilities_testing_resttester.go
@@ -134,6 +134,14 @@ func (rt *RestTester) WaitForActiveReplicatorInitialization(count int) {
     require.NoError(rt.TB, rt.WaitForCondition(successFunc), "mismatch on number of active replicators")
 }

+func (rt *RestTester) WaitForPullBlipSenderInitialisation(name string) {
+    successFunc := func() bool {
+        bs := rt.GetDatabase().SGReplicateMgr.GetActiveReplicator(name).Pull.GetBlipSender()
+        return bs != nil
+    }
+    require.NoError(rt.TB, rt.WaitForCondition(successFunc), "blip sender on active replicator not initialized")
+}
+
 // createReplication creates a replication via the REST API with the specified ID, remoteURL, direction and channel filter
 func (rt *RestTester) CreateReplication(replicationID string, remoteURLString string, direction db.ActiveReplicatorDirection, channels []string, continuous bool, conflictResolver db.ConflictResolverType) {
     rt.CreateReplicationForDB("{{.db}}", replicationID, remoteURLString, direction, channels, continuous, conflictResolver)

From 6cb633a486b102b7cfa6d7ca8914c5de53732f35 Mon Sep 17 00:00:00 2001
From: Tor Colvin
Date: Tue, 6 Jun 2023 10:49:05 -0400
Subject: [PATCH 41/42] Add the current state of the database in error message (#6290)

---
 rest/api.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rest/api.go b/rest/api.go
index a340946003..54001e6392 100644
--- a/rest/api.go
+++ b/rest/api.go
@@ -330,7 +330,7 @@ func (h *handler) handlePostResync() error {
         }

         if dbState != db.DBOffline {
-            return base.HTTPErrorf(http.StatusServiceUnavailable, "Database must be _offline before calling _resync")
+            return base.HTTPErrorf(http.StatusServiceUnavailable, "Database must be _offline before calling _resync, current state: %s", db.RunStateString[dbState])
         }
     }
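With this change, calling _resync against a database that is not offline should return an error that names the current run state, along the lines of the following (the exact state string comes from db.RunStateString):

    503 Service Unavailable
    Database must be _offline before calling _resync, current state: Online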
From e4f58d26577be0b7c8cdfdd79b613da80d85ceeb Mon Sep 17 00:00:00 2001
From: Jens Alfke
Date: Tue, 6 Jun 2023 11:26:02 -0700
Subject: [PATCH 42/42] Updated go-blip with a fix for a race condition

See commit 3f1855f3bfe9 in go-blip.

---
 go.mod | 2 +-
 go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index 2a5e0ce07a..d600163db2 100644
--- a/go.mod
+++ b/go.mod
@@ -7,7 +7,7 @@ require (
     github.com/coreos/go-oidc v2.2.1+incompatible
     github.com/couchbase/cbgt v1.3.2-0.20230405201040-c0ede9925e46
     github.com/couchbase/clog v0.1.0
-    github.com/couchbase/go-blip v0.0.0-20230515195238-a7b936f01f65
+    github.com/couchbase/go-blip v0.0.0-20230606182423-3f1855f3bfe9
     github.com/couchbase/go-couchbase v0.1.1
     github.com/couchbase/gocb/v2 v2.6.2
     github.com/couchbase/gocbcore/v10 v10.2.3-0.20230412164057-d9c465de8911
diff --git a/go.sum b/go.sum
index 2be4257b00..79b217a25c 100644
--- a/go.sum
+++ b/go.sum
@@ -69,8 +69,8 @@ github.com/couchbase/cbgt v1.3.2-0.20230405201040-c0ede9925e46 h1:tRxeXfSHBzAq6m
 github.com/couchbase/cbgt v1.3.2-0.20230405201040-c0ede9925e46/go.mod h1:tJF3TUUO3ZDBU15auN1gNsIVY3Oo+jj46zIXH4RBxk4=
 github.com/couchbase/clog v0.1.0 h1:4Kh/YHkhRjMCbdQuvRVsm39XZh4FtL1d8fAwJsHrEPY=
 github.com/couchbase/clog v0.1.0/go.mod h1:7tzUpEOsE+fgU81yfcjy5N1H6XtbVC8SgOz/3mCjmd4=
-github.com/couchbase/go-blip v0.0.0-20230515195238-a7b936f01f65 h1:9LSHcwSzpLj7/M3sKZiWALc0kdNApWGdlY2Q7WbuaF4=
-github.com/couchbase/go-blip v0.0.0-20230515195238-a7b936f01f65/go.mod h1:WstEZkP0F1n3ev8e2mzoUqe4pKYHSJzA1uv73ke/GNQ=
+github.com/couchbase/go-blip v0.0.0-20230606182423-3f1855f3bfe9 h1:aBMVnwXfrz9jBAjHMrU4B4JgDI22usPTEioaGA037jU=
+github.com/couchbase/go-blip v0.0.0-20230606182423-3f1855f3bfe9/go.mod h1:WstEZkP0F1n3ev8e2mzoUqe4pKYHSJzA1uv73ke/GNQ=
 github.com/couchbase/go-couchbase v0.1.1 h1:ClFXELcKj/ojyoTYbsY34QUrrYCBi/1G749sXSCkdhk=
 github.com/couchbase/go-couchbase v0.1.1/go.mod h1:+/bddYDxXsf9qt0xpDUtRR47A2GjaXmGGAqQ/k3GJ8A=
 github.com/couchbase/gocb/v2 v2.6.2 h1:sZg0+3GiYW7OT53ENEGnkkQMXhVuJ1qOJplvZDlM5Xk=