Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update ring with new ip address when instance is lost, rejoins, but heartbeat is disabled #6271

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
* [ENHANCEMENT] Ingester/Store Gateway Clients: Introduce an experimental HealthCheck handler to quickly fail requests directed to unhealthy targets. #6225 #6257
* [ENHANCEMENT] Upgrade build image and Go version to 1.23.2. #6261 #6262
* [BUGFIX] Runtime-config: Handle absolute file paths when working directory is not / #6224
* [BUGFIX] Ring: update ring with new ip address when instance is lost, rejoins, but heartbeat is disabled #6271

## 1.18.0 2024-09-03

Expand Down
7 changes: 7 additions & 0 deletions pkg/ring/lifecycler.go
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,13 @@ func (i *Lifecycler) initRing(ctx context.Context) error {
return ringDesc, true, nil
}

if i.cfg.HeartbeatPeriod == 0 && i.Addr != instanceDesc.Addr {
// Update the address if it has changed
instanceDesc.Addr = i.Addr
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can have this line before line 692 and lets the existing deep equal check take care if the Ip changed?

Setting the instanceDesc.Addr = i.Addr should be a noop in case the IP did not change, if it changed, the check on https://github.com/cortexproject/cortex/pull/6271/files#diff-4d46760513b4f0a55e4cebb89e6a055859204f00ca50220cd42b5191b018b152R692 should take care of updating the ring.

ringDesc.Ingesters[i.ID] = instanceDesc
return ringDesc, true, nil
}

// we haven't modified the ring, don't try to store it.
return nil, true, nil
})
Expand Down
41 changes: 34 additions & 7 deletions pkg/ring/lifecycler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ func testLifecyclerConfig(ringConfig Config, id string) LifecyclerConfig {
return lifecyclerConfig
}

// testLifecyclerConfigWithAddr creates a LifecyclerConfig with the given address.
// This is useful for testing when we want to set the address to a specific value.
func testLifecyclerConfigWithAddr(ringConfig Config, id string, addr string) LifecyclerConfig {
l := testLifecyclerConfig(ringConfig, id)
l.Addr = addr
return l
}

func checkNormalised(d interface{}, id string) bool {
desc, ok := d.(*Desc)
return ok &&
Expand Down Expand Up @@ -644,8 +652,8 @@ func TestRestartIngester_DisabledHeartbeat_unregister_on_shutdown_false(t *testi
}

// Starts Ingester and wait it to became active
startIngesterAndWaitActive := func(ingId string) *Lifecycler {
lifecyclerConfig := testLifecyclerConfig(ringConfig, ingId)
startIngesterAndWaitActive := func(ingId string, addr string) *Lifecycler {
lifecyclerConfig := testLifecyclerConfigWithAddr(ringConfig, ingId, addr)
// Disabling heartBeat and unregister_on_shutdown
lifecyclerConfig.UnregisterOnShutdown = false
lifecyclerConfig.HeartbeatPeriod = 0
Expand All @@ -662,10 +670,10 @@ func TestRestartIngester_DisabledHeartbeat_unregister_on_shutdown_false(t *testi
// test if the ingester 2 became active after:
// * Clean Shutdown (LEAVING after shutdown)
// * Crashes while in the PENDING or JOINING state
l1 := startIngesterAndWaitActive("ing1")
l1 := startIngesterAndWaitActive("ing1", "0.0.0.0")
defer services.StopAndAwaitTerminated(context.Background(), l1) //nolint:errcheck

l2 := startIngesterAndWaitActive("ing2")
l2 := startIngesterAndWaitActive("ing2", "0.0.0.0")

ingesters := poll(func(desc *Desc) bool {
return len(desc.Ingesters) == 2 && desc.Ingesters["ing1"].State == ACTIVE && desc.Ingesters["ing2"].State == ACTIVE
Expand All @@ -684,7 +692,7 @@ func TestRestartIngester_DisabledHeartbeat_unregister_on_shutdown_false(t *testi
assert.Equal(t, LEAVING, ingesters["ing2"].State)

// Start Ingester2 again - Should flip back to ACTIVE in the ring
l2 = startIngesterAndWaitActive("ing2")
l2 = startIngesterAndWaitActive("ing2", "0.0.0.0")
require.NoError(t, services.StopAndAwaitTerminated(context.Background(), l2))

// Simulate ingester2 crash on startup and left the ring with JOINING state
Expand All @@ -698,7 +706,7 @@ func TestRestartIngester_DisabledHeartbeat_unregister_on_shutdown_false(t *testi
})
require.NoError(t, err)

l2 = startIngesterAndWaitActive("ing2")
l2 = startIngesterAndWaitActive("ing2", "0.0.0.0")
require.NoError(t, services.StopAndAwaitTerminated(context.Background(), l2))

// Simulate ingester2 crash on startup and left the ring with PENDING state
Expand All @@ -712,7 +720,26 @@ func TestRestartIngester_DisabledHeartbeat_unregister_on_shutdown_false(t *testi
})
require.NoError(t, err)

l2 = startIngesterAndWaitActive("ing2")
l2 = startIngesterAndWaitActive("ing2", "0.0.0.0")
require.NoError(t, services.StopAndAwaitTerminated(context.Background(), l2))

// Simulate ingester2 crashing and left the ring with ACTIVE state, but when it comes up
// it has a different ip address
startIngesterAndWaitActive("ing2", "0.0.0.0")
ingesters = poll(func(desc *Desc) bool {
return desc.Ingesters["ing2"].State == ACTIVE && desc.Ingesters["ing2"].Addr == "0.0.0.0:1"
})
assert.Equal(t, ACTIVE, ingesters["ing2"].State)
assert.Equal(t, "0.0.0.0:1", ingesters["ing2"].Addr)

l2 = startIngesterAndWaitActive("ing2", "1.1.1.1")

// The ring should have the new ip address
ingesters = poll(func(desc *Desc) bool {
return desc.Ingesters["ing2"].State == ACTIVE && desc.Ingesters["ing2"].Addr == "1.1.1.1:1"
})
assert.Equal(t, ACTIVE, ingesters["ing2"].State)
assert.Equal(t, "1.1.1.1:1", ingesters["ing2"].Addr)
require.NoError(t, services.StopAndAwaitTerminated(context.Background(), l2))
}

Expand Down
Loading