Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Disallow archived timelines to be detached or reparented #9578

Merged
merged 3 commits into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 44 additions & 1 deletion pageserver/src/tenant/timeline/detach_ancestor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ pub(crate) enum Error {
#[error("shutting down, please retry later")]
ShuttingDown,

#[error("archived: {}", .0)]
Archived(TimelineId),

#[error(transparent)]
NotFound(crate::tenant::GetTimelineError),

Expand Down Expand Up @@ -79,8 +82,9 @@ impl From<Error> for ApiError {
fn from(value: Error) -> Self {
match value {
Error::NoAncestor => ApiError::Conflict(value.to_string()),
Error::TooManyAncestors => ApiError::BadRequest(anyhow::anyhow!("{}", value)),
Error::TooManyAncestors => ApiError::BadRequest(anyhow::anyhow!("{value}")),
Error::ShuttingDown => ApiError::ShuttingDown,
Error::Archived(_) => ApiError::BadRequest(anyhow::anyhow!("{value}")),
Error::OtherTimelineDetachOngoing(_) | Error::FailedToReparentAll => {
ApiError::ResourceUnavailable(value.to_string().into())
}
Expand Down Expand Up @@ -201,12 +205,18 @@ pub(super) async fn prepare(
}));
};

if detached.is_archived() != Some(false) {
return Err(Archived(detached.timeline_id));
}

if !ancestor_lsn.is_valid() {
// rare case, probably wouldn't even load
tracing::error!("ancestor is set, but ancestor_lsn is invalid, this timeline needs fixing");
return Err(NoAncestor);
}

check_no_archived_children_of_ancestor(tenant, detached, &ancestor, ancestor_lsn)?;

if ancestor.ancestor_timeline.is_some() {
// non-technical requirement; we could flatten N ancestors just as easily but we chose
// not to, at least initially
Expand Down Expand Up @@ -950,3 +960,36 @@ where
}
})
}

fn check_no_archived_children_of_ancestor(
tenant: &Tenant,
detached: &Arc<Timeline>,
ancestor: &Arc<Timeline>,
ancestor_lsn: Lsn,
) -> Result<(), Error> {
let timelines = tenant.timelines.lock().unwrap();
let timelines_offloaded = tenant.timelines_offloaded.lock().unwrap();
for timeline in reparentable_timelines(timelines.values(), detached, ancestor, ancestor_lsn) {
if timeline.is_archived() == Some(true) {
return Err(Error::Archived(timeline.timeline_id));
}
}
for timeline_offloaded in timelines_offloaded.values() {
if timeline_offloaded.ancestor_timeline_id != Some(ancestor.timeline_id) {
continue;
}
// This forbids the detach ancestor feature if flattened timelines are present,
// even if the ancestor_lsn is from after the branchpoint of the detached timeline.
// But as per current design, we don't record the ancestor_lsn of flattened timelines.
// This is a bit unfortunate, but as of writing this we don't support flattening
// anyway. Maybe we can evolve the data model in the future.
if let Some(retain_lsn) = timeline_offloaded.ancestor_retain_lsn {
let is_earlier = retain_lsn <= ancestor_lsn;
if !is_earlier {
continue;
}
}
return Err(Error::Archived(timeline_offloaded.timeline_id));
}
Ok(())
}
37 changes: 34 additions & 3 deletions test_runner/regress/test_timeline_detach_ancestor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from threading import Barrier

import pytest
from fixtures.common_types import Lsn, TimelineId
from fixtures.common_types import Lsn, TimelineArchivalState, TimelineId
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
LogCursor,
Expand Down Expand Up @@ -634,7 +634,13 @@ def test_timeline_ancestor_detach_errors(neon_env_builder: NeonEnvBuilder, shard
shards = 2 if sharded else 1

neon_env_builder.num_pageservers = shards
env = neon_env_builder.init_start(initial_tenant_shard_count=shards if sharded else None)
env = neon_env_builder.init_start(
initial_tenant_shard_count=shards if sharded else None,
initial_tenant_conf={
# turn off gc, we want to do manual offloading here.
"gc_period": "0s",
},
)

pageservers = dict((int(p.id), p) for p in env.pageservers)

Expand All @@ -656,7 +662,9 @@ def test_timeline_ancestor_detach_errors(neon_env_builder: NeonEnvBuilder, shard
client.detach_ancestor(env.initial_tenant, env.initial_timeline)
assert info.value.status_code == 409

_ = env.create_branch("first_branch")
early_branch = env.create_branch("early_branch")

first_branch = env.create_branch("first_branch")

second_branch = env.create_branch("second_branch", ancestor_branch_name="first_branch")

Expand All @@ -665,6 +673,29 @@ def test_timeline_ancestor_detach_errors(neon_env_builder: NeonEnvBuilder, shard
client.detach_ancestor(env.initial_tenant, second_branch)
assert info.value.status_code == 400

client.timeline_archival_config(
env.initial_tenant, second_branch, TimelineArchivalState.ARCHIVED
)

client.timeline_archival_config(
env.initial_tenant, early_branch, TimelineArchivalState.ARCHIVED
)

with pytest.raises(PageserverApiException, match=f".*archived: {early_branch}") as info:
client.detach_ancestor(env.initial_tenant, first_branch)
assert info.value.status_code == 400

if not sharded:
client.timeline_offload(env.initial_tenant, early_branch)

client.timeline_archival_config(
env.initial_tenant, first_branch, TimelineArchivalState.ARCHIVED
)

with pytest.raises(PageserverApiException, match=f".*archived: {first_branch}") as info:
client.detach_ancestor(env.initial_tenant, first_branch)
assert info.value.status_code == 400


def test_sharded_timeline_detach_ancestor(neon_env_builder: NeonEnvBuilder):
"""
Expand Down
Loading