Skip to content

Commit

Permalink
Disallow offloaded children during timeline deletion (#9582)
Browse files Browse the repository at this point in the history
If we delete a timeline that has childen, those children will have their
data corrupted. Therefore, extend the already existing safety check to
offloaded timelines as well.

Part of #8088
  • Loading branch information
arpad-m authored Oct 30, 2024
1 parent 8d70f88 commit 65b6939
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 21 deletions.
39 changes: 19 additions & 20 deletions pageserver/src/tenant/timeline/delete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,8 @@ impl DeleteTimelineFlow {
) -> Result<(), DeleteTimelineError> {
super::debug_assert_current_span_has_tenant_and_timeline_id();

let (timeline, mut guard) = Self::prepare(tenant, timeline_id)?;
let allow_offloaded_children = false;
let (timeline, mut guard) = Self::prepare(tenant, timeline_id, allow_offloaded_children)?;

guard.mark_in_progress()?;

Expand Down Expand Up @@ -340,6 +341,7 @@ impl DeleteTimelineFlow {
pub(super) fn prepare(
tenant: &Tenant,
timeline_id: TimelineId,
allow_offloaded_children: bool,
) -> Result<(TimelineOrOffloaded, DeletionGuard), DeleteTimelineError> {
// Note the interaction between this guard and deletion guard.
// Here we attempt to lock deletion guard when we're holding a lock on timelines.
Expand All @@ -352,30 +354,27 @@ impl DeleteTimelineFlow {
// T1: acquire deletion lock, do another `DeleteTimelineFlow::run`
// For more context see this discussion: `https://github.com/neondatabase/neon/pull/4552#discussion_r1253437346`
let timelines = tenant.timelines.lock().unwrap();
let timelines_offloaded = tenant.timelines_offloaded.lock().unwrap();

let timeline = match timelines.get(&timeline_id) {
Some(t) => TimelineOrOffloaded::Timeline(Arc::clone(t)),
None => {
let offloaded_timelines = tenant.timelines_offloaded.lock().unwrap();
match offloaded_timelines.get(&timeline_id) {
Some(t) => TimelineOrOffloaded::Offloaded(Arc::clone(t)),
None => return Err(DeleteTimelineError::NotFound),
}
}
None => match timelines_offloaded.get(&timeline_id) {
Some(t) => TimelineOrOffloaded::Offloaded(Arc::clone(t)),
None => return Err(DeleteTimelineError::NotFound),
},
};

// Ensure that there are no child timelines **attached to that pageserver**,
// because detach removes files, which will break child branches
let children: Vec<TimelineId> = timelines
.iter()
.filter_map(|(id, entry)| {
if entry.get_ancestor_timeline_id() == Some(timeline_id) {
Some(*id)
} else {
None
}
})
.collect();
// Ensure that there are no child timelines, because we are about to remove files,
// which will break child branches
let mut children = Vec::new();
if !allow_offloaded_children {
children.extend(timelines_offloaded.iter().filter_map(|(id, entry)| {
(entry.ancestor_timeline_id == Some(timeline_id)).then_some(*id)
}));
}
children.extend(timelines.iter().filter_map(|(id, entry)| {
(entry.get_ancestor_timeline_id() == Some(timeline_id)).then_some(*id)
}));

if !children.is_empty() {
return Err(DeleteTimelineError::HasChildren(children));
Expand Down
4 changes: 3 additions & 1 deletion pageserver/src/tenant/timeline/offload.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ pub(crate) async fn offload_timeline(
debug_assert_current_span_has_tenant_and_timeline_id();
tracing::info!("offloading archived timeline");

let (timeline, guard) = DeleteTimelineFlow::prepare(tenant, timeline.timeline_id)?;
let allow_offloaded_children = true;
let (timeline, guard) =
DeleteTimelineFlow::prepare(tenant, timeline.timeline_id, allow_offloaded_children)?;

let TimelineOrOffloaded::Timeline(timeline) = timeline else {
tracing::error!("timeline already offloaded, but given timeline object");
Expand Down
7 changes: 7 additions & 0 deletions test_runner/regress/test_timeline_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,13 @@ def leaf_offloaded():
wait_until(30, 1, leaf_offloaded)
wait_until(30, 1, parent_offloaded)

# Offloaded child timelines should still prevent deletion
with pytest.raises(
PageserverApiException,
match=f".* timeline which has child timelines: \\[{leaf_timeline_id}\\]",
):
ps_http.timeline_delete(tenant_id, parent_timeline_id)

ps_http.timeline_archival_config(
tenant_id,
grandparent_timeline_id,
Expand Down

1 comment on commit 65b6939

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

5416 tests run: 5184 passed, 0 failed, 232 skipped (full report)


Code coverage* (full report)

  • functions: 31.5% (7771 of 24686 functions)
  • lines: 49.0% (61012 of 124616 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
65b6939 at 2024-10-30T20:01:32.304Z :recycle:

Please sign in to comment.