Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pageserver: rename control plane client & chunk validation requests #8997

Merged
merged 5 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pageserver/src/bin/pageserver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use clap::{Arg, ArgAction, Command};

use metrics::launch_timestamp::{set_launch_timestamp_metric, LaunchTimestamp};
use pageserver::config::PageserverIdentity;
use pageserver::control_plane_client::ControlPlaneClient;
use pageserver::controller_upcall_client::ControllerUpcallClient;
use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task};
use pageserver::metrics::{STARTUP_DURATION, STARTUP_IS_LOADING};
use pageserver::task_mgr::{COMPUTE_REQUEST_RUNTIME, WALRECEIVER_RUNTIME};
Expand Down Expand Up @@ -396,7 +396,7 @@ fn start_pageserver(
// Set up deletion queue
let (deletion_queue, deletion_workers) = DeletionQueue::new(
remote_storage.clone(),
ControlPlaneClient::new(conf, &shutdown_pageserver),
ControllerUpcallClient::new(conf, &shutdown_pageserver),
conf,
);
if let Some(deletion_workers) = deletion_workers {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,12 @@ use utils::{backoff, failpoint_support, generation::Generation, id::NodeId};
use crate::{config::PageServerConf, virtual_file::on_fatal_io_error};
use pageserver_api::config::NodeMetadata;

/// The Pageserver's client for using the control plane API: this is a small subset
/// of the overall control plane API, for dealing with generations (see docs/rfcs/025-generation-numbers.md)
pub struct ControlPlaneClient {
/// The Pageserver's client for using the storage controller upcall API: this is a small API
/// for dealing with generations (see docs/rfcs/025-generation-numbers.md).
///
/// The server presenting this API may either be the storage controller or some other
/// service (such as the Neon control plane) providing a store of generation numbers.
pub struct ControllerUpcallClient {
http_client: reqwest::Client,
base_url: Url,
node_id: NodeId,
Expand All @@ -45,7 +48,7 @@ pub trait ControlPlaneGenerationsApi {
) -> impl Future<Output = Result<HashMap<TenantShardId, bool>, RetryForeverError>> + Send;
}

impl ControlPlaneClient {
impl ControllerUpcallClient {
/// A None return value indicates that the input `conf` object does not have control
/// plane API enabled.
pub fn new(conf: &'static PageServerConf, cancel: &CancellationToken) -> Option<Self> {
Expand Down Expand Up @@ -114,7 +117,7 @@ impl ControlPlaneClient {
}
}

impl ControlPlaneGenerationsApi for ControlPlaneClient {
impl ControlPlaneGenerationsApi for ControllerUpcallClient {
/// Block until we get a successful response, or error out if we are shut down
async fn re_attach(
&self,
Expand Down Expand Up @@ -216,29 +219,38 @@ impl ControlPlaneGenerationsApi for ControlPlaneClient {
.join("validate")
.expect("Failed to build validate path");

let request = ValidateRequest {
tenants: tenants
.into_iter()
.map(|(id, gen)| ValidateRequestTenant {
id,
gen: gen
.into()
.expect("Generation should always be valid for a Tenant doing deletions"),
})
.collect(),
};
// When sending validate requests, break them up into chunks so that we
// avoid possible edge cases of generating any HTTP requests that
// require database I/O across many thousands of tenants.
let mut result: HashMap<TenantShardId, bool> = HashMap::with_capacity(tenants.len());
for tenant_chunk in (tenants).chunks(128) {
let request = ValidateRequest {
tenants: tenant_chunk
.iter()
.map(|(id, generation)| ValidateRequestTenant {
id: *id,
gen: (*generation).into().expect(
"Generation should always be valid for a Tenant doing deletions",
),
})
.collect(),
};

failpoint_support::sleep_millis_async!("control-plane-client-validate-sleep", &self.cancel);
if self.cancel.is_cancelled() {
return Err(RetryForeverError::ShuttingDown);
}
failpoint_support::sleep_millis_async!(
"control-plane-client-validate-sleep",
&self.cancel
);
if self.cancel.is_cancelled() {
return Err(RetryForeverError::ShuttingDown);
}

let response: ValidateResponse = self.retry_http_forever(&re_attach_path, request).await?;
let response: ValidateResponse =
self.retry_http_forever(&re_attach_path, request).await?;
for rt in response.tenants {
result.insert(rt.id, rt.valid);
}
}

Ok(response
.tenants
.into_iter()
.map(|rt| (rt.id, rt.valid))
.collect())
Ok(result.into_iter().collect())
}
}
8 changes: 4 additions & 4 deletions pageserver/src/deletion_queue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;

use crate::control_plane_client::ControlPlaneGenerationsApi;
use crate::controller_upcall_client::ControlPlaneGenerationsApi;
use crate::metrics;
use crate::tenant::remote_timeline_client::remote_layer_path;
use crate::tenant::remote_timeline_client::remote_timeline_path;
Expand Down Expand Up @@ -622,7 +622,7 @@ impl DeletionQueue {
/// If remote_storage is None, then the returned workers will also be None.
pub fn new<C>(
remote_storage: GenericRemoteStorage,
control_plane_client: Option<C>,
controller_upcall_client: Option<C>,
conf: &'static PageServerConf,
) -> (Self, Option<DeletionQueueWorkers<C>>)
where
Expand Down Expand Up @@ -662,7 +662,7 @@ impl DeletionQueue {
conf,
backend_rx,
executor_tx,
control_plane_client,
controller_upcall_client,
lsn_table.clone(),
cancel.clone(),
),
Expand Down Expand Up @@ -704,7 +704,7 @@ mod test {
use tokio::task::JoinHandle;

use crate::{
control_plane_client::RetryForeverError,
controller_upcall_client::RetryForeverError,
repository::Key,
tenant::{harness::TenantHarness, storage_layer::DeltaLayerName},
};
Expand Down
14 changes: 7 additions & 7 deletions pageserver/src/deletion_queue/validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ use tracing::info;
use tracing::warn;

use crate::config::PageServerConf;
use crate::control_plane_client::ControlPlaneGenerationsApi;
use crate::control_plane_client::RetryForeverError;
use crate::controller_upcall_client::ControlPlaneGenerationsApi;
use crate::controller_upcall_client::RetryForeverError;
use crate::metrics;
use crate::virtual_file::MaybeFatalIo;

Expand Down Expand Up @@ -61,7 +61,7 @@ where
tx: tokio::sync::mpsc::Sender<DeleterMessage>,

// Client for calling into control plane API for validation of deletes
control_plane_client: Option<C>,
controller_upcall_client: Option<C>,

// DeletionLists which are waiting generation validation. Not safe to
// execute until [`validate`] has processed them.
Expand Down Expand Up @@ -94,15 +94,15 @@ where
conf: &'static PageServerConf,
rx: tokio::sync::mpsc::Receiver<ValidatorQueueMessage>,
tx: tokio::sync::mpsc::Sender<DeleterMessage>,
control_plane_client: Option<C>,
controller_upcall_client: Option<C>,
lsn_table: Arc<std::sync::RwLock<VisibleLsnUpdates>>,
cancel: CancellationToken,
) -> Self {
Self {
conf,
rx,
tx,
control_plane_client,
controller_upcall_client,
lsn_table,
pending_lists: Vec::new(),
validated_lists: Vec::new(),
Expand Down Expand Up @@ -145,8 +145,8 @@ where
return Ok(());
}

let tenants_valid = if let Some(control_plane_client) = &self.control_plane_client {
match control_plane_client
let tenants_valid = if let Some(controller_upcall_client) = &self.controller_upcall_client {
match controller_upcall_client
.validate(tenant_generations.iter().map(|(k, v)| (*k, *v)).collect())
.await
{
Expand Down
2 changes: 1 addition & 1 deletion pageserver/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ pub mod basebackup;
pub mod config;
pub mod consumption_metrics;
pub mod context;
pub mod control_plane_client;
pub mod controller_upcall_client;
pub mod deletion_queue;
pub mod disk_usage_eviction_task;
pub mod http;
Expand Down
8 changes: 4 additions & 4 deletions pageserver/src/tenant/mgr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ use utils::{backoff, completion, crashsafe};

use crate::config::PageServerConf;
use crate::context::{DownloadBehavior, RequestContext};
use crate::control_plane_client::{
ControlPlaneClient, ControlPlaneGenerationsApi, RetryForeverError,
use crate::controller_upcall_client::{
ControlPlaneGenerationsApi, ControllerUpcallClient, RetryForeverError,
};
use crate::deletion_queue::DeletionQueueClient;
use crate::http::routes::ACTIVE_TENANT_TIMEOUT;
Expand Down Expand Up @@ -122,7 +122,7 @@ pub(crate) enum ShardSelector {
Known(ShardIndex),
}

/// A convenience for use with the re_attach ControlPlaneClient function: rather
/// A convenience for use with the re_attach ControllerUpcallClient function: rather
/// than the serializable struct, we build this enum that encapsulates
/// the invariant that attached tenants always have generations.
///
Expand Down Expand Up @@ -341,7 +341,7 @@ async fn init_load_generations(
"Emergency mode! Tenants will be attached unsafely using their last known generation"
);
emergency_generations(tenant_confs)
} else if let Some(client) = ControlPlaneClient::new(conf, cancel) {
} else if let Some(client) = ControllerUpcallClient::new(conf, cancel) {
info!("Calling control plane API to re-attach tenants");
// If we are configured to use the control plane API, then it is the source of truth for what tenants to load.
match client.re_attach(conf).await {
Expand Down
Loading