Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pageserver: enable virtual_file_io_mode=direct in CI by default #9716

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/_build-and-test-locally.yml
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ jobs:

# run pageserver tests with different settings
for io_engine in std-fs tokio-epoll-uring ; do
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOMODE=direct ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
done

# Run separate tests for real S3
Expand Down Expand Up @@ -300,6 +300,7 @@ jobs:
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
BUILD_TAG: ${{ inputs.build-tag }}
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
PAGESERVER_VIRTUAL_FILE_IO_MODE: direct

# Temporary disable this step until we figure out why it's so flaky
# Ref https://github.com/neondatabase/neon/issues/4540
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,7 @@ jobs:
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
PAGESERVER_VIRTUAL_FILE_IO_MODE: direct
SYNC_BETWEEN_TESTS: true
# XXX: no coverage data handling here, since benchmarks are run on release builds,
# while coverage is currently collected for the debug ones
Expand Down
2 changes: 1 addition & 1 deletion libs/pageserver_api/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ pub struct ConfigToml {
pub timeline_offloading: bool,
pub ephemeral_bytes_per_memory_kb: usize,
pub l0_flush: Option<crate::models::L0FlushConfig>,
pub virtual_file_io_mode: Option<crate::models::virtual_file::IoMode>,
pub virtual_file_io_mode: Option<crate::models::virtual_file::IoModeKind>,
#[serde(skip_serializing_if = "Option::is_none")]
pub no_sync: Option<bool>,
}
Expand Down
21 changes: 7 additions & 14 deletions libs/pageserver_api/src/models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -976,31 +976,24 @@ pub mod virtual_file {
Debug,
)]
#[strum(serialize_all = "kebab-case")]
#[repr(u8)]
pub enum IoMode {
pub enum IoModeKind {
/// Uses buffered IO.
Buffered,
/// Uses direct IO, error out if the operation fails.
#[cfg(target_os = "linux")]
Direct,
}

impl IoMode {
impl IoModeKind {
// TODO(yuchen): change to [`Self::Direct`] once finish rollout.
#[cfg(target_os = "linux")]
pub const fn preferred() -> Self {
Self::Buffered
}
}

impl TryFrom<u8> for IoMode {
type Error = u8;

fn try_from(value: u8) -> Result<Self, Self::Error> {
Ok(match value {
v if v == (IoMode::Buffered as u8) => IoMode::Buffered,
#[cfg(target_os = "linux")]
v if v == (IoMode::Direct as u8) => IoMode::Direct,
x => return Err(x),
})
#[cfg(target_os = "macos")]
pub const fn preferred() -> Self {
Self::Buffered
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion pageserver/client/src/mgmt_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,7 @@ impl Client {
/// Configs io mode at runtime.
pub async fn put_io_mode(
&self,
mode: &pageserver_api::models::virtual_file::IoMode,
mode: &pageserver_api::models::virtual_file::IoModeKind,
) -> Result<()> {
let uri = format!("{}/v1/io_mode", self.mgmt_api_endpoint);
self.request(Method::PUT, uri, mode)
Expand Down
4 changes: 2 additions & 2 deletions pageserver/ctl/src/layer_map_analyzer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use camino::{Utf8Path, Utf8PathBuf};
use pageserver::context::{DownloadBehavior, RequestContext};
use pageserver::task_mgr::TaskKind;
use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
use pageserver::virtual_file::api::IoMode;
use pageserver::virtual_file::api::IoModeKind;
use std::cmp::Ordering;
use std::collections::BinaryHeap;
use std::ops::Range;
Expand Down Expand Up @@ -137,7 +137,7 @@ pub(crate) async fn main(cmd: &AnalyzeLayerMapCmd) -> Result<()> {
pageserver::virtual_file::init(
10,
virtual_file::api::IoEngineKind::StdFs,
IoMode::preferred(),
IoModeKind::preferred(),
);
pageserver::page_cache::init(100);

Expand Down
8 changes: 4 additions & 4 deletions pageserver/ctl/src/layers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use pageserver::task_mgr::TaskKind;
use pageserver::tenant::storage_layer::{delta_layer, image_layer};
use pageserver::tenant::storage_layer::{DeltaLayer, ImageLayer};
use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
use pageserver::virtual_file::api::IoMode;
use pageserver::virtual_file::api::IoModeKind;
use pageserver::{page_cache, virtual_file};
use std::fs::{self, File};
use utils::id::{TenantId, TimelineId};
Expand Down Expand Up @@ -50,7 +50,7 @@ async fn read_delta_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result
virtual_file::init(
10,
virtual_file::api::IoEngineKind::StdFs,
IoMode::preferred(),
IoModeKind::preferred(),
);
page_cache::init(100);
let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path");
Expand All @@ -64,7 +64,7 @@ async fn read_image_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result
virtual_file::init(
10,
virtual_file::api::IoEngineKind::StdFs,
IoMode::preferred(),
IoModeKind::preferred(),
);
page_cache::init(100);
let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path");
Expand Down Expand Up @@ -170,7 +170,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
pageserver::virtual_file::init(
10,
virtual_file::api::IoEngineKind::StdFs,
IoMode::preferred(),
IoModeKind::preferred(),
);
pageserver::page_cache::init(100);

Expand Down
4 changes: 2 additions & 2 deletions pageserver/ctl/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use pageserver::{
page_cache,
task_mgr::TaskKind,
tenant::{dump_layerfile_from_path, metadata::TimelineMetadata},
virtual_file::{self, api::IoMode},
virtual_file::{self, api::IoModeKind},
};
use pageserver_api::shard::TenantShardId;
use postgres_ffi::ControlFileData;
Expand Down Expand Up @@ -208,7 +208,7 @@ async fn print_layerfile(path: &Utf8Path) -> anyhow::Result<()> {
virtual_file::init(
10,
virtual_file::api::IoEngineKind::StdFs,
IoMode::preferred(),
IoModeKind::preferred(),
);
page_cache::init(100);
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
Expand Down
2 changes: 1 addition & 1 deletion pageserver/pagebench/src/cmd/getpage_latest_lsn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ pub(crate) struct Args {

/// Before starting the benchmark, live-reconfigure the pageserver to use specified io mode (buffered vs. direct).
#[clap(long)]
set_io_mode: Option<pageserver_api::models::virtual_file::IoMode>,
set_io_mode: Option<pageserver_api::models::virtual_file::IoModeKind>,

targets: Option<Vec<TenantTimelineId>>,
}
Expand Down
5 changes: 3 additions & 2 deletions pageserver/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ pub struct PageServerConf {
pub l0_flush: crate::l0_flush::L0FlushConfig,

/// Direct IO settings
pub virtual_file_io_mode: virtual_file::IoMode,
pub virtual_file_io_mode: virtual_file::IoModeKind,

/// Optionally disable disk syncs (unsafe!)
pub no_sync: bool,
Expand Down Expand Up @@ -415,7 +415,8 @@ impl PageServerConf {
l0_flush: l0_flush
.map(crate::l0_flush::L0FlushConfig::from)
.unwrap_or_default(),
virtual_file_io_mode: virtual_file_io_mode.unwrap_or(virtual_file::IoMode::preferred()),
virtual_file_io_mode: virtual_file_io_mode
.unwrap_or(virtual_file::IoModeKind::preferred()),
no_sync: no_sync.unwrap_or(false),
};

Expand Down
4 changes: 2 additions & 2 deletions pageserver/src/http/routes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use hyper::header;
use hyper::StatusCode;
use hyper::{Body, Request, Response, Uri};
use metrics::launch_timestamp::LaunchTimestamp;
use pageserver_api::models::virtual_file::IoMode;
use pageserver_api::models::virtual_file::IoModeKind;
use pageserver_api::models::DownloadRemoteLayersTaskSpawnRequest;
use pageserver_api::models::IngestAuxFilesRequest;
use pageserver_api::models::ListAuxFilesRequest;
Expand Down Expand Up @@ -2617,7 +2617,7 @@ async fn put_io_mode_handler(
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
check_permission(&r, None)?;
let mode: IoMode = json_request(&mut r).await?;
let mode: IoModeKind = json_request(&mut r).await?;
crate::virtual_file::set_io_mode(mode);
json_response(StatusCode::OK, ())
}
Expand Down
71 changes: 65 additions & 6 deletions pageserver/src/virtual_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ pub use io_engine::FeatureTestResult as IoEngineFeatureTestResult;
mod metadata;
mod open_options;
use self::owned_buffers_io::write::OwnedAsyncWriter;
pub(crate) use api::IoMode;
pub(crate) use api::IoModeKind;
pub(crate) use io_engine::IoEngineKind;
pub(crate) use metadata::Metadata;
pub(crate) use open_options::*;
Expand Down Expand Up @@ -138,6 +138,7 @@ impl VirtualFile {
ctx: &RequestContext, /* TODO: carry a pointer to the metrics in the RequestContext instead of the parsing https://github.com/neondatabase/neon/issues/6107 */
) -> Result<Self, std::io::Error> {
let file = match get_io_mode() {
IoMode::NotSet => panic!("not initialized"),
IoMode::Buffered => {
let inner = VirtualFileInner::open_with_options(path, open_options, ctx).await?;
VirtualFile {
Expand Down Expand Up @@ -1332,7 +1333,7 @@ impl OpenFiles {
/// server startup.
///
#[cfg(not(test))]
pub fn init(num_slots: usize, engine: IoEngineKind, mode: IoMode) {
pub fn init(num_slots: usize, engine: IoEngineKind, mode: IoModeKind) {
if OPEN_FILES.set(OpenFiles::new(num_slots)).is_err() {
panic!("virtual_file::init called twice");
}
Expand Down Expand Up @@ -1370,14 +1371,72 @@ pub(crate) type IoBuffer = AlignedBuffer<ConstAlign<{ get_io_buffer_alignment()
pub(crate) type IoPageSlice<'a> =
AlignedSlice<'a, PAGE_SZ, ConstAlign<{ get_io_buffer_alignment() }>>;

static IO_MODE: AtomicU8 = AtomicU8::new(IoMode::preferred() as u8);
#[derive(Clone, Copy, Debug)]
#[repr(u8)]
pub(crate) enum IoMode {
NotSet,
Buffered,
#[cfg(target_os = "linux")]
Direct,
}

impl TryFrom<u8> for IoMode {
type Error = u8;

fn try_from(value: u8) -> Result<Self, Self::Error> {
Ok(match value {
v if v == (IoMode::NotSet as u8) => IoMode::NotSet,
v if v == (IoMode::Buffered as u8) => IoMode::Buffered,
#[cfg(target_os = "linux")]
v if v == (IoMode::Direct as u8) => IoMode::Direct,
x => return Err(x),
})
}
}

impl From<IoModeKind> for IoMode {
fn from(value: IoModeKind) -> Self {
match value {
IoModeKind::Buffered => IoMode::Buffered,
#[cfg(target_os = "linux")]
IoModeKind::Direct => IoMode::Direct,
}
}
}

static IO_MODE: AtomicU8 = AtomicU8::new(IoMode::NotSet as u8);

pub(crate) fn set_io_mode(mode: IoMode) {
pub(crate) fn set_io_mode(mode_kind: IoModeKind) {
let mode: IoMode = mode_kind.into();
IO_MODE.store(mode as u8, std::sync::atomic::Ordering::Relaxed);
}

pub(crate) fn get_io_mode() -> IoMode {
IoMode::try_from(IO_MODE.load(Ordering::Relaxed)).unwrap()
fn get_io_mode() -> IoMode {
let mode = IoMode::try_from(IO_MODE.load(Ordering::Relaxed)).unwrap();
if cfg!(test) {
match mode {
IoMode::NotSet => {
let env_var_name = "NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOMODE";
let kind = match std::env::var(env_var_name) {
Ok(v) => match v.parse::<IoModeKind>() {
Ok(mode_kind) => mode_kind,
Err(e) => {
panic!("invalid VirtualFile io mode for env var {env_var_name}: {e:#}: {v:?}")
}
},
Err(std::env::VarError::NotPresent) => IoModeKind::preferred(),
Err(std::env::VarError::NotUnicode(_)) => {
panic!("env var {env_var_name} is not unicode");
}
};
set_io_mode(kind);
get_io_mode()
}
x => x,
}
} else {
mode
}
}
#[cfg(test)]
mod tests {
Expand Down
16 changes: 14 additions & 2 deletions scripts/flaky_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,18 @@ def main(args: argparse.Namespace):
else:
pageserver_virtual_file_io_engine_parameter = ""

# If a test run has non-default PAGESERVER_VIRTUAL_FILE_IO_MODE (i.e. not empty, not direct),
# use it to parametrize test name along with build_type and pg_version
#
# See test_runner/fixtures/parametrize.py for details
if (io_mode := os.getenv("PAGESERVER_VIRTUAL_FILE_IO_MODE", "")) not in (
"",
"direct",
):
pageserver_virtual_file_io_mode_parameter = f"-{io_mode}"
else:
pageserver_virtual_file_io_mode_parameter = ""

# re-use existing records of flaky tests from before parametrization by compaction_algorithm
def get_pageserver_default_tenant_config_compaction_algorithm() -> Optional[dict[str, Any]]:
"""Duplicated from parametrize.py"""
Expand All @@ -90,10 +102,10 @@ def get_pageserver_default_tenant_config_compaction_algorithm() -> Optional[dict
if row["name"].endswith("]"):
parametrized_test = row["name"].replace(
"[",
f"[{build_type}-pg{pg_version}{pageserver_virtual_file_io_engine_parameter}{pageserver_default_tenant_config_compaction_algorithm_parameter}-",
f"[{build_type}-pg{pg_version}{pageserver_virtual_file_io_engine_parameter}{pageserver_virtual_file_io_mode_parameter}{pageserver_default_tenant_config_compaction_algorithm_parameter}-",
)
else:
parametrized_test = f"{row['name']}[{build_type}-pg{pg_version}{pageserver_virtual_file_io_engine_parameter}{pageserver_default_tenant_config_compaction_algorithm_parameter}]"
parametrized_test = f"{row['name']}[{build_type}-pg{pg_version}{pageserver_virtual_file_io_engine_parameter}{pageserver_virtual_file_io_mode_parameter}{pageserver_default_tenant_config_compaction_algorithm_parameter}]"

res[row["parent_suite"]][row["suite"]][parametrized_test] = True

Expand Down
5 changes: 5 additions & 0 deletions test_runner/fixtures/parametrize.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ def pytest_generate_tests(metafunc: Metafunc):
):
metafunc.parametrize("pageserver_virtual_file_io_engine", [io_engine])

# A hacky way to parametrize tests only for `pageserver_virtual_file_io_mode=buffered`
# And do not change test name for default `pageserver_virtual_file_io_mode=direct` to keep tests statistics
if (io_mode := os.getenv("PAGESERVER_VIRTUAL_FILE_IO_MODE", "")) not in ("", "direct"):
metafunc.parametrize("pageserver_virtual_file_io_mode", [io_mode])

# Same hack for pageserver_default_tenant_config_compaction_algorithm
if (
explicit_default := get_pageserver_default_tenant_config_compaction_algorithm()
Expand Down
Loading