diff --git a/README b/README index c933f61d6..0102ca33e 100644 --- a/README +++ b/README @@ -79,6 +79,7 @@ Example: XLIO DETAILS: SigIntr Ctrl-C Handle Enabled [XLIO_HANDLE_SIGINTR] XLIO DETAILS: SegFault Backtrace Disabled [XLIO_HANDLE_SIGSEGV] XLIO DETAILS: Print a report Disabled [XLIO_PRINT_REPORT] + XLIO DETAILS: Quick start Disabled [XLIO_QUICK_START] XLIO DETAILS: Ring allocation logic TX 0 (Ring per interface) [XLIO_RING_ALLOCATION_LOGIC_TX] XLIO DETAILS: Ring allocation logic RX 0 (Ring per interface) [XLIO_RING_ALLOCATION_LOGIC_RX] XLIO INFO : Ring migration ratio TX -1 [XLIO_RING_MIGRATION_RATIO_TX] @@ -308,6 +309,13 @@ When Enabled, print backtrace if segmentation fault happens. Value range is 0 to 1 Default value is 0 (Disabled) +XLIO_QUICK_START +Avoid expensive extra checks to reduce the initialization time. This may result +in failures in case of a system misconfiguration. +For example, if the parameter is enabled and hugepages are requested beyond the +cgroup limit, XLIO crashes due to an access to an unmapped page. +Default value is 0 (Disabled) + XLIO_ZC_BUFS Number of global zerocopy data buffer elements allocation. Default value is 200000 diff --git a/src/core/main.cpp b/src/core/main.cpp index 8b41c4314..24c69b623 100644 --- a/src/core/main.cpp +++ b/src/core/main.cpp @@ -501,6 +501,8 @@ void print_xlio_global_settings() safe_mce_sys().handle_segfault ? "Enabled " : "Disabled"); VLOG_PARAM_STRING("Print a report", safe_mce_sys().print_report, MCE_DEFAULT_PRINT_REPORT, SYS_VAR_PRINT_REPORT, safe_mce_sys().print_report ? "Enabled " : "Disabled"); + VLOG_PARAM_STRING("Quick start", safe_mce_sys().quick_start, MCE_DEFAULT_QUICK_START, + SYS_VAR_QUICK_START, safe_mce_sys().quick_start ? "Enabled " : "Disabled"); VLOG_PARAM_NUMSTR("Ring allocation logic TX", safe_mce_sys().ring_allocation_logic_tx, MCE_DEFAULT_RING_ALLOCATION_LOGIC_TX, SYS_VAR_RING_ALLOCATION_LOGIC_TX, diff --git a/src/core/util/hugepage_mgr.cpp b/src/core/util/hugepage_mgr.cpp index 36d180e69..79bc32687 100644 --- a/src/core/util/hugepage_mgr.cpp +++ b/src/core/util/hugepage_mgr.cpp @@ -114,7 +114,44 @@ void *hugepage_mgr::alloc_hugepages_helper(size_t &size, size_t hugepage) if (ptr == MAP_FAILED) { ptr = nullptr; __log_info_dbg("mmap failed (errno=%d), skipping hugepage %zu kB", errno, hugepage / 1024U); - } else { + } else if (!safe_mce_sys().quick_start) { + /* Check whether all the pages are resident. In a container, allocation beyond the limit can + * be successful and lead to SIGBUS on an access. + */ + const size_t pages_nr = actual_size / hugepage; + size_t resident_nr = 0; + char *page_ptr = reinterpret_cast(ptr); + int rc = 0; + + /* Checking a single page per hugepage in a loop is more efficient than a single mincore() + * syscall for the entire range. A single syscall would also require an array allocation + * which can grow to tens of MB for the preallocated memory region. + */ + for (size_t i = 0; rc == 0 && i < pages_nr; ++i) { + unsigned char vec; + rc = mincore(page_ptr, 1, &vec); + resident_nr += rc == 0 ? (vec & 1U) : 0; + page_ptr += hugepage; + } + + if (rc != 0 || resident_nr != pages_nr) { + int rc2 = munmap(ptr, actual_size); + if (rc2 < 0) { + __log_info_dbg("munmap failed (errno=%d)", errno); + } + if (rc < 0) { + __log_info_dbg("mincore() failed to verify hugepages (errno=%d)", errno); + } else if (resident_nr != pages_nr) { + __log_info_dbg("Not all hugepages are resident (allocated=%zu resident=%zu)", + pages_nr, resident_nr); + } + __log_info_dbg("Cannot use hugepages, skipping hugepage %zu kB", hugepage / 1024U); + + ptr = nullptr; + } + } + if (ptr) { + // Success. size = actual_size; } return ptr; diff --git a/src/core/util/sys_vars.cpp b/src/core/util/sys_vars.cpp index 6ada08b5c..ef61d7bc7 100644 --- a/src/core/util/sys_vars.cpp +++ b/src/core/util/sys_vars.cpp @@ -761,6 +761,7 @@ void mce_sys_var::get_env_params() service_enable = MCE_DEFAULT_SERVICE_ENABLE; print_report = MCE_DEFAULT_PRINT_REPORT; + quick_start = MCE_DEFAULT_QUICK_START; log_level = VLOG_DEFAULT; log_details = MCE_DEFAULT_LOG_DETAILS; log_colors = MCE_DEFAULT_LOG_COLORS; @@ -1084,6 +1085,10 @@ void mce_sys_var::get_env_params() print_report = atoi(env_ptr) ? true : false; } + if ((env_ptr = getenv(SYS_VAR_QUICK_START))) { + quick_start = atoi(env_ptr) ? true : false; + } + if ((env_ptr = getenv(SYS_VAR_LOG_FILENAME))) { read_env_variable_with_pid(log_filename, sizeof(log_filename), env_ptr); } diff --git a/src/core/util/sys_vars.h b/src/core/util/sys_vars.h index 95e463f1d..fc5f725a3 100644 --- a/src/core/util/sys_vars.h +++ b/src/core/util/sys_vars.h @@ -340,6 +340,7 @@ struct mce_sys_var { uint32_t mce_spec; bool print_report; + bool quick_start; vlog_levels_t log_level; uint32_t log_details; char log_filename[PATH_MAX]; @@ -558,6 +559,7 @@ extern mce_sys_var &safe_mce_sys(); #define SYS_VAR_HANDLE_SIGINTR "XLIO_HANDLE_SIGINTR" #define SYS_VAR_HANDLE_SIGSEGV "XLIO_HANDLE_SIGSEGV" #define SYS_VAR_STATS_FD_NUM "XLIO_STATS_FD_NUM" +#define SYS_VAR_QUICK_START "XLIO_QUICK_START" #define SYS_VAR_RING_ALLOCATION_LOGIC_TX "XLIO_RING_ALLOCATION_LOGIC_TX" #define SYS_VAR_RING_ALLOCATION_LOGIC_RX "XLIO_RING_ALLOCATION_LOGIC_RX" @@ -710,7 +712,8 @@ extern mce_sys_var &safe_mce_sys(); #define MCE_DEFAULT_APP_ID ("XLIO_DEFAULT_APPLICATION_ID") #define MCE_DEFAULT_HANDLE_SIGINTR (true) #define MCE_DEFAULT_HANDLE_SIGFAULT (false) -#define MCE_DEFAULT_STATS_FD_NUM 0 +#define MCE_DEFAULT_STATS_FD_NUM (0) +#define MCE_DEFAULT_QUICK_START (false) #define MCE_DEFAULT_RING_ALLOCATION_LOGIC_TX (RING_LOGIC_PER_THREAD) #define MCE_DEFAULT_RING_ALLOCATION_LOGIC_RX (RING_LOGIC_PER_THREAD) #define MCE_DEFAULT_RING_MIGRATION_RATIO_TX (-1)