From 91eeaf1c8a80a37183aeddefb7202e261aff92f7 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 25 Aug 2023 17:15:27 +0000 Subject: [PATCH] FillRandom: Use MKL host API --- Src/Base/AMReX_Random.cpp | 40 ++++++++++++----------------------- Tools/CMake/AMReXSYCL.cmake | 2 +- Tools/GNUMake/comps/dpcpp.mak | 2 +- 3 files changed, 16 insertions(+), 28 deletions(-) diff --git a/Src/Base/AMReX_Random.cpp b/Src/Base/AMReX_Random.cpp index 9e1059e6798..cc791a11fef 100644 --- a/Src/Base/AMReX_Random.cpp +++ b/Src/Base/AMReX_Random.cpp @@ -19,7 +19,7 @@ namespace namespace amrex { #ifdef AMREX_USE_SYCL sycl_rng_descr* rand_engine_descr = nullptr; -//xxxxx oneapi::mkl::rng::philox4x32x10* gpu_rand_generator = nullptr; + oneapi::mkl::rng::philox4x32x10* gpu_rand_generator = nullptr; #else amrex::randState_t* gpu_rand_state = nullptr; amrex::randGenerator_t gpu_rand_generator = nullptr; @@ -44,8 +44,8 @@ void ResizeRandomSeed (amrex::ULong gpu_seed) rand_engine_descr = new sycl_rng_descr (Gpu::Device::streamQueue(), sycl::range<1>(N), gpu_seed, 1); -//xxxxx gpu_rand_generator = new std::remove_pointer_t<decltype(gpu_rand_generator)> -// (Gpu::Device::streamQueue(), gpu_seed+1234ULL); + gpu_rand_generator = new std::remove_pointer_t<decltype(gpu_rand_generator)> + (Gpu::Device::streamQueue(), gpu_seed+1234ULL); #elif defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP) @@ -212,11 +212,11 @@ DeallocateRandomSeedDevArray () Gpu::streamSynchronize(); rand_engine_descr = nullptr; } -//xxxxx if (gpu_rand_generator != nullptr) { -// delete gpu_rand_generator; -// Gpu::streamSynchronize(); -// gpu_rand_generator = nullptr; -// } + if (gpu_rand_generator != nullptr) { + delete gpu_rand_generator; + Gpu::streamSynchronize(); + gpu_rand_generator = nullptr; + } #else if (gpu_rand_state != nullptr) { @@ -258,15 +258,9 @@ void FillRandom (Real* p, Long N) #elif defined(AMREX_USE_SYCL) -//xxxxx oneapi::mkl::rng::uniform<Real> distr; -// auto event = oneapi::mkl::rng::generate(distr, gpu_rand_generator, N, p); -// event.wait(); - - 
amrex::ParallelForRNG(N, [=] AMREX_GPU_DEVICE (Long i, RandomEngine const& eng) - { - p[i] = Random(eng); - }); - Gpu::streamSynchronize(); + oneapi::mkl::rng::uniform<Real> distr; + auto event = oneapi::mkl::rng::generate(distr, *gpu_rand_generator, N, p); + event.wait(); #else std::uniform_real_distribution<Real> distribution(Real(0.0), Real(1.0)); @@ -299,15 +293,9 @@ void FillRandomNormal (Real* p, Long N, Real mean, Real stddev) #elif defined(AMREX_USE_SYCL) -//xxxxx oneapi::mkl::rng::gaussian<Real> distr(mean, stddev); -// auto event = oneapi::mkl::rng::generate(distr, gpu_rand_generator, N, p); -// event.wait(); - - amrex::ParallelForRNG(N, [=] AMREX_GPU_DEVICE (Long i, RandomEngine const& eng) - { - p[i] = RandomNormal(mean, stddev, eng); - }); - Gpu::streamSynchronize(); + oneapi::mkl::rng::gaussian<Real> distr(mean, stddev); + auto event = oneapi::mkl::rng::generate(distr, *gpu_rand_generator, N, p); + event.wait(); #else diff --git a/Tools/CMake/AMReXSYCL.cmake b/Tools/CMake/AMReXSYCL.cmake index 42eb5c4802b..a67571dc412 100644 --- a/Tools/CMake/AMReXSYCL.cmake +++ b/Tools/CMake/AMReXSYCL.cmake @@ -53,7 +53,7 @@ endif() # target_link_options( SYCL INTERFACE - $<${_cxx_sycl}:-fsycl -fsycl-device-lib=libc,libm-fp32,libm-fp64> ) + $<${_cxx_sycl}:-qmkl=sequential -fsycl -fsycl-device-lib=libc,libm-fp32,libm-fp64> ) # TODO: use $ genex for CMake >=3.17 diff --git a/Tools/GNUMake/comps/dpcpp.mak b/Tools/GNUMake/comps/dpcpp.mak index 6e490d9c063..3bcf5cb4372 100644 --- a/Tools/GNUMake/comps/dpcpp.mak +++ b/Tools/GNUMake/comps/dpcpp.mak @@ -123,7 +123,7 @@ ifneq ($(BL_NO_FORT),TRUE) endif endif -LDFLAGS += -fsycl-device-lib=libc,libm-fp32,libm-fp64 +LDFLAGS += -qmkl=sequential -fsycl-device-lib=libc,libm-fp32,libm-fp64 ifdef SYCL_PARALLEL_LINK_JOBS LDFLAGS += -fsycl-max-parallel-link-jobs=$(SYCL_PARALLEL_LINK_JOBS)