diff --git a/CMakeLists.txt b/CMakeLists.txt index b63296b2e4..28e2c2b4c0 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -113,6 +113,8 @@ if (CUTLASS_ENABLE_SYCL) add_compile_definitions(CUTLASS_SYCL_PROFILING_ENABLED) add_compile_definitions(SYCLCOMPAT_PROFILING_ENABLED) endif() + + include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/onemkl.cmake) endif() find_package(Doxygen QUIET) diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index e79cae54eb..e09f75e0df 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -65,6 +65,9 @@ function(cutlass_benchmark_add_executable NAME) ) if (CUTLASS_ENABLE_SYCL) + add_dependencies(${NAME} onemkl_project) + target_include_directories(${NAME} PRIVATE ${ONEMKL_INCLUDE_DIR}) + target_link_libraries(${NAME} PUBLIC ${ONEMKL_LIB}) add_sycl_to_target(TARGET ${NAME}) endif() diff --git a/cmake/onemkl.cmake b/cmake/onemkl.cmake new file mode 100644 index 0000000000..a73d1189d7 --- /dev/null +++ b/cmake/onemkl.cmake @@ -0,0 +1,62 @@ +# Copyright (c) 2024 - 2024 Codeplay Software Ltd. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include_guard() + +include(ExternalProject) + +set(ONEMKL_INSTALL_DIR ${CMAKE_BINARY_DIR}/deps/oneMKL) +set(ONEMKL_INCLUDE_DIR ${ONEMKL_INSTALL_DIR}/include) +set(ONEMKL_LIB_DIR ${ONEMKL_INSTALL_DIR}/lib) +set(ONEMKL_LIB ${ONEMKL_LIB_DIR}/libonemkl.so) + +ExternalProject_Add( + onemkl_project + + PREFIX ${ONEMKL_INSTALL_DIR} + GIT_REPOSITORY "https://github.com/oneapi-src/oneMKL.git" + GIT_TAG "v0.5" + + CMAKE_ARGS + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_GENERATOR=${CMAKE_GENERATOR} + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DCMAKE_INSTALL_PREFIX=${ONEMKL_INSTALL_DIR} + -DCMAKE_SHARED_LINKER_FLAGS="-Wl,-rpath=${ONEMKL_LIB_DIR}" + -DENABLE_MKLCPU_BACKEND=OFF + -DENABLE_MKLGPU_BACKEND=OFF + -DBUILD_FUNCTIONAL_TESTS=OFF + -DBUILD_EXAMPLES=OFF + -DBUILD_DOC=OFF + -DTARGET_DOMAINS=rng + INSTALL_DIR ${ONEMKL_INSTALL_DIR} + BUILD_BYPRODUCTS ${ONEMKL_LIB} +) + +add_library(oneMKL SHARED IMPORTED) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 26c69b310a..e9a7a02c9b 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -69,6 +69,9 @@ function(cutlass_example_add_executable NAME) ) if (CUTLASS_ENABLE_SYCL) + add_dependencies(${NAME} onemkl_project) + target_include_directories(${NAME} PRIVATE ${ONEMKL_INCLUDE_DIR}) + target_link_libraries(${NAME} PUBLIC ${ONEMKL_LIB}) add_sycl_to_target(TARGET ${NAME}) endif() diff --git a/tools/util/include/cutlass/util/reference/device/sycl_tensor_fill.h b/tools/util/include/cutlass/util/reference/device/sycl_tensor_fill.h index 7a039fd63c..9c9f5e5901 100644 --- a/tools/util/include/cutlass/util/reference/device/sycl_tensor_fill.h +++ b/tools/util/include/cutlass/util/reference/device/sycl_tensor_fill.h @@ -31,13 +31,14 @@ #pragma once // Standard Library includes -#include #include #include #include #include #include +#include + // Cutlass includes #include "cutlass/cutlass.h" #include "cutlass/complex.h" @@ -111,8 +112,8 @@ struct RandomUniformFunc { /// Parameters object Params params; - std::default_random_engine generator; - std::normal_distribution distribution; + oneapi::mkl::rng::device::philox4x32x10<> generator; + oneapi::mkl::rng::device::uniform distribution; // // Methods @@ -120,21 +121,20 @@ struct RandomUniformFunc { explicit RandomUniformFunc(Params const ¶ms): params(params), - generator(params.seed), - distribution(static_cast(params.min), static_cast(params.max)) { - } + generator(params.seed, ThreadIdxX() + BlockIdxX() * BlockDimX()), + distribution(static_cast(params.min), static_cast(params.max)){} /// Compute random value and update RNG state - CUTLASS_HOST + CUTLASS_HOST_DEVICE Element operator()() { - FloatType rnd = distribution(generator); - + FloatType rnd = oneapi::mkl::rng::device::generate(distribution, generator); + // Random values are cast to integer after scaling by a power of two to facilitate error // testing Element result; if (params.int_scale >= 0) { - rnd = FloatType(IntType(std::llround(rnd * params.float_scale_up))); + rnd = FloatType(IntType(sycl::round(rnd * params.float_scale_up))); result = Element(IntType(rnd * params.float_scale_down)); } else { @@ -163,13 +163,7 @@ void BlockFillRandomUniform( using RandomFunc = detail::RandomUniformFunc; typename RandomFunc::Params params(seed, max, min, bits); - - auto rand = RandomFunc(params); - auto h_vector = std::vector(capacity); - for (int j = 0; j < capacity; ++j) { - h_vector[j] = rand(); - } - syclcompat::memcpy(ptr, h_vector.data(), capacity); + BlockForEach(ptr, capacity, params); } ///////////////////////////////////////////////////////////////////////////////////////////////////