From 3bef74f5bee2a12a8c44b673ebe52380f7cbab4d Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Tue, 29 Oct 2024 16:33:30 +0000 Subject: [PATCH] Explicitly specify USMFree as a non-blocking operation. Also update CL adapter to reflect this. --- include/ur_api.h | 7 +++++ scripts/core/usm.yml | 3 +++ source/adapters/opencl/common.hpp | 2 +- .../adapters/opencl/extension_functions.def | 2 +- source/adapters/opencl/usm.cpp | 27 +++++++++---------- source/loader/ur_libapi.cpp | 7 +++++ source/ur_api.cpp | 7 +++++ 7 files changed, 38 insertions(+), 17 deletions(-) diff --git a/include/ur_api.h b/include/ur_api.h index 60d6fc2f70..44b8c4ed98 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -3656,6 +3656,13 @@ urUSMSharedAlloc( /////////////////////////////////////////////////////////////////////////////// /// @brief Free the USM memory object /// +/// @details +/// - Note that implementations are not obliged to wait for previously +/// enqueued operations that may be using `hMem` to finish before freeing +/// `hMem`. +/// - Care should be taken to properly synchronize calling this entry point +/// with any prior uses of `hMem`, especially kernel executions. +/// /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_UNINITIALIZED diff --git a/scripts/core/usm.yml b/scripts/core/usm.yml index 77d5b7260a..1a131789b1 100644 --- a/scripts/core/usm.yml +++ b/scripts/core/usm.yml @@ -364,6 +364,9 @@ desc: "Free the USM memory object" class: $xUSM name: Free ordinal: "0" +details: + - "Note that implementations are not obliged to wait for previously enqueued operations that may be using `hMem` to finish before freeing `hMem`." + - "Care should be taken to properly synchronize calling this entry point with any prior uses of `hMem`, especially kernel executions." params: - type: $x_context_handle_t name: hContext diff --git a/source/adapters/opencl/common.hpp b/source/adapters/opencl/common.hpp index e21f78af6b..11f3ad17e3 100644 --- a/source/adapters/opencl/common.hpp +++ b/source/adapters/opencl/common.hpp @@ -186,7 +186,7 @@ namespace cl_ext { CONSTFIX char HostMemAllocName[] = "clHostMemAllocINTEL"; CONSTFIX char DeviceMemAllocName[] = "clDeviceMemAllocINTEL"; CONSTFIX char SharedMemAllocName[] = "clSharedMemAllocINTEL"; -CONSTFIX char MemBlockingFreeName[] = "clMemBlockingFreeINTEL"; +CONSTFIX char MemFreeName[] = "clMemFreeINTEL"; CONSTFIX char CreateBufferWithPropertiesName[] = "clCreateBufferWithPropertiesINTEL"; CONSTFIX char SetKernelArgMemPointerName[] = "clSetKernelArgMemPointerINTEL"; diff --git a/source/adapters/opencl/extension_functions.def b/source/adapters/opencl/extension_functions.def index 98359465ed..a6e688cb43 100644 --- a/source/adapters/opencl/extension_functions.def +++ b/source/adapters/opencl/extension_functions.def @@ -4,7 +4,7 @@ CL_EXTENSION_FUNC(clSharedMemAllocINTEL) CL_EXTENSION_FUNC(clGetDeviceFunctionPointer) CL_EXTENSION_FUNC(clGetDeviceGlobalVariablePointer) CL_EXTENSION_FUNC(clCreateBufferWithPropertiesINTEL) -CL_EXTENSION_FUNC(clMemBlockingFreeINTEL) +CL_EXTENSION_FUNC(clMemFreeINTEL) CL_EXTENSION_FUNC(clSetKernelArgMemPointerINTEL) CL_EXTENSION_FUNC(clEnqueueMemFillINTEL) CL_EXTENSION_FUNC(clEnqueueMemcpyINTEL) diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index dfcc1dfafa..913e59001f 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -239,16 +239,13 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext, void *pMem) { - - // Use a blocking free to avoid issues with indirect access from kernels that - // might be still running. - clMemBlockingFreeINTEL_fn FuncPtr = nullptr; + clMemFreeINTEL_fn FuncPtr = nullptr; cl_context CLContext = cl_adapter::cast(hContext); ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; - RetVal = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clMemBlockingFreeINTELCache, - cl_ext::MemBlockingFreeName, &FuncPtr); + RetVal = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clMemFreeINTELCache, + cl_ext::MemFreeName, &FuncPtr); if (FuncPtr) { RetVal = mapCLErrorToUR(FuncPtr(CLContext, pMem)); @@ -299,10 +296,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemcpyINTELCache, cl_ext::EnqueueMemcpyName, &USMMemcpy)); - clMemBlockingFreeINTEL_fn USMFree = nullptr; - UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clMemBlockingFreeINTELCache, - cl_ext::MemBlockingFreeName, &USMFree)); + clMemFreeINTEL_fn USMFree = nullptr; + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clMemFreeINTELCache, + cl_ext::MemFreeName, &USMFree)); cl_int ClErr = CL_SUCCESS; auto HostBuffer = @@ -369,10 +366,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemcpyINTELCache, cl_ext::EnqueueMemcpyName, &USMMemcpy)); - clMemBlockingFreeINTEL_fn USMFree = nullptr; - UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clMemBlockingFreeINTELCache, - cl_ext::MemBlockingFreeName, &USMFree)); + clMemFreeINTEL_fn USMFree = nullptr; + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clMemFreeINTELCache, + cl_ext::MemFreeName, &USMFree)); // Check if the two allocations are DEVICE allocations from different // devices, if they are we need to do the copy indirectly via a host diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 9a8e4c2e12..105a8a6c06 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -2404,6 +2404,13 @@ ur_result_t UR_APICALL urUSMSharedAlloc( /////////////////////////////////////////////////////////////////////////////// /// @brief Free the USM memory object /// +/// @details +/// - Note that implementations are not obliged to wait for previously +/// enqueued operations that may be using `hMem` to finish before freeing +/// `hMem`. +/// - Care should be taken to properly synchronize calling this entry point +/// with any prior uses of `hMem`, especially kernel executions. +/// /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_UNINITIALIZED diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 92b02b7176..379cb7857f 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -2080,6 +2080,13 @@ ur_result_t UR_APICALL urUSMSharedAlloc( /////////////////////////////////////////////////////////////////////////////// /// @brief Free the USM memory object /// +/// @details +/// - Note that implementations are not obliged to wait for previously +/// enqueued operations that may be using `hMem` to finish before freeing +/// `hMem`. +/// - Care should be taken to properly synchronize calling this entry point +/// with any prior uses of `hMem`, especially kernel executions. +/// /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_UNINITIALIZED