From 5b6b09e2df8faf88d10dbe790bc1b0f9aea2826e Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 28 Oct 2024 15:44:59 +0100 Subject: [PATCH 01/14] sketch out pipeline caching --- include/nbl/video/utilities/CComputeBlit.h | 37 +++++----- src/nbl/video/utilities/CComputeBlit.cpp | 86 +++++++++++++++++++--- 2 files changed, 95 insertions(+), 28 deletions(-) diff --git a/include/nbl/video/utilities/CComputeBlit.h b/include/nbl/video/utilities/CComputeBlit.h index dc4c6f3c5..779dc2ddc 100644 --- a/include/nbl/video/utilities/CComputeBlit.h +++ b/include/nbl/video/utilities/CComputeBlit.h @@ -41,7 +41,21 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted } // ctor - inline CComputeBlit(core::smart_refctd_ptr&& logicalDevice) : m_device(std::move(logicalDevice)) {} + CComputeBlit( + core::smart_refctd_ptr&& logicalDevice, + core::smart_refctd_ptr&& cache=nullptr, + core::smart_refctd_ptr&& logger=nullptr + ); + + // if you set the values too small, we'll correct them ourselves anyway + struct STask + { + uint32_t workgroupSizeLog2 : 4 = 0; + // the TRUE output format, not the storage view format you might manually encode into + hlsl::format::TexelBlockFormat outputFormat : 8 = hlsl::format::TexelBlockFormat::TBF_UNKNOWN; + uint32_t sharedMemoryPerInvocation : 6 = 0; + uint32_t unused : 14 = 0; + }; //! Returns the original format if supports STORAGE_IMAGE otherwise returns a format in its compat class which supports STORAGE_IMAGE. 
inline asset::E_FORMAT getOutputViewFormat(const asset::E_FORMAT format) @@ -66,22 +80,7 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted return compatFormat; } } -/* - struct STask - { - hlsl::vector preloadWindow; - asset::E_FORMAT inFormat; - asset::E_FORMAT outFormat; - // default no coverage adjustment - uint8_t alphaBinCountLog2 : 4 = 0; - }; - inline void initializeTaskDefault(STask& task) const - { - auto physDev = m_device->getPhysicalDevice(); - const auto formatTrait = hlsl::format::getTraits(static_cast(task.outFormat)); - task.alphaBinCountLog2 = hlsl::max(,task.alphaBinCountLog2); - } -*/ + #if 0 // @param `alphaBinCount` is only required to size the histogram present in the default nbl_glsl_blit_AlphaStatistics_t in default_compute_common.comp core::smart_refctd_ptr createAlphaTestSpecializedShader(const asset::IImage::E_TYPE inImageType, const uint32_t alphaBinCount = asset::IBlitUtilities::DefaultAlphaBinCount); @@ -666,7 +665,11 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted EBT_COUNT }; + void createAndCachePipelines(CAssetConverter* converter, core::smart_refctd_ptr* pipelines, const std::span tasks); + core::smart_refctd_ptr m_device; + system::logger_opt_smart_ptr m_logger; + core::smart_refctd_ptr m_shaderCache; //! This calculates the inclusive upper bound on the preload region i.e. it will be reachable for some cases. For the rest it will be bigger //! by a pixel in each dimension. 
diff --git a/src/nbl/video/utilities/CComputeBlit.cpp b/src/nbl/video/utilities/CComputeBlit.cpp index 012f8a230..5856f42fc 100644 --- a/src/nbl/video/utilities/CComputeBlit.cpp +++ b/src/nbl/video/utilities/CComputeBlit.cpp @@ -5,6 +5,79 @@ using namespace nbl::system; using namespace nbl::asset; using namespace nbl::video; + +CComputeBlit::CComputeBlit(smart_refctd_ptr&& logicalDevice, smart_refctd_ptr&& cache, smart_refctd_ptr&& logger) : m_device(std::move(logicalDevice)), m_logger(nullptr) +{ + if (logger) + m_logger = std::move(logger); + else if (auto debugCb=m_device->getPhysicalDevice()->getDebugCallback(); debugCb->getLogger()) + m_logger = smart_refctd_ptr(debugCb->getLogger()); + + if (cache) + m_shaderCache = std::move(cache); + else + m_shaderCache = make_smart_refctd_ptr(); +} + +void CComputeBlit::createAndCachePipelines(CAssetConverter* converter, smart_refctd_ptr* pipelines, const std::span tasks) +{ + core::vector> cpuPplns; + cpuPplns.reserve(tasks.size()); + + const auto& limits = m_device->getPhysicalDevice()->getLimits(); + for (auto task : tasks) + { + // adjust task default values + { + if (task.workgroupSizeLog2(task.outputFormat),3,1.f); + const auto precisionAt0 = getFormatPrecision(static_cast(task.outputFormat),3,0.f); + if (limits.workgroupMemoryExplicitLayout16BitAccess && limits.shaderFloat16 && precisionAt1>=std::exp2f(-11.f) && precisionAt0>=std::numeric_limits::min()) + useFloat16 = true; + } + // the absolute minimum needed to store a single pixel + const auto singlePixelStorage = channels*(useFloat16 ? 
sizeof(hlsl::float16_t):sizeof(hlsl::float32_t)); + // also slightly more memory is needed + task.sharedMemoryPerInvocation = core::max(singlePixelStorage*2,task.sharedMemoryPerInvocation); + } + // create blit pipeline + cpuPplns.emplace_back(nullptr); + // create optional coverage normalization pipeline + cpuPplns.emplace_back(nullptr); + } + + CAssetConverter::SInputs inputs = {}; + inputs.readCache = converter; + inputs.logger = m_logger.getRaw(); + std::get>(inputs.assets) = {&cpuPplns.data()->get(),cpuPplns.size()}; + inputs.readShaderCache = m_shaderCache.get(); + inputs.writeShaderCache = m_shaderCache.get(); + // no pipeline cache, because we only make the same pipeline once, ever + auto reserveResults = converter->reserve(inputs); + assert(reserveResults.getRequiredQueueFlags().value==IQueue::FAMILY_FLAGS::NONE); + // copy over the results + { + auto rIt = reserveResults.getGPUObjects().data(); + // TODO: redo + for (size_t i=0; ivalue; + } + + // this just inserts the pipelines into the cache + { + CAssetConverter::SConvertParams params = {}; + auto convertResults = reserveResults.convert(params); + assert(!convertResults.blocking()); + } +} + #if 0 core::smart_refctd_ptr CComputeBlit::createAlphaTestSpecializedShader(const asset::IImage::E_TYPE imageType, const uint32_t alphaBinCount) { @@ -39,21 +112,14 @@ core::smart_refctd_ptr CComputeBlit::createAlphaTestSpecializ "}\n"; auto cpuShader = core::make_smart_refctd_ptr(shaderSourceStream.str().c_str(), IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, IGPUShader::E_CONTENT_TYPE::ECT_HLSL, "CComputeBlitGLSLGLSL::createAlphaTestSpecializedShader"); - - return m_device->createShader(std::move(cpuShader.get())); } -core::smart_refctd_ptr CComputeBlit::createNormalizationSpecializedShader(const asset::IImage::E_TYPE imageType, const asset::E_FORMAT outFormat, - const uint32_t alphaBinCount) +core::smart_refctd_ptr CComputeBlit::createNormalizationSpecializedShader(const asset::IImage::E_TYPE imageType, const uint32_t 
alphaBinCount) { const auto workgroupDims = getDefaultWorkgroupDims(imageType); const auto paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount); const uint32_t blitDimCount = static_cast(imageType) + 1; - const auto castedFormat = getOutImageViewFormat(outFormat); - assert(outFormat == castedFormat); - const char* formatQualifier = asset::CHLSLCompiler::getStorageImageFormatQualifier(castedFormat); - std::ostringstream shaderSourceStream; shaderSourceStream @@ -67,7 +133,7 @@ core::smart_refctd_ptr CComputeBlit::createNormalizationSpeci "[[vk::binding(0, 0)]]\n" "nbl::hlsl::blit::impl::dim_to_image_properties::combined_sampler_t inCS;\n" - "[[vk::image_format(\"" << formatQualifier << "\")]]\n" + "[[vk::image_format(\"unknown\")]]\n" "[[vk::binding(1, 0)]]\n" "nbl::hlsl::blit::impl::dim_to_image_properties::image_t outImg;\n" @@ -90,7 +156,5 @@ core::smart_refctd_ptr CComputeBlit::createNormalizationSpeci "}\n"; auto cpuShader = core::make_smart_refctd_ptr(shaderSourceStream.str().c_str(), IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, IGPUShader::E_CONTENT_TYPE::ECT_HLSL, "CComputeBlitGLSL::createNormalizationSpecializedShader"); - - return m_device->createShader(std::move(cpuShader.get())); } #endif \ No newline at end of file From 492a0ad804b65e656c8a7f29fbd5b16d7d58dc92 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 29 Oct 2024 15:05:55 +0100 Subject: [PATCH 02/14] ladies and gentlemen we have C++20 concepts in HLSL ! --- include/nbl/builtin/hlsl/concepts.hlsl | 42 ++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/concepts.hlsl b/include/nbl/builtin/hlsl/concepts.hlsl index b252b3437..91dc76970 100644 --- a/include/nbl/builtin/hlsl/concepts.hlsl +++ b/include/nbl/builtin/hlsl/concepts.hlsl @@ -1,23 +1,40 @@ -// Copyright (C) 2023 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h - #ifndef _NBL_BUILTIN_HLSL_CONCEPTS_INCLUDED_ #define _NBL_BUILTIN_HLSL_CONCEPTS_INCLUDED_ + #include #include #include -#if (__cplusplus >= 202002L && __cpp_concepts) +#ifndef __HLSL_VERSION +// TODO: old stuff, see how much we can remove #define NBL_CONCEPT_TYPE_PARAMS(...) template <__VA_ARGS__> #define NBL_CONCEPT_SIGNATURE(NAME, ...) concept NAME = requires(__VA_ARGS__) #define NBL_CONCEPT_BODY(...) { __VA_ARGS__ }; #define NBL_CONCEPT_ASSIGN(NAME, ...) concept NAME = __VA_ARGS__; #define NBL_REQUIRES(...) requires __VA_ARGS__ +// for struct definitions, use instead of closing `>` on the primary template parameter list +#define NBL_PRIMARY_REQUIRES(...) > requires (__VA_ARGS__) + +// to put right before the closing `>` of the primary template definition, otherwise `NBL_PARTIAL_REQ_TOP`/`NBL_PARTIAL_REQ_BOT` won't work on specializations +#define NBL_STRUCT_CONSTRAINABLE +// NOTE: C++20 requires and C++11 enable_if have to be in different places! IT'S OF UTMOST IMPORTANCE YOUR REQUIRE CLAUSES ARE IDENTICAL FOR BOTH MACROS +// put just after the closing `>` on the partial template specialization `template` declaration e.g. `template NBL_PARTIAL_REQ_TOP(SomeCond) +#define NBL_PARTIAL_REQ_TOP(...) requires (__VA_ARGS__) +// put just before closing `>` on the partial template specialization Type args, e.g. `MyStruct)> +#define NBL_PARTIAL_REQ_BOT(...) + +// condition +#define NBL_FUNC_REQUIRES_BEGIN(...) requires (__VA_ARGS__) +// return value +#define NBL_FUNC_REQUIRES_END(...) __VA_ARGS__ + #include namespace nbl @@ -77,12 +94,31 @@ concept matricial = is_matrix::value; #else +// TODO: old stuff, see how much we can remove // No C++20 support. Do nothing. #define NBL_CONCEPT_TYPE_PARAMS(...) #define NBL_CONCEPT_SIGNATURE(NAME, ...) #define NBL_CONCEPT_BODY(...) #define NBL_REQUIRES(...) 
+ +// for struct definitions, use instead of closing `>` on the primary template parameter list +#define NBL_PRIMARY_REQUIRES(...) ,typename __requires=::nbl::hlsl::enable_if_t<(__VA_ARGS__),void> > + +// to put right before the closing `>` of the primary template definition, otherwise `NBL_PARTIAL_REQ_TOP`/`NBL_PARTIAL_REQ_BOT` won't work on specializations +#define NBL_STRUCT_CONSTRAINABLE ,typename __requires=void +// NOTE: C++20 requires and C++11 enable_if have to be in different places! IT'S OF UTMOST IMPORTANCE YOUR REQUIRE CLAUSES ARE IDENTICAL FOR BOTH MACROS +// put just after the closing `>` on the partial template specialization `template` declaration e.g. `template NBL_PARTIAL_REQ_TOP(SomeCond) +#define NBL_PARTIAL_REQ_TOP(...) +// put just before closing `>` on the partial template specialization Type args, e.g. `MyStruct)> +#define NBL_PARTIAL_REQ_BOT(...) ,std::enable_if_t<(__VA_ARGS__),void> + +// condition, use right after the closing `>` of a function template +#define NBL_FUNC_REQUIRES_BEGIN(...) ::nbl::hlsl::enable_if_t<(__VA_ARGS__), +// return value, use `END(T)` instead of the return value type declaration +#define NBL_FUNC_REQUIRES_END(...) __VA_ARGS__> + #endif + #endif \ No newline at end of file From fb2f7c6cc9e380c761a8d52acd8099a37ef97f21 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 29 Oct 2024 15:30:31 +0100 Subject: [PATCH 03/14] forgot to amend the commit --- include/nbl/builtin/hlsl/concepts.hlsl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/nbl/builtin/hlsl/concepts.hlsl b/include/nbl/builtin/hlsl/concepts.hlsl index 91dc76970..033709e72 100644 --- a/include/nbl/builtin/hlsl/concepts.hlsl +++ b/include/nbl/builtin/hlsl/concepts.hlsl @@ -19,6 +19,10 @@ #define NBL_CONCEPT_ASSIGN(NAME, ...) concept NAME = __VA_ARGS__; #define NBL_REQUIRES(...) 
requires __VA_ARGS__ + +// to define a concept using `concept Name = SomeContexprBoolCondition;` +#define NBL_BOOL_CONCEPT concept + // for struct definitions, use instead of closing `>` on the primary template parameter list #define NBL_PRIMARY_REQUIRES(...) > requires (__VA_ARGS__) @@ -102,6 +106,9 @@ concept matricial = is_matrix::value; #define NBL_REQUIRES(...) +// to define a concept using `concept Name = SomeContexprBoolCondition;` +#define NBL_BOOL_CONCEPT NBL_CONSTEXPR_STATIC_INLINE bool + // for struct definitions, use instead of closing `>` on the primary template parameter list #define NBL_PRIMARY_REQUIRES(...) ,typename __requires=::nbl::hlsl::enable_if_t<(__VA_ARGS__),void> > From 86e9a664a21c575eafa8406aba6e5d74c06a7585 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 29 Oct 2024 15:34:25 +0100 Subject: [PATCH 04/14] start reworking the descriptor declarations --- include/nbl/builtin/hlsl/binding_info.hlsl | 24 ++++ include/nbl/builtin/hlsl/blit/common.hlsl | 107 +++++++-------- .../builtin/hlsl/blit/default_blit.comp.hlsl | 57 ++++++++ .../hlsl/blit/default_normalize.comp.hlsl | 18 +++ include/nbl/builtin/hlsl/blit/temp.hlsl | 40 ------ include/nbl/video/utilities/CComputeBlit.h | 80 ----------- src/nbl/builtin/CMakeLists.txt | 5 +- src/nbl/video/utilities/CComputeBlit.cpp | 129 +++++++++++++++++- 8 files changed, 279 insertions(+), 181 deletions(-) create mode 100644 include/nbl/builtin/hlsl/binding_info.hlsl create mode 100644 include/nbl/builtin/hlsl/blit/default_blit.comp.hlsl create mode 100644 include/nbl/builtin/hlsl/blit/default_normalize.comp.hlsl delete mode 100644 include/nbl/builtin/hlsl/blit/temp.hlsl diff --git a/include/nbl/builtin/hlsl/binding_info.hlsl b/include/nbl/builtin/hlsl/binding_info.hlsl new file mode 100644 index 000000000..8702a32c3 --- /dev/null +++ b/include/nbl/builtin/hlsl/binding_info.hlsl @@ -0,0 +1,24 @@ +// Copyright (C) 2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_BUILTIN_HLSL_BINDING_INFO_INCLUDED_ +#define _NBL_BUILTIN_HLSL_BINDING_INFO_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +template +struct ConstevalBindingInfo +{ + NBL_CONSTEXPR_STATIC_INLINE uint32_t Set = set; + NBL_CONSTEXPR_STATIC_INLINE uint32_t Index = ix; + NBL_CONSTEXPR_STATIC_INLINE uint32_t Count = count; +}; + +} +} +#endif diff --git a/include/nbl/builtin/hlsl/blit/common.hlsl b/include/nbl/builtin/hlsl/blit/common.hlsl index 07bb3f942..6295e6870 100644 --- a/include/nbl/builtin/hlsl/blit/common.hlsl +++ b/include/nbl/builtin/hlsl/blit/common.hlsl @@ -4,84 +4,77 @@ #ifndef _NBL_BUILTIN_HLSL_BLIT_COMMON_INCLUDED_ #define _NBL_BUILTIN_HLSL_BLIT_COMMON_INCLUDED_ -#include +#include namespace nbl { namespace hlsl { -namespace blit +namespace glsl { -namespace impl +uint32_t gl_WorkGroupSize() { + return uint32_t3(ConstevalParameters::WorkGroupSize,1,1); +} +} +} +} -template -struct dim_to_image_properties { }; +using namespace nbl::hlsl; + +[[vk::binding(ConstevalParameters::kernel_weight_binding_t::Index,ConstevalParameters::kernel_weight_binding_t::Set)]] +Buffer kernelWeights[ConstevalParameters::kernel_weight_binding_t::Count]; +[[vk::binding(ConstevalParameters::input_sampler_binding_t::Index,ConstevalParameters::input_sampler_binding_t::Set)]] +SamplerState inSamp[ConstevalParameters::input_sampler_binding_t::Count]; +// aliased +[[vk::binding(ConstevalParameters::input_image_binding_t::Index,ConstevalParameters::input_image_binding_t::Set)]] +Texture1DArray inAs1DArray[ConstevalParameters::input_image_binding_t::Count]; +[[vk::binding(ConstevalParameters::input_image_binding_t::Index,ConstevalParameters::input_image_binding_t::Set)]] +Texture2DArray inAs2DArray[ConstevalParameters::input_image_binding_t::Count]; 
+[[vk::binding(ConstevalParameters::input_image_binding_t::Index,ConstevalParameters::input_image_binding_t::Set)]] +Texture3D inAs3D[ConstevalParameters::input_image_binding_t::Count]; +// aliased +[[vk::binding(ConstevalParameters::output_binding_t::Index,ConstevalParameters::output_binding_t::Set)]] [[vk::image_format("unknown")]] +RWTexture1DArray outAs1DArray[ConstevalParameters::output_binding_t::Count]; +[[vk::binding(ConstevalParameters::output_binding_t::Index,ConstevalParameters::output_binding_t::Set)]] [[vk::image_format("unknown")]] +RWTexture2DArray outAs2DArray[ConstevalParameters::output_binding_t::Count]; +[[vk::binding(ConstevalParameters::output_binding_t::Index,ConstevalParameters::output_binding_t::Set)]] [[vk::image_format("unknown")]] +RWTexture3D outAs3D[ConstevalParameters::output_binding_t::Count]; -template <> -struct dim_to_image_properties<1> -{ - using combined_sampler_t = Texture1DArray; - using image_t = RWTexture1DArray; - template - static vector getIndexCoord(vector coords, uint32_t layer) +groupshared uint32_t sMem[ConstevalParameters::SharedMemoryDWORDs]; +/* +struct HistogramAccessor +{ + void atomicAdd(uint32_t wgID, uint32_t bucket, uint32_t v) { - return vector(coords.x, layer); + InterlockedAdd(statsBuff[wgID * (ConstevalParameters::AlphaBinCount + 1) + bucket], v); } }; - -template <> -struct dim_to_image_properties<2> +struct SharedAccessor { - using combined_sampler_t = Texture2DArray; - using image_t = RWTexture2DArray; - - template - static vector getIndexCoord(vector coords, uint32_t layer) + float32_t get(float32_t idx) { - return vector(coords.xy, layer); + return sMem[idx]; + } + void set(float32_t idx, float32_t val) + { + sMem[idx] = val; } }; - -template <> -struct dim_to_image_properties<3> +struct InCSAccessor { - using combined_sampler_t = Texture3D; - using image_t = RWTexture3D; - - template - static vector getIndexCoord(vector coords, uint32_t layer) + float32_t4 get(float32_t3 c, uint32_t l) { - return 
vector(coords); + return inCS.SampleLevel(inSamp, blit::impl::dim_to_image_properties::getIndexCoord(c, l), 0); } }; - -} - - -template< - uint32_t _WorkGroupSizeX, - uint32_t _WorkGroupSizeY, - uint32_t _WorkGroupSizeZ, - uint32_t _SMemFloatsPerChannel, - uint32_t _BlitOutChannelCount, - uint32_t _BlitDimCount, - uint32_t _AlphaBinCount> -struct consteval_parameters_t +struct OutImgAccessor { - NBL_CONSTEXPR_STATIC_INLINE uint32_t SMemFloatsPerChannel = _SMemFloatsPerChannel; - NBL_CONSTEXPR_STATIC_INLINE uint32_t BlitOutChannelCount = _BlitOutChannelCount; - NBL_CONSTEXPR_STATIC_INLINE uint32_t BlitDimCount = _BlitDimCount; - NBL_CONSTEXPR_STATIC_INLINE uint32_t AlphaBinCount = _AlphaBinCount; - NBL_CONSTEXPR_STATIC_INLINE uint32_t WorkGroupSizeX = _WorkGroupSizeX; - NBL_CONSTEXPR_STATIC_INLINE uint32_t WorkGroupSizeY = _WorkGroupSizeY; - NBL_CONSTEXPR_STATIC_INLINE uint32_t WorkGroupSizeZ = _WorkGroupSizeZ; - NBL_CONSTEXPR_STATIC_INLINE uint32_t WorkGroupSize = WorkGroupSizeX * WorkGroupSizeY * WorkGroupSizeZ; + void set(int32_t3 c, uint32_t l, float32_t4 v) + { + outImg[blit::impl::dim_to_image_properties::getIndexCoord(c, l)] = v; + } }; - -} -} -} - +*/ #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/blit/default_blit.comp.hlsl b/include/nbl/builtin/hlsl/blit/default_blit.comp.hlsl new file mode 100644 index 000000000..ad2749904 --- /dev/null +++ b/include/nbl/builtin/hlsl/blit/default_blit.comp.hlsl @@ -0,0 +1,57 @@ +// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +//#include "nbl/builtin/hlsl/blit/common.hlsl" +//#include "nbl/builtin/hlsl/blit/parameters.hlsl" +//#include "nbl/builtin/hlsl/blit/compute_blit.hlsl" + + +groupshared uint32_t sMem[ConstevalParameters::SharedMemoryDWORDs]; +/* +struct HistogramAccessor +{ + void atomicAdd(uint32_t wgID, uint32_t bucket, uint32_t v) + { + InterlockedAdd(statsBuff[wgID * (ConstevalParameters::AlphaBinCount + 1) + bucket], v); + } +}; +struct KernelWeightsAccessor +{ + float32_t4 get(uint32_t idx) + { + return kernelWeights[idx]; + } +}; +struct InCSAccessor +{ + float32_t4 get(float32_t3 c, uint32_t l) + { + return inCS.SampleLevel(inSamp, blit::impl::dim_to_image_properties::getIndexCoord(c, l), 0); + } +}; +struct OutImgAccessor +{ + void set(int32_t3 c, uint32_t l, float32_t4 v) + { + outImg[blit::impl::dim_to_image_properties::getIndexCoord(c, l)] = v; + } +}; +*/ + +using namespace nbl::hlsl::blit; + +// TODO: push constants + +[numthreads(ConstevalParameters::WorkGroupSize,1,1)] +void main() +{ +/* + blit::compute_blit_t blit = blit::compute_blit_t::create(params); + InCSAccessor inCSA; + OutImgAccessor outImgA; + KernelWeightsAccessor kwA; + HistogramAccessor hA; + SharedAccessor sA; + blit.execute(inCSA, outImgA, kwA, hA, sA, workGroupID, localInvocationIndex); +*/ +} \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/blit/default_normalize.comp.hlsl b/include/nbl/builtin/hlsl/blit/default_normalize.comp.hlsl new file mode 100644 index 000000000..589f370c0 --- /dev/null +++ b/include/nbl/builtin/hlsl/blit/default_normalize.comp.hlsl @@ -0,0 +1,18 @@ +// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/builtin/hlsl/blit/common.hlsl" + + + +//#include "nbl/builtin/hlsl/blit/parameters.hlsl" +//#include "nbl/builtin/hlsl/blit/compute_blit.hlsl" + +using namespace nbl::hlsl::blit; + +// TODO: push constants + +[numthreads(ConstevalParameters::WorkGroupSize,1,1)] +void main() +{ +} \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/blit/temp.hlsl b/include/nbl/builtin/hlsl/blit/temp.hlsl deleted file mode 100644 index 4f8ced390..000000000 --- a/include/nbl/builtin/hlsl/blit/temp.hlsl +++ /dev/null @@ -1,40 +0,0 @@ -// TODO: Delete this file! -// This file is temporary file that defines all of the dependencies on PR #519 -// and should be deleted as soon as that's merged. -#ifndef _NBL_BUILTIN_HLSL_BLIT_TEMP_INCLUDED_ -#define _NBL_BUILTIN_HLSL_BLIT_TEMP_INCLUDED_ - - -namespace nbl -{ -namespace hlsl -{ - -namespace workgroup -{ - // This is slow naive scan but it doesn't matter as this file is going to - // be nuked. The interface is different than the one suggested in PR #519 - // because right now there's no easy hack-free way to access - // gl_localInvocationID globally. 
- template - T inclusive_scan(T value, NBL_REF_ARG(SharedAccessor) sharedAccessor, uint32_t localInvocationIndex) - { - Binop binop; - for (uint32_t i = 0; i < firstbithigh(WorkGroupSize); ++i) - { - sharedAccessor.main.set(localInvocationIndex, value); - GroupMemoryBarrierWithGroupSync(); - if (localInvocationIndex >= (1u << i)) - { - value = binop(sharedAccessor.main.get(localInvocationIndex - (1u << i)), value); - } - GroupMemoryBarrierWithGroupSync(); - } - return value; - } -} - -} -} - -#endif \ No newline at end of file diff --git a/include/nbl/video/utilities/CComputeBlit.h b/include/nbl/video/utilities/CComputeBlit.h index 779dc2ddc..eae3f4bf0 100644 --- a/include/nbl/video/utilities/CComputeBlit.h +++ b/include/nbl/video/utilities/CComputeBlit.h @@ -130,86 +130,6 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted return m_normalizationPipelines[key]; } - template - core::smart_refctd_ptr createBlitSpecializedShader( - const asset::E_FORMAT outFormat, - const asset::IImage::E_TYPE imageType, - const core::vectorSIMDu32& inExtent, - const core::vectorSIMDu32& outExtent, - const asset::IBlitUtilities::E_ALPHA_SEMANTIC alphaSemantic, - const typename BlitUtilities::convolution_kernels_t& kernels, - const uint32_t workgroupSize = 0, - const uint32_t alphaBinCount = asset::IBlitUtilities::DefaultAlphaBinCount) - { - if (workgroupSize==0) - workgroupSize = m_device->getPhysicalDevice()->getLimits().maxWorkgroupSize; - - const auto workgroupDims = getDefaultWorkgroupDims(imageType); - const auto paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount); - - const uint32_t outChannelCount = asset::getFormatChannelCount(outFormat); - const uint32_t smemFloatCount = m_availableSharedMemory / (sizeof(float) * outChannelCount); - const uint32_t blitDimCount = static_cast(imageType) + 1; - - const auto castedFormat = getOutImageViewFormat(outFormat); - assert(outFormat == castedFormat); - const char* formatQualifier = 
asset::CHLSLCompiler::getStorageImageFormatQualifier(castedFormat); - - std::ostringstream shaderSourceStream; - shaderSourceStream - << "#include \"nbl/builtin/hlsl/blit/common.hlsl\"\n" - "#include \"nbl/builtin/hlsl/blit/parameters.hlsl\"\n" - "#include \"nbl/builtin/hlsl/blit/compute_blit.hlsl\"\n"; - - shaderSourceStream - << "typedef nbl::hlsl::blit::consteval_parameters_t<" << workgroupSize << ", 1, 1, " << smemFloatCount << ", " - << outChannelCount << ", " << blitDimCount << ", " << paddedAlphaBinCount << "> ceval_params_t;\n"; - - shaderSourceStream - << "[[vk::combinedImageSampler]] [[vk::binding(0, 0)]]\n" - "nbl::hlsl::blit::impl::dim_to_image_properties::combined_sampler_t inCS;\n" - "[[vk::combinedImageSampler]] [[vk::binding(0, 0)]]\n" - "SamplerState inSamp;\n" - - "[[vk::image_format(\""<< formatQualifier << "\")]]\n" - "[[vk::binding(1, 0)]]\n" - "nbl::hlsl::blit::impl::dim_to_image_properties::image_t outImg;\n" - - "[[vk::binding(0, 1)]] Buffer kernelWeights;\n" - "[[vk::push_constant]] nbl::hlsl::blit::parameters_t params;" - "groupshared float32_t sMem[" << m_availableSharedMemory / sizeof(float) << "];\n"; - - if (alphaSemantic == asset::IBlitUtilities::EAS_REFERENCE_OR_COVERAGE) - { - shaderSourceStream - << "[[vk::binding(2 , 0)]] RWStructuredBuffer statsBuff;\n" - "struct HistogramAccessor { void atomicAdd(uint32_t wgID, uint32_t bucket, uint32_t v) { InterlockedAdd(statsBuff[wgID * (ceval_params_t::AlphaBinCount + 1) + bucket], v); } };\n"; - } - else - { - shaderSourceStream << "struct HistogramAccessor { void atomicAdd(uint32_t wgID, uint32_t bucket, uint32_t v) { } };\n"; - } - - shaderSourceStream - << "struct KernelWeightsAccessor { float32_t4 get(float32_t idx) { return kernelWeights[idx]; } };\n" - "struct SharedAccessor { float32_t get(float32_t idx) { return sMem[idx]; } void set(float32_t idx, float32_t val) { sMem[idx] = val; } };\n" - "struct InCSAccessor { float32_t4 get(float32_t3 c, uint32_t l) { return 
inCS.SampleLevel(inSamp, nbl::hlsl::blit::impl::dim_to_image_properties::getIndexCoord(c, l), 0); } };\n" - "struct OutImgAccessor { void set(int32_t3 c, uint32_t l, float32_t4 v) { outImg[nbl::hlsl::blit::impl::dim_to_image_properties::getIndexCoord(c, l)] = v; } };\n" - - "[numthreads(ceval_params_t::WorkGroupSize, 1, 1)]\n" - "void main(uint32_t3 workGroupID : SV_GroupID, uint32_t localInvocationIndex : SV_GroupIndex)\n" - "{\n" - " nbl::hlsl::blit::compute_blit_t blit = nbl::hlsl::blit::compute_blit_t::create(params);\n" - " InCSAccessor inCSA; OutImgAccessor outImgA; KernelWeightsAccessor kwA; HistogramAccessor hA; SharedAccessor sA;\n" - " blit.execute(inCSA, outImgA, kwA, hA, sA, workGroupID, localInvocationIndex);\n" - "}\n"; - - auto cpuShader = core::make_smart_refctd_ptr(shaderSourceStream.str().c_str(), IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, IGPUShader::E_SHADER_STAGE::E_CONTENT_TYPE::ECT_HLSL, "CComputeBlit::createBlitSpecializedShader"); - auto gpuShader = m_device->createShader(std::move(cpuShader.get())); - - return gpuShader; - } - template core::smart_refctd_ptr getBlitPipeline( const asset::E_FORMAT outFormat, diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 4705ca442..b3ec566be 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -263,7 +263,8 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/member_test_macros.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/blit/alpha_test.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/blit/compute_blit.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/blit/common.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/blit/temp.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/blit/default_blit.comp.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/blit/default_normalize.comp.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/blit/normalization.hlsl") 
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/blit/parameters.hlsl") #device capability @@ -337,5 +338,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/memory.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/memory_accessor.hlsl") #enums LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/enums.hlsl") +# +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/binding_info.hlsl") ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL") diff --git a/src/nbl/video/utilities/CComputeBlit.cpp b/src/nbl/video/utilities/CComputeBlit.cpp index 5856f42fc..1ceb1ee41 100644 --- a/src/nbl/video/utilities/CComputeBlit.cpp +++ b/src/nbl/video/utilities/CComputeBlit.cpp @@ -1,4 +1,5 @@ #include "nbl/video/utilities/CComputeBlit.h" +#include "nbl/builtin/hlsl/binding_info.hlsl" using namespace nbl::core; using namespace nbl::system; @@ -47,10 +48,55 @@ void CComputeBlit::createAndCachePipelines(CAssetConverter* converter, smart_ref // also slightly more memory is needed task.sharedMemoryPerInvocation = core::max(singlePixelStorage*2,task.sharedMemoryPerInvocation); } + const auto common = [&]()->std::string + { + // TODO: introduce a common type between ImGUI and Blit for the descriptor infos + auto serializeBindingInfo = [](const hlsl::SBindingInfo& info={})->std::string + { + return "ConstevalBindingInfo<"+std::to_string(info.Set)+","+std::to_string(info.Set)+","+std::to_string(info.Count)+">"; + }; + + std::ostringstream tmp; + tmp << R"===( +#include "nbl/builtin/hlsl/binding_info.hlsl" + + +using namespace nbl::hlsl; + + +struct ConstevalParameters +{ + NBL_CONSTEXPR_STATIC_INLINE uint32_t WorkGroupSize = )===" << (0x1u<smart_refctd_ptr + { + auto shader = make_smart_refctd_ptr( + (common+"\n#include \""+mainPath+"\"\n").c_str(), + IShader::E_SHADER_STAGE::ESS_COMPUTE, + 
IShader::E_CONTENT_TYPE::ECT_HLSL, + mainPath + ); + + ICPUComputePipeline::SCreationParams params = {}; + params.layout = nullptr; // TODO + params.shader.entryPoint = "main"; + params.shader.shader = shader.get(); + params.shader.requiredSubgroupSize = static_cast(hlsl::findMSB(limits.maxSubgroupSize)); + // needed for the prefix and reductions to work + params.shader.requireFullSubgroups = true; + return ICPUComputePipeline::create(params); + }; // create blit pipeline - cpuPplns.emplace_back(nullptr); - // create optional coverage normalization pipeline - cpuPplns.emplace_back(nullptr); + cpuPplns.emplace_back(createPipeline("default_blit.comp.hlsl")); + cpuPplns.emplace_back(createPipeline("default_normalize.comp.hlsl")); } CAssetConverter::SInputs inputs = {}; @@ -79,6 +125,83 @@ void CComputeBlit::createAndCachePipelines(CAssetConverter* converter, smart_ref } #if 0 + +template +core::smart_refctd_ptr createBlitSpecializedShader( + const asset::IImage::E_TYPE imageType, + const core::vectorSIMDu32& inExtent, + const core::vectorSIMDu32& outExtent, + const asset::IBlitUtilities::E_ALPHA_SEMANTIC alphaSemantic, + const typename BlitUtilities::convolution_kernels_t& kernels, + const uint32_t workgroupSize = 0, + const uint32_t alphaBinCount = asset::IBlitUtilities::DefaultAlphaBinCount) +{ + if (workgroupSize==0) + workgroupSize = m_device->getPhysicalDevice()->getLimits().maxWorkgroupSize; + + const auto workgroupDims = getDefaultWorkgroupDims(imageType); + const auto paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount); + + const uint32_t outChannelCount = asset::getFormatChannelCount(outFormat); + const uint32_t smemFloatCount = m_availableSharedMemory / (sizeof(float) * outChannelCount); + const uint32_t blitDimCount = static_cast(imageType) + 1; + + + std::ostringstream shaderSourceStream; + shaderSourceStream + << "#include \"nbl/builtin/hlsl/blit/common.hlsl\"\n" + "#include \"nbl/builtin/hlsl/blit/parameters.hlsl\"\n" + "#include 
\"nbl/builtin/hlsl/blit/compute_blit.hlsl\"\n"; + + shaderSourceStream + << "typedef nbl::hlsl::blit::consteval_parameters_t<" << workgroupSize << ", 1, 1, " << smemFloatCount << ", " + << outChannelCount << ", " << blitDimCount << ", " << paddedAlphaBinCount << "> ceval_params_t;\n"; + + shaderSourceStream + << "[[vk::combinedImageSampler]] [[vk::binding(0, 0)]]\n" + "nbl::hlsl::blit::impl::dim_to_image_properties::combined_sampler_t inCS;\n" + "[[vk::combinedImageSampler]] [[vk::binding(0, 0)]]\n" + "SamplerState inSamp;\n" + + "[[vk::image_format(\""<< formatQualifier << "\")]]\n" + "[[vk::binding(1, 0)]]\n" + "nbl::hlsl::blit::impl::dim_to_image_properties::image_t outImg;\n" + + "[[vk::binding(0, 1)]] Buffer kernelWeights;\n" + "[[vk::push_constant]] nbl::hlsl::blit::parameters_t params;" + "groupshared float32_t sMem[" << m_availableSharedMemory / sizeof(float) << "];\n"; + + if (alphaSemantic == asset::IBlitUtilities::EAS_REFERENCE_OR_COVERAGE) + { + shaderSourceStream + << "[[vk::binding(2 , 0)]] RWStructuredBuffer statsBuff;\n" + "struct HistogramAccessor { void atomicAdd(uint32_t wgID, uint32_t bucket, uint32_t v) { InterlockedAdd(statsBuff[wgID * (ceval_params_t::AlphaBinCount + 1) + bucket], v); } };\n"; + } + else + { + shaderSourceStream << "struct HistogramAccessor { void atomicAdd(uint32_t wgID, uint32_t bucket, uint32_t v) { } };\n"; + } + + shaderSourceStream + << "struct KernelWeightsAccessor { float32_t4 get(float32_t idx) { return kernelWeights[idx]; } };\n" + "struct SharedAccessor { float32_t get(float32_t idx) { return sMem[idx]; } void set(float32_t idx, float32_t val) { sMem[idx] = val; } };\n" + "struct InCSAccessor { float32_t4 get(float32_t3 c, uint32_t l) { return inCS.SampleLevel(inSamp, nbl::hlsl::blit::impl::dim_to_image_properties::getIndexCoord(c, l), 0); } };\n" + "struct OutImgAccessor { void set(int32_t3 c, uint32_t l, float32_t4 v) { outImg[nbl::hlsl::blit::impl::dim_to_image_properties::getIndexCoord(c, l)] = v; } };\n" + + 
"[numthreads(ceval_params_t::WorkGroupSize, 1, 1)]\n" + "void main(uint32_t3 workGroupID : SV_GroupID, uint32_t localInvocationIndex : SV_GroupIndex)\n" + "{\n" + " nbl::hlsl::blit::compute_blit_t blit = nbl::hlsl::blit::compute_blit_t::create(params);\n" + " InCSAccessor inCSA; OutImgAccessor outImgA; KernelWeightsAccessor kwA; HistogramAccessor hA; SharedAccessor sA;\n" + " blit.execute(inCSA, outImgA, kwA, hA, sA, workGroupID, localInvocationIndex);\n" + "}\n"; + + auto cpuShader = core::make_smart_refctd_ptr(shaderSourceStream.str().c_str(), IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, IGPUShader::E_SHADER_STAGE::E_CONTENT_TYPE::ECT_HLSL, "CComputeBlit::createBlitSpecializedShader"); + auto gpuShader = m_device->createShader(std::move(cpuShader.get())); + + return gpuShader; +} + core::smart_refctd_ptr CComputeBlit::createAlphaTestSpecializedShader(const asset::IImage::E_TYPE imageType, const uint32_t alphaBinCount) { const auto workgroupDims = getDefaultWorkgroupDims(imageType); From c6ba21128a27279b585f1966aa46fb169051810f Mon Sep 17 00:00:00 2001 From: devsh Date: Wed, 30 Oct 2024 12:46:26 +0100 Subject: [PATCH 05/14] improve the function template constraints a lot --- include/nbl/builtin/hlsl/concepts.hlsl | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/concepts.hlsl b/include/nbl/builtin/hlsl/concepts.hlsl index 033709e72..fea6dacc9 100644 --- a/include/nbl/builtin/hlsl/concepts.hlsl +++ b/include/nbl/builtin/hlsl/concepts.hlsl @@ -34,10 +34,8 @@ // put just before closing `>` on the partial template specialization Type args, e.g. `MyStruct)> #define NBL_PARTIAL_REQ_BOT(...) -// condition -#define NBL_FUNC_REQUIRES_BEGIN(...) requires (__VA_ARGS__) -// return value -#define NBL_FUNC_REQUIRES_END(...) __VA_ARGS__ +// condition, use instead of the closing `>` of a function template +#define NBL_FUNC_REQUIRES(...) 
> requires (__VA_ARGS__) #include @@ -120,10 +118,8 @@ concept matricial = is_matrix::value; // put just before closing `>` on the partial template specialization Type args, e.g. `MyStruct)> #define NBL_PARTIAL_REQ_BOT(...) ,std::enable_if_t<(__VA_ARGS__),void> -// condition, use right after the closing `>` of a function template -#define NBL_FUNC_REQUIRES_BEGIN(...) ::nbl::hlsl::enable_if_t<(__VA_ARGS__), -// return value, use `END(T)` instead of the return value type declaration -#define NBL_FUNC_REQUIRES_END(...) __VA_ARGS__> +// condition, use instead of the closing `>` of a function template +#define NBL_FUNC_REQUIRES(...) ,std::enable_if_t<(__VA_ARGS__),bool> = true> #endif From a4a9fc9dda20bbedfad320897c3d5e8a795db734 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 4 Nov 2024 12:13:35 +0100 Subject: [PATCH 06/14] add concept macros! --- include/nbl/builtin/hlsl/concepts.hlsl | 108 ++++++++++++++++------ include/nbl/builtin/hlsl/type_traits.hlsl | 3 + 2 files changed, 84 insertions(+), 27 deletions(-) diff --git a/include/nbl/builtin/hlsl/concepts.hlsl b/include/nbl/builtin/hlsl/concepts.hlsl index fea6dacc9..bf16d3d1c 100644 --- a/include/nbl/builtin/hlsl/concepts.hlsl +++ b/include/nbl/builtin/hlsl/concepts.hlsl @@ -10,14 +10,31 @@ #include -#ifndef __HLSL_VERSION - -// TODO: old stuff, see how much we can remove -#define NBL_CONCEPT_TYPE_PARAMS(...) template <__VA_ARGS__> -#define NBL_CONCEPT_SIGNATURE(NAME, ...) concept NAME = requires(__VA_ARGS__) -#define NBL_CONCEPT_BODY(...) { __VA_ARGS__ }; -#define NBL_CONCEPT_ASSIGN(NAME, ...) concept NAME = __VA_ARGS__; -#define NBL_REQUIRES(...) 
requires __VA_ARGS__ +namespace nbl +{ +namespace hlsl +{ +namespace concepts +{ +// common implementation juice +#include +#define NBL_IMPL_CONCEPT_FULL_TPLT(z, n, unused) BOOST_PP_SEQ_ELEM(n,NBL_CONCEPT_TPLT_PRM_KINDS) BOOST_PP_SEQ_ELEM(n,NBL_CONCEPT_TPLT_PRM_NAMES) +#include +#define NBL_CONCEPT_FULL_TPLT() BOOST_PP_ENUM(BOOST_PP_SEQ_SIZE(NBL_CONCEPT_TPLT_PRM_NAMES),NBL_IMPL_CONCEPT_FULL_TPLT,DUMMY) +#include +#define NBL_CONCEPT_TPLT_PARAMS() BOOST_PP_SEQ_ENUM(NBL_CONCEPT_TPLT_PRM_NAMES) +#include +#include +#include +// +#define NBL_CONCEPT_REQ_TYPE 0 +#define NBL_CONCEPT_REQ_EXPR 1 +// +#define NBL_CONCEPT_REQ_EXPR_RET_TYPE 2 + + +//! Now diverge +#ifndef __cpp_concepts // to define a concept using `concept Name = SomeContexprBoolCondition;` @@ -37,14 +54,30 @@ // condition, use instead of the closing `>` of a function template #define NBL_FUNC_REQUIRES(...) > requires (__VA_ARGS__) -#include -namespace nbl -{ -namespace hlsl -{ -namespace concepts +// +#define NBL_CONCEPT_PARAM_T(ID,...) ID +// +#define NBL_IMPL_IMPL_CONCEPT_BEGIN(A,...) __VA_ARGS__ A +#define NBL_IMPL_CONCEPT_BEGIN(z,n,data) NBL_IMPL_IMPL_CONCEPT_BEGIN NBL_CONCEPT_PARAM_##n +// TODO: are empty local parameter lists valid? a.k.a. just a `()` +#define NBL_CONCEPT_BEGIN(LOCAL_PARAM_COUNT) template \ +concept NBL_CONCEPT_NAME = requires BOOST_PP_EXPR_IF(LOCAL_PARAM_COUNT,(BOOST_PP_ENUM(LOCAL_PARAM_COUNT,NBL_IMPL_CONCEPT_BEGIN,DUMMY))) \ { +// +#define NBL_IMPL_CONCEPT_REQ_TYPE(...) typename __VA_ARGS__; +#define NBL_IMPL_CONCEPT_REQ_EXPR(...) __VA_ARGS__; +#define NBL_IMPL_CONCEPT_REQ_EXPR_RET_TYPE(E,C,...) 
{E}; C; +// +#define NBL_IMPL_CONCEPT (NBL_IMPL_CONCEPT_REQ_TYPE,NBL_IMPL_CONCEPT_REQ_EXPR,NBL_IMPL_CONCEPT_REQ_EXPR_RET_TYPE) +// +#define NBL_IMPL_CONCEPT_END_DEF(r,unused,i,e) NBL_EVAL(BOOST_PP_TUPLE_ELEM(BOOST_PP_SEQ_HEAD(e),NBL_IMPL_CONCEPT) BOOST_PP_SEQ_TAIL(e)) +// +#define NBL_CONCEPT_END(SEQ) BOOST_PP_SEQ_FOR_EACH_I(NBL_IMPL_CONCEPT_END_DEF, DUMMY, SEQ) \ +} + + +#include // Alias some of the std concepts in nbl. As this is C++20 only, we don't need to use // the macros here. @@ -90,22 +123,11 @@ concept vectorial = is_vector::value; template concept matricial = is_matrix::value; -} -} -} - #else -// TODO: old stuff, see how much we can remove -// No C++20 support. Do nothing. -#define NBL_CONCEPT_TYPE_PARAMS(...) -#define NBL_CONCEPT_SIGNATURE(NAME, ...) -#define NBL_CONCEPT_BODY(...) -#define NBL_REQUIRES(...) - // to define a concept using `concept Name = SomeContexprBoolCondition;` -#define NBL_BOOL_CONCEPT NBL_CONSTEXPR_STATIC_INLINE bool +#define NBL_BOOL_CONCEPT NBL_CONSTEXPR bool // for struct definitions, use instead of closing `>` on the primary template parameter list #define NBL_PRIMARY_REQUIRES(...) ,typename __requires=::nbl::hlsl::enable_if_t<(__VA_ARGS__),void> > @@ -121,7 +143,39 @@ concept matricial = is_matrix::value; // condition, use instead of the closing `>` of a function template #define NBL_FUNC_REQUIRES(...) ,std::enable_if_t<(__VA_ARGS__),bool> = true> -#endif +// +#define NBL_CONCEPT_BEGIN(LOCAL_PARAM_COUNT) namespace BOOST_PP_CAT(__concept__,NBL_CONCEPT_NAME) \ +{ +// +#define NBL_CONCEPT_PARAM_T(ID,...) ::nbl::hlsl::impl::declval<__VA_ARGS__ >() +// +#define NBL_IMPL_CONCEPT_REQ_TYPE(...) ::nbl::hlsl::make_void_t +#define NBL_IMPL_CONCEPT_REQ_EXPR(...) ::nbl::hlsl::make_void_t +#define NBL_IMPL_CONCEPT_REQ_EXPR_RET_TYPE(E,C,...) 
C +// +#define NBL_IMPL_CONCEPT_SFINAE (typename=void,typename=void,bool=true) +#define NBL_IMPL_CONCEPT_SFINAE_SPEC (NBL_IMPL_CONCEPT_REQ_TYPE,NBL_IMPL_CONCEPT_REQ_EXPR,NBL_IMPL_CONCEPT_REQ_EXPR_RET_TYPE) +// +#define NBL_IMPL_CONCEPT_END_DEF(r,unused,i,e) template \ +struct BOOST_PP_CAT(__requirement,i) : ::nbl::hlsl::false_type {}; \ +template \ +struct BOOST_PP_CAT(__requirement,i) : ::nbl::hlsl::true_type {}; +// +#define NBL_IMPL_CONCEPT_END_GET(r,unused,i,e) BOOST_PP_EXPR_IF(i,&&) BOOST_PP_CAT(__concept__,NBL_CONCEPT_NAME)::BOOST_PP_CAT(__requirement,i)::value +// +#define NBL_CONCEPT_END(SEQ) BOOST_PP_SEQ_FOR_EACH_I(NBL_IMPL_CONCEPT_END_DEF, DUMMY, SEQ) \ +} \ +template \ +NBL_CONSTEXPR bool NBL_CONCEPT_NAME = BOOST_PP_SEQ_FOR_EACH_I(NBL_IMPL_CONCEPT_END_GET, DUMMY, SEQ) + +// TODO: counterparts of all the other concepts + +#endif +} +} +} #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index d7b1102af..68cfc6476 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -574,6 +574,9 @@ using enable_if = std::enable_if; template using alignment_of = std::alignment_of; +template +using make_void_t = typename make_void::type; + template using remove_const = std::remove_const; template using remove_volatile = std::remove_volatile; template using remove_cv = std::remove_cv; From 0b2e2f15e90ee8ff93a22eac6b1470f5b9176375 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 4 Nov 2024 15:07:31 +0100 Subject: [PATCH 07/14] fix up type traits --- include/nbl/builtin/hlsl/type_traits.hlsl | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index 68cfc6476..3a4e0eda7 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -156,10 +156,9 @@ namespace nbl { namespace hlsl { - 
+// namespace impl { - template class Trait, class T> struct base_type_forwarder : Trait {}; @@ -168,11 +167,14 @@ struct base_type_forwarder > : Trait {}; template class Trait, class T, uint16_t N, uint16_t M> struct base_type_forwarder > : Trait {}; - } -#ifdef __HLSL_VERSION // HLSL +// +template +struct make_void { using type = void; }; + +#ifdef __HLSL_VERSION // HLSL #define decltype(expr) __decltype(expr) @@ -391,9 +393,6 @@ struct enable_if : type_identity {}; template struct alignment_of; -template -struct make_void { using type = void; }; - // reference stuff needed for semantics // not for "human consumption" @@ -574,9 +573,6 @@ using enable_if = std::enable_if; template using alignment_of = std::alignment_of; -template -using make_void_t = typename make_void::type; - template using remove_const = std::remove_const; template using remove_volatile = std::remove_volatile; template using remove_cv = std::remove_cv; @@ -617,6 +613,9 @@ template NBL_CONSTEXPR uint32_t alignment_of_v = alignment_of::value; // Overlapping definitions +template +using make_void_t = typename make_void::type; + template struct conditional_value { From 4275c233ffdd6542a159ac8506037f8083e275d8 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 4 Nov 2024 15:09:05 +0100 Subject: [PATCH 08/14] add a general binding info --- include/nbl/asset/IPipelineLayout.h | 46 ++++++++++++++++++++-- include/nbl/builtin/hlsl/binding_info.hlsl | 9 +++++ include/nbl/ext/ImGui/ImGui.h | 1 + include/nbl/video/utilities/CComputeBlit.h | 32 +++++++++++---- src/nbl/ext/ImGui/ImGui.cpp | 1 + src/nbl/video/utilities/CComputeBlit.cpp | 12 ++---- 6 files changed, 82 insertions(+), 19 deletions(-) diff --git a/include/nbl/asset/IPipelineLayout.h b/include/nbl/asset/IPipelineLayout.h index 7628d0b48..fdbc97bbf 100644 --- a/include/nbl/asset/IPipelineLayout.h +++ b/include/nbl/asset/IPipelineLayout.h @@ -4,12 +4,14 @@ #ifndef _NBL_ASSET_I_PIPELINE_LAYOUT_H_INCLUDED_ #define _NBL_ASSET_I_PIPELINE_LAYOUT_H_INCLUDED_ 
+#include "nbl/macros.h" +#include "nbl/core/declarations.h" #include #include -#include "nbl/macros.h" -#include "nbl/core/declarations.h" +#include "nbl/asset/IDescriptorSetLayout.h" +#include "nbl/builtin/hlsl/binding_info.hlsl" namespace nbl::asset @@ -21,7 +23,7 @@ namespace nbl::asset however they serve as a fast path with regard to data upload from the CPU and data access from the GPU. - Note that IrrlichtBaW limits push constant size to 128 bytes. + Note that Nabla limits push constant size to 128 bytes. Push Constants are an alternative to an UBO where it performs really poorly, mostly very small and very frequent updates. Examples of which are: @@ -140,6 +142,44 @@ class IPipelineLayout return static_cast(i)-1; } + // utility function, if you compile shaders for specific layouts, not create layouts given shaders + using desc_type_bitset_t = std::bitset(IDescriptor::E_TYPE::ET_COUNT)>; + // TODO: add constraints for stage and creation flags, or just return the storage index & redirect? 
+ core::string getBindingInfoForHLSL(const hlsl::SBindingInfo& info, const desc_type_bitset_t allowedTypes=desc_type_bitset_t().set()) const + { + if (info.set>=DESCRIPTOR_SET_COUNT) + return "#error \"::nbl::hlsl::SBindingInfo::set out of range!\""; + const auto* layout = m_descSetLayouts[info.set]; + if (!layout) + return "#error \"::nbl::hlsl::SBindingInfo::set layout is nullptr!\""; + // + using redirect_t = IDescriptorSetLayoutBase::CBindingRedirect; + using storage_range_index_t = redirect_t::storage_range_index_t; + const redirect_t* redirect; + storage_range_index_t found; + { + const redirect_t::binding_number_t binding(info.binding); + for (auto t=0u; t(IDescriptor::E_TYPE::ET_COUNT); t++) + if (allowedTypes.test(t)) + { + redirect = &layout->getDescriptorRedirect(static_cast(t)); + found = redirect->findBindingStorageIndex(binding); + if (found) + break; + } + if (!found && allowedTypes.test(static_cast(IDescriptor::E_TYPE::ET_SAMPLER))) + { + redirect = &layout->getImmutableSamplerRedirect(); + found = redirect->findBindingStorageIndex(binding); + } + if (!found) + return "#error \"Could not find `::nbl::hlsl::SBindingInfo::binding` in `::nbl::hlsl::SBindingInfo::set`'s layout!\""; + } + const auto count = redirect->getCount(found); + assert(count); // this layout should have never passed validation + return "::nbl::hlsl::ConstevalBindingInfo<"+std::to_string(info.set)+","+std::to_string(info.binding)+","+std::to_string(count)+">"; + } + protected: IPipelineLayout( const std::span _pcRanges, diff --git a/include/nbl/builtin/hlsl/binding_info.hlsl b/include/nbl/builtin/hlsl/binding_info.hlsl index 8702a32c3..e03766516 100644 --- a/include/nbl/builtin/hlsl/binding_info.hlsl +++ b/include/nbl/builtin/hlsl/binding_info.hlsl @@ -19,6 +19,15 @@ struct ConstevalBindingInfo NBL_CONSTEXPR_STATIC_INLINE uint32_t Count = count; }; +// used for descriptor set layout lookups +struct SBindingInfo +{ + //! 
binding index for a given resource + uint32_t binding : 29; + //! descriptor set index for a resource + uint32_t set : 3; +}; + } } #endif diff --git a/include/nbl/ext/ImGui/ImGui.h b/include/nbl/ext/ImGui/ImGui.h index 58787b9d5..244195c01 100644 --- a/include/nbl/ext/ImGui/ImGui.h +++ b/include/nbl/ext/ImGui/ImGui.h @@ -24,6 +24,7 @@ class UI final : public core::IReferenceCounted struct SResourceParameters { //! for a given pipeline layout we need to know what is intended for UI resources + // TODO: introduce a common type between ImGUI and Blit for the descriptor infos "binding_info.hlsl" struct SBindingInfo { //! descriptor set index for a resource diff --git a/include/nbl/video/utilities/CComputeBlit.h b/include/nbl/video/utilities/CComputeBlit.h index eae3f4bf0..69b8d8ba2 100644 --- a/include/nbl/video/utilities/CComputeBlit.h +++ b/include/nbl/video/utilities/CComputeBlit.h @@ -47,16 +47,34 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted core::smart_refctd_ptr&& logger=nullptr ); - // if you set the balues too small, we'll correct them ourselves anyway - struct STask + // create your pipelines + struct SPipelines { + core::smart_refctd_ptr blit; + core::smart_refctd_ptr coverage; + }; + struct SPipelinesCreateInfo + { + // required + CAssetConverter* converter; + // in theory we _could_ accept either pipeline layout type (or just the base) and make the CPU one back from the GPU + const asset::ICPUPipelineLayout* layout; + // must be Uniform Texel Buffer descriptor type + hlsl::SBindingInfo kernelWeights; + // must be Sampled Image descriptor type + hlsl::SBindingInfo inputs; + // must be Sampler descriptor type + hlsl::SBindingInfo samplers; + // must be Storage Image descriptor type + hlsl::SBindingInfo outputs; + //! 
If you set the balues too small, we'll correct them ourselves anyway + // needs to be at least as big as the maximum subgroup size uint32_t workgroupSizeLog2 : 4 = 0; - // the TRUE output format, not the storage view format you might manually encode into - hlsl::format::TexelBlockFormat outputFormat : 8 = hlsl::format::TexelBlockFormat::TBF_UNKNOWN; + // uint32_t sharedMemoryPerInvocation : 6 = 0; - uint32_t unused : 14 = 0; }; - + SPipelines createAndCachePipelines(const SPipelinesCreateInfo& info); + //! Returns the original format if supports STORAGE_IMAGE otherwise returns a format in its compat class which supports STORAGE_IMAGE. inline asset::E_FORMAT getOutputViewFormat(const asset::E_FORMAT format) { @@ -585,8 +603,6 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted EBT_COUNT }; - void createAndCachePipelines(CAssetConverter* converter, core::smart_refctd_ptr* pipelines, const std::span tasks); - core::smart_refctd_ptr m_device; system::logger_opt_smart_ptr m_logger; core::smart_refctd_ptr m_shaderCache; diff --git a/src/nbl/ext/ImGui/ImGui.cpp b/src/nbl/ext/ImGui/ImGui.cpp index 91b9e4115..9e9f9f2e5 100644 --- a/src/nbl/ext/ImGui/ImGui.cpp +++ b/src/nbl/ext/ImGui/ImGui.cpp @@ -221,6 +221,7 @@ core::smart_refctd_ptr UI::createPipeline(SCreation std::stringstream stream; + // TODO: Use the `ConstevalBindingInfo` stream << "// -> this code has been autogenerated with Nabla ImGUI extension\n" << "#define NBL_TEXTURES_BINDING_IX " << creationParams.resources.texturesInfo.bindingIx << "\n" << "#define NBL_SAMPLER_STATES_BINDING_IX " << creationParams.resources.samplersInfo.bindingIx << "\n" diff --git a/src/nbl/video/utilities/CComputeBlit.cpp b/src/nbl/video/utilities/CComputeBlit.cpp index 1ceb1ee41..1dd123952 100644 --- a/src/nbl/video/utilities/CComputeBlit.cpp +++ b/src/nbl/video/utilities/CComputeBlit.cpp @@ -20,8 +20,9 @@ CComputeBlit::CComputeBlit(smart_refctd_ptr&& logicalDevice, sma m_shaderCache = make_smart_refctd_ptr(); } -void 
CComputeBlit::createAndCachePipelines(CAssetConverter* converter, smart_refctd_ptr* pipelines, const std::span tasks) +auto CComputeBlit::createAndCachePipelines(const SPipelinesCreateInfo& info) -> SPipelines { + SPipelines retval; core::vector> cpuPplns; cpuPplns.reserve(tasks.size()); @@ -50,12 +51,6 @@ void CComputeBlit::createAndCachePipelines(CAssetConverter* converter, smart_ref } const auto common = [&]()->std::string { - // TODO: introduce a common type between ImGUI and Blit for the descriptor infos - auto serializeBindingInfo = [](const hlsl::SBindingInfo& info={})->std::string - { - return "ConstevalBindingInfo<"+std::to_string(info.Set)+","+std::to_string(info.Set)+","+std::to_string(info.Count)+">"; - }; - std::ostringstream tmp; tmp << R"===( #include "nbl/builtin/hlsl/binding_info.hlsl" @@ -67,7 +62,7 @@ using namespace nbl::hlsl; struct ConstevalParameters { NBL_CONSTEXPR_STATIC_INLINE uint32_t WorkGroupSize = )===" << (0x1u<getBindingInfoForHLSL() << R"===(; using input_sampler_binding_t = )===" << serializeBindingInfo() << R"===(; using input_image_binding_t = )===" << serializeBindingInfo() << R"===(; using output_binding_t = )===" << serializeBindingInfo() << R"===(; @@ -122,6 +117,7 @@ struct ConstevalParameters auto convertResults = reserveResults.convert(params); assert(!convertResults.blocking()); } + return retval; } #if 0 From bb757d8c29015b2c3ef02c399c9da07067254172 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 5 Nov 2024 07:33:47 +0100 Subject: [PATCH 09/14] make concepts work P.S. 
also make the HLSL `decltype` macro forward perfectly --- include/nbl/builtin/hlsl/blit/parameters.hlsl | 32 ++++++++++++++++ include/nbl/builtin/hlsl/concepts.hlsl | 13 +++---- .../nbl/builtin/hlsl/member_test_macros.hlsl | 13 +++---- include/nbl/builtin/hlsl/type_traits.hlsl | 2 +- include/nbl/builtin/hlsl/utility.hlsl | 38 +++++++++++++++++++ src/nbl/builtin/CMakeLists.txt | 1 + 6 files changed, 83 insertions(+), 16 deletions(-) create mode 100644 include/nbl/builtin/hlsl/utility.hlsl diff --git a/include/nbl/builtin/hlsl/blit/parameters.hlsl b/include/nbl/builtin/hlsl/blit/parameters.hlsl index d280cc523..3992fcd68 100644 --- a/include/nbl/builtin/hlsl/blit/parameters.hlsl +++ b/include/nbl/builtin/hlsl/blit/parameters.hlsl @@ -44,6 +44,38 @@ struct parameters_t } }; +struct parameters2_t +{ + float32_t3 fScale; + float32_t3 negativeSupportMinusHalf; + float32_t referenceAlpha; + uint32_t kernelWeightsOffsetY; + uint32_t kernelWeightsOffsetZ; + uint32_t inPixelCount; + uint32_t outPixelCount; + + uint16_t3 inputDims; + uint16_t3 outputDims; + uint16_t3 windowDims; + uint16_t3 phaseCount; + uint16_t3 preloadRegion; + uint16_t3 iterationRegionXPrefixProducts; + uint16_t3 iterationRegionYPrefixProducts; + uint16_t3 iterationRegionZPrefixProducts; + + //! Offset into the shared memory array which tells us from where the second buffer of shared memory begins + //! Given by max(memory_for_preload_region, memory_for_result_of_y_pass) + uint16_t secondScratchOffset; + uint16_t outputTexelsPerWGZ; + + uint32_t3 getOutputTexelsPerWG() + { + //! `outputTexelsPerWG.xy` just happens to be in the first components of `iterationRegionsXPrefixProducts` and `iterationRegionYPrefixProducts` --this is + //! the result of how we choose to iterate, i.e. if, in the future, we decide to iterate differently, this needs to change. 
+ return uint32_t3(iterationRegionXPrefixProducts.x, iterationRegionYPrefixProducts.x, outputTexelsPerWGZ); + } +}; + } } diff --git a/include/nbl/builtin/hlsl/concepts.hlsl b/include/nbl/builtin/hlsl/concepts.hlsl index bf16d3d1c..0aa1af7b5 100644 --- a/include/nbl/builtin/hlsl/concepts.hlsl +++ b/include/nbl/builtin/hlsl/concepts.hlsl @@ -7,7 +7,7 @@ #include #include -#include +#include namespace nbl @@ -148,20 +148,19 @@ concept matricial = is_matrix::value; #define NBL_CONCEPT_BEGIN(LOCAL_PARAM_COUNT) namespace BOOST_PP_CAT(__concept__,NBL_CONCEPT_NAME) \ { // -#define NBL_CONCEPT_PARAM_T(ID,...) ::nbl::hlsl::impl::declval<__VA_ARGS__ >() +#define NBL_CONCEPT_PARAM_T(ID,...) ::nbl::hlsl::experimental::declval<__VA_ARGS__ >() // #define NBL_IMPL_CONCEPT_REQ_TYPE(...) ::nbl::hlsl::make_void_t #define NBL_IMPL_CONCEPT_REQ_EXPR(...) ::nbl::hlsl::make_void_t -#define NBL_IMPL_CONCEPT_REQ_EXPR_RET_TYPE(E,C,...) C +#define NBL_IMPL_CONCEPT_REQ_EXPR_RET_TYPE(E,C,...) ::nbl::hlsl::enable_if_t > // -#define NBL_IMPL_CONCEPT_SFINAE (typename=void,typename=void,bool=true) -#define NBL_IMPL_CONCEPT_SFINAE_SPEC (NBL_IMPL_CONCEPT_REQ_TYPE,NBL_IMPL_CONCEPT_REQ_EXPR,NBL_IMPL_CONCEPT_REQ_EXPR_RET_TYPE) +#define NBL_IMPL_CONCEPT_SFINAE (NBL_IMPL_CONCEPT_REQ_TYPE,NBL_IMPL_CONCEPT_REQ_EXPR,NBL_IMPL_CONCEPT_REQ_EXPR_RET_TYPE) // -#define NBL_IMPL_CONCEPT_END_DEF(r,unused,i,e) template \ +#define NBL_IMPL_CONCEPT_END_DEF(r,unused,i,e) template \ struct BOOST_PP_CAT(__requirement,i) : ::nbl::hlsl::false_type {}; \ template \ struct BOOST_PP_CAT(__requirement,i) : ::nbl::hlsl::true_type {}; // #define NBL_IMPL_CONCEPT_END_GET(r,unused,i,e) BOOST_PP_EXPR_IF(i,&&) BOOST_PP_CAT(__concept__,NBL_CONCEPT_NAME)::BOOST_PP_CAT(__requirement,i)::value diff --git a/include/nbl/builtin/hlsl/member_test_macros.hlsl b/include/nbl/builtin/hlsl/member_test_macros.hlsl index f103d6d83..7579fb0fa 100644 --- a/include/nbl/builtin/hlsl/member_test_macros.hlsl +++ 
b/include/nbl/builtin/hlsl/member_test_macros.hlsl @@ -4,7 +4,7 @@ #ifndef _NBL_BUILTIN_HLSL_MEMBER_TEST_MACROS_INCLUDED_ #define _NBL_BUILTIN_HLSL_MEMBER_TEST_MACROS_INCLUDED_ -#include +#include #include #ifdef __HLSL_VERSION @@ -24,9 +24,6 @@ enum e_member_presence is_const = 1<<2, }; -template -T declval(){} - template struct if_2_else_1 : integral_constant {}; template<> @@ -53,7 +50,7 @@ struct is_static_member_##a:: template \ struct is_member_##a: false_type { using type = void; }; \ template \ -struct is_member_##a().a),void>::value,void>::type> : true_type { using type = decltype(declval().a); }; \ +struct is_member_##a().a),void>::value,void>::type> : true_type { using type = decltype(experimental::declval().a); }; \ } \ template \ struct has_member_##a { NBL_CONSTEXPR_STATIC_INLINE e_member_presence value = (e_member_presence)(impl::is_member_##a::value + 2*impl::is_static_member_##a::value + 4*is_const::type>::value); }; \ @@ -72,7 +69,7 @@ NBL_GENERATE_MEMBER_TESTER(w) #define NBL_TYPE_DECLARE(z, n, x) BOOST_PP_COMMA_IF(x) typename Arg##n #define NBL_TYPE_DECLARE_DEFAULT(z, n, x) BOOST_PP_COMMA_IF(x) typename Arg##n=void #define NBL_TYPE_FWD(z, n, x) BOOST_PP_COMMA_IF(x) Arg##n -#define NBL_DECLVAL_DECLARE(z, n, x) impl::declval() BOOST_PP_COMMA_IF(BOOST_PP_NOT_EQUAL(BOOST_PP_INC(n), x)) +#define NBL_DECLVAL_DECLARE(z, n, x) experimental::declval() BOOST_PP_COMMA_IF(BOOST_PP_NOT_EQUAL(BOOST_PP_INC(n), x)) #define GENERATE_STATIC_METHOD_TESTER_SPEC(z, n, x) \ template \ @@ -89,9 +86,9 @@ BOOST_PP_REPEAT(n, GENERATE_STATIC_METHOD_TESTER_SPEC, x) #define GENERATE_METHOD_TESTER_SPEC(z, n, x) \ template \ -struct has_method_##x().x(BOOST_PP_REPEAT(n, NBL_DECLVAL_DECLARE, n)))>::type> : impl::if_2_else_1::value> \ +struct has_method_##x().x(BOOST_PP_REPEAT(n, NBL_DECLVAL_DECLARE, n)))>::type> : impl::if_2_else_1::value> \ { \ - using return_type = decltype(impl::declval().x(BOOST_PP_REPEAT(n, NBL_DECLVAL_DECLARE, n))); \ + using return_type = 
decltype(experimental::declval().x(BOOST_PP_REPEAT(n, NBL_DECLVAL_DECLARE, n))); \ NBL_CONSTEXPR_STATIC_INLINE uint arg_count = n; \ }; diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index 3a4e0eda7..1481d087f 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -176,7 +176,7 @@ struct make_void { using type = void; }; #ifdef __HLSL_VERSION // HLSL -#define decltype(expr) __decltype(expr) +#define decltype(...) __decltype(__VA_ARGS__) template struct type_identity diff --git a/include/nbl/builtin/hlsl/utility.hlsl b/include/nbl/builtin/hlsl/utility.hlsl new file mode 100644 index 000000000..487d4a7d7 --- /dev/null +++ b/include/nbl/builtin/hlsl/utility.hlsl @@ -0,0 +1,38 @@ +// Copyright (C) 2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_BUILTIN_HLSL_UTILITY_INCLUDED_ +#define _NBL_BUILTIN_HLSL_UTILITY_INCLUDED_ + + +#include + + +// for now we only implement declval +namespace nbl +{ +namespace hlsl +{ +#ifndef __HLSL_VERSION + +template +std::add_rvalue_reference_t declval() noexcept +{ + static_assert(false,"Actually calling declval is ill-formed."); +} + +#else + +namespace experimental +{ + +template +T declval() {} + +} + +#endif +} +} + +#endif diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index b3ec566be..53ab53497 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -256,6 +256,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/concepts.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/functional.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/limits.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/type_traits.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/utility.hlsl") #metaprogramming 
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/mpl.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/member_test_macros.hlsl") From 067e8a385750177bf91f92aa0a1c832ba0d1c6b6 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 5 Nov 2024 16:46:08 +0100 Subject: [PATCH 10/14] make pipeline layouts const and improve the `getBindingInfoForHLSL` --- include/nbl/asset/ICPUComputePipeline.h | 6 ++--- include/nbl/asset/ICPUGraphicsPipeline.h | 4 +-- include/nbl/asset/ICPUPipeline.h | 6 ++--- include/nbl/asset/IPipeline.h | 6 ++--- include/nbl/asset/IPipelineLayout.h | 32 ++++++++++++++++-------- 5 files changed, 32 insertions(+), 22 deletions(-) diff --git a/include/nbl/asset/ICPUComputePipeline.h b/include/nbl/asset/ICPUComputePipeline.h index f3af332c6..14b027715 100644 --- a/include/nbl/asset/ICPUComputePipeline.h +++ b/include/nbl/asset/ICPUComputePipeline.h @@ -25,7 +25,7 @@ class ICPUComputePipeline : public ICPUPipeline,1> { if (!params.layout) return nullptr; - auto retval = new ICPUComputePipeline(core::smart_refctd_ptr(params.layout)); + auto retval = new ICPUComputePipeline(core::smart_refctd_ptr(params.layout)); if (!retval->setSpecInfo(params.shader)) { retval->drop(); @@ -48,7 +48,7 @@ class ICPUComputePipeline : public ICPUPipeline,1> using base_t::base_t; virtual ~ICPUComputePipeline() = default; - base_t* clone_impl(core::smart_refctd_ptr&& layout) const override + base_t* clone_impl(core::smart_refctd_ptr&& layout) const override { return new ICPUComputePipeline(std::move(layout)); } @@ -57,7 +57,7 @@ class ICPUComputePipeline : public ICPUPipeline,1> { if (ix!=0) return m_stages[0].shader.get(); - return m_layout.get(); + return const_cast(m_layout.get()); } inline int8_t stageToIndex(const ICPUShader::E_SHADER_STAGE stage) const override diff --git a/include/nbl/asset/ICPUGraphicsPipeline.h b/include/nbl/asset/ICPUGraphicsPipeline.h index 8b922c5a4..e319b2750 100644 --- a/include/nbl/asset/ICPUGraphicsPipeline.h +++ 
b/include/nbl/asset/ICPUGraphicsPipeline.h @@ -65,7 +65,7 @@ class ICPUGraphicsPipeline final : public ICPUPipeline&& layout) const override + base_t* clone_impl(core::smart_refctd_ptr&& layout) const override { std::array _shaders; for (auto i=0; i(m_layout.get()); if (ix==1) return m_renderpass.get(); size_t stageCount = 0; diff --git a/include/nbl/asset/ICPUPipeline.h b/include/nbl/asset/ICPUPipeline.h index 7a0f0c5bf..5c43df017 100644 --- a/include/nbl/asset/ICPUPipeline.h +++ b/include/nbl/asset/ICPUPipeline.h @@ -51,11 +51,11 @@ class ICPUPipeline : public IAsset, public PipelineNonAssetBase ICPUPipelineLayout* getLayout() { assert(isMutable()); - return PipelineNonAssetBase::m_layout.get(); + return const_cast(PipelineNonAssetBase::m_layout.get()); } const ICPUPipelineLayout* getLayout() const { return PipelineNonAssetBase::m_layout.get(); } - inline void setLayout(core::smart_refctd_ptr&& _layout) + inline void setLayout(core::smart_refctd_ptr&& _layout) { assert(isMutable()); PipelineNonAssetBase::m_layout = std::move(_layout); @@ -117,7 +117,7 @@ class ICPUPipeline : public IAsset, public PipelineNonAssetBase using PipelineNonAssetBase::PipelineNonAssetBase; virtual ~ICPUPipeline() = default; - virtual this_t* clone_impl(core::smart_refctd_ptr&& layout) const = 0; + virtual this_t* clone_impl(core::smart_refctd_ptr&& layout) const = 0; virtual int8_t stageToIndex(const ICPUShader::E_SHADER_STAGE stage) const = 0; struct ShaderStage { diff --git a/include/nbl/asset/IPipeline.h b/include/nbl/asset/IPipeline.h index 6af7b50bf..40623876f 100644 --- a/include/nbl/asset/IPipeline.h +++ b/include/nbl/asset/IPipeline.h @@ -35,7 +35,7 @@ class IPipeline struct SCreationParams { public: - PipelineLayout* layout = nullptr; + const PipelineLayout* layout = nullptr; protected: // This is not public to make sure that different pipelines only get the enums they support @@ -107,9 +107,9 @@ class IPipeline inline const PipelineLayout* getLayout() const {return 
m_layout.get();} protected: - inline IPipeline(core::smart_refctd_ptr&& _layout) : m_layout(std::move(_layout)) {} + inline IPipeline(core::smart_refctd_ptr&& _layout) : m_layout(std::move(_layout)) {} - core::smart_refctd_ptr m_layout; + core::smart_refctd_ptr m_layout; }; } diff --git a/include/nbl/asset/IPipelineLayout.h b/include/nbl/asset/IPipelineLayout.h index fdbc97bbf..7cc980290 100644 --- a/include/nbl/asset/IPipelineLayout.h +++ b/include/nbl/asset/IPipelineLayout.h @@ -143,41 +143,51 @@ class IPipelineLayout } // utility function, if you compile shaders for specific layouts, not create layouts given shaders - using desc_type_bitset_t = std::bitset(IDescriptor::E_TYPE::ET_COUNT)>; + struct SBindingKey + { + using type_bitset_t = std::bitset(IDescriptor::E_TYPE::ET_COUNT)>; + + hlsl::SBindingInfo binding = {}; + core::bitflag requiredStages = IShader::E_SHADER_STAGE::ESS_UNKNOWN; + // could have just initialized with `~type_bitset_t()` in C++23 + type_bitset_t allowedTypes = type_bitset_t((0x1u<(IDescriptor::E_TYPE::ET_COUNT))-1); + }; // TODO: add constraints for stage and creation flags, or just return the storage index & redirect? 
- core::string getBindingInfoForHLSL(const hlsl::SBindingInfo& info, const desc_type_bitset_t allowedTypes=desc_type_bitset_t().set()) const + core::string getBindingInfoForHLSL(const SBindingKey& key) const { - if (info.set>=DESCRIPTOR_SET_COUNT) - return "#error \"::nbl::hlsl::SBindingInfo::set out of range!\""; - const auto* layout = m_descSetLayouts[info.set]; + if (key.binding.set>=DESCRIPTOR_SET_COUNT) + return "#error \"IPipelineLayout::SBindingKey::binding::set out of range!\""; + const auto* layout = m_descSetLayouts[key.binding.set].get(); if (!layout) - return "#error \"::nbl::hlsl::SBindingInfo::set layout is nullptr!\""; + return "#error \"IPipelineLayout::SBindingKey::binding::set layout is nullptr!\""; // using redirect_t = IDescriptorSetLayoutBase::CBindingRedirect; using storage_range_index_t = redirect_t::storage_range_index_t; const redirect_t* redirect; storage_range_index_t found; { - const redirect_t::binding_number_t binding(info.binding); + const redirect_t::binding_number_t binding(key.binding.binding); for (auto t=0u; t(IDescriptor::E_TYPE::ET_COUNT); t++) - if (allowedTypes.test(t)) + if (key.allowedTypes.test(t)) { redirect = &layout->getDescriptorRedirect(static_cast(t)); found = redirect->findBindingStorageIndex(binding); if (found) break; } - if (!found && allowedTypes.test(static_cast(IDescriptor::E_TYPE::ET_SAMPLER))) + if (!found && key.allowedTypes.test(static_cast(IDescriptor::E_TYPE::ET_SAMPLER))) { redirect = &layout->getImmutableSamplerRedirect(); found = redirect->findBindingStorageIndex(binding); } if (!found) - return "#error \"Could not find `::nbl::hlsl::SBindingInfo::binding` in `::nbl::hlsl::SBindingInfo::set`'s layout!\""; + return "#error \"Could not find `IPipelineLayout::SBindingKey::binding::binding` in `IPipelineLayout::SBindingKey::binding::set`'s layout!\""; } + if (redirect->getStageFlags(found).hasFlags(key.requiredStages)) + return "#error \"Binding found in the layout doesn't have all the 
`IPipelineLayout::SBindingKey::binding::requiredStages` flags!\""; const auto count = redirect->getCount(found); assert(count); // this layout should have never passed validation - return "::nbl::hlsl::ConstevalBindingInfo<"+std::to_string(info.set)+","+std::to_string(info.binding)+","+std::to_string(count)+">"; + return "::nbl::hlsl::ConstevalBindingInfo<"+std::to_string(key.binding.set)+","+std::to_string(key.binding.binding)+","+std::to_string(count)+">"; } protected: From 395ac581fd5c28efaf8bb4c3a7123ea6b194d6e0 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 5 Nov 2024 16:46:58 +0100 Subject: [PATCH 11/14] start using the asset converter to make Blit shaders --- examples_tests | 2 +- include/nbl/asset/IGraphicsPipeline.h | 2 +- include/nbl/video/utilities/CComputeBlit.h | 293 +++------------------ src/nbl/builtin/CMakeLists.txt | 18 -- src/nbl/video/utilities/CComputeBlit.cpp | 204 +++++++++----- 5 files changed, 188 insertions(+), 331 deletions(-) diff --git a/examples_tests b/examples_tests index f6492b0de..e95c56290 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit f6492b0de975754f960a2761aaacf3a1a3354100 +Subproject commit e95c56290e7f31f3f2a2b6e07ccafd7feb2e686e diff --git a/include/nbl/asset/IGraphicsPipeline.h b/include/nbl/asset/IGraphicsPipeline.h index 5f6365525..62861fdc9 100644 --- a/include/nbl/asset/IGraphicsPipeline.h +++ b/include/nbl/asset/IGraphicsPipeline.h @@ -155,7 +155,7 @@ class IGraphicsPipeline : public IPipeline, public IGraphics protected: explicit IGraphicsPipeline(const SCreationParams& _params) : - IPipeline(core::smart_refctd_ptr(_params.layout)), + IPipeline(core::smart_refctd_ptr(_params.layout)), m_params(_params.cached), m_renderpass(core::smart_refctd_ptr(_params.renderpass)) {} SCachedCreationParams m_params; diff --git a/include/nbl/video/utilities/CComputeBlit.h b/include/nbl/video/utilities/CComputeBlit.h index 69b8d8ba2..4180ac420 100644 --- a/include/nbl/video/utilities/CComputeBlit.h +++ 
b/include/nbl/video/utilities/CComputeBlit.h @@ -8,9 +8,16 @@ namespace nbl::video { -class NBL_API2 CComputeBlit : public core::IReferenceCounted +class CComputeBlit : public core::IReferenceCounted { public: + constexpr static inline asset::SPushConstantRange DefaultPushConstantRange = { + .stageFlags = IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0ull, + .size = sizeof(hlsl::blit::parameters2_t) + }; + constexpr static inline std::span DefaultPushConstantRanges = {&DefaultPushConstantRange,1}; + // Coverage adjustment needs alpha to be stored in HDR with high precision static inline asset::E_FORMAT getCoverageAdjustmentIntermediateFormat(const asset::E_FORMAT format) { @@ -41,7 +48,7 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted } // ctor - CComputeBlit( + NBL_API2 CComputeBlit( core::smart_refctd_ptr&& logicalDevice, core::smart_refctd_ptr&& cache=nullptr, core::smart_refctd_ptr&& logger=nullptr @@ -52,6 +59,7 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted { core::smart_refctd_ptr blit; core::smart_refctd_ptr coverage; + uint16_t workgroupSize; }; struct SPipelinesCreateInfo { @@ -67,13 +75,13 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted hlsl::SBindingInfo samplers; // must be Storage Image descriptor type hlsl::SBindingInfo outputs; - //! If you set the balues too small, we'll correct them ourselves anyway + //! 
If you set the balues too small, we'll correct them ourselves anyway, default values of 0 means we guess and provide our defaults // needs to be at least as big as the maximum subgroup size - uint32_t workgroupSizeLog2 : 4 = 0; - // - uint32_t sharedMemoryPerInvocation : 6 = 0; + uint16_t workgroupSizeLog2 : 4 = 0; + // in bytes, needs to be at least enough to store two full input pixels per invocation + uint16_t sharedMemoryPerInvocation : 6 = 0; }; - SPipelines createAndCachePipelines(const SPipelinesCreateInfo& info); + NBL_API2 SPipelines createAndCachePipelines(const SPipelinesCreateInfo& info); //! Returns the original format if supports STORAGE_IMAGE otherwise returns a format in its compat class which supports STORAGE_IMAGE. inline asset::E_FORMAT getOutputViewFormat(const asset::E_FORMAT format) @@ -99,101 +107,38 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted } } -#if 0 - // @param `alphaBinCount` is only required to size the histogram present in the default nbl_glsl_blit_AlphaStatistics_t in default_compute_common.comp - core::smart_refctd_ptr createAlphaTestSpecializedShader(const asset::IImage::E_TYPE inImageType, const uint32_t alphaBinCount = asset::IBlitUtilities::DefaultAlphaBinCount); - - core::smart_refctd_ptr getAlphaTestPipeline(const uint32_t alphaBinCount, const asset::IImage::E_TYPE imageType) - { - const auto workgroupDims = getDefaultWorkgroupDims(imageType); - const auto paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount); - - assert(paddedAlphaBinCount >= asset::IBlitUtilities::MinAlphaBinCount); - const auto pipelineIndex = (paddedAlphaBinCount / asset::IBlitUtilities::MinAlphaBinCount) - 1; - - if (m_alphaTestPipelines[pipelineIndex][imageType]) - return m_alphaTestPipelines[pipelineIndex][imageType]; - - auto specShader = createAlphaTestSpecializedShader(imageType, paddedAlphaBinCount); - IGPUComputePipeline::SCreationParams creationParams; - creationParams.shader.shader = specShader.get(); - 
creationParams.shader.entryPoint = "main"; - creationParams.layout = m_blitPipelineLayout[EBT_COVERAGE_ADJUSTMENT].get(); - assert(m_device->createComputePipelines(nullptr, { &creationParams, &creationParams + 1 }, &m_alphaTestPipelines[pipelineIndex][imageType])); - - return m_alphaTestPipelines[pipelineIndex][imageType]; - } - - // @param `outFormat` dictates encoding. - core::smart_refctd_ptr createNormalizationSpecializedShader(const asset::IImage::E_TYPE inImageType, const asset::E_FORMAT outFormat, - const uint32_t alphaBinCount = asset::IBlitUtilities::DefaultAlphaBinCount); - - core::smart_refctd_ptr getNormalizationPipeline(const asset::IImage::E_TYPE imageType, const asset::E_FORMAT outFormat, - const uint32_t alphaBinCount = asset::IBlitUtilities::DefaultAlphaBinCount) + // Use the return values of `getOutputViewFormat` and `getCoverageAdjustmentIntermediateFormat` for this + static inline uint32_t getAlphaBinCount(const uint16_t workgroupSize, const asset::E_FORMAT intermediateAlpha, const uint32_t layersToBlit) { - const auto workgroupDims = getDefaultWorkgroupDims(imageType); - const uint32_t paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount); - const SNormalizationCacheKey key = { imageType, paddedAlphaBinCount, outFormat }; - - if (m_normalizationPipelines.find(key) == m_normalizationPipelines.end()) + uint16_t baseBucketCount; + using format_t = nbl::asset::E_FORMAT; + switch (intermediateAlpha) { - auto specShader = createNormalizationSpecializedShader(imageType, outFormat, paddedAlphaBinCount); - IGPUComputePipeline::SCreationParams creationParams; - creationParams.shader.shader = specShader.get(); - creationParams.shader.entryPoint = "main"; - creationParams.layout = m_blitPipelineLayout[EBT_COVERAGE_ADJUSTMENT].get(); - assert(m_device->createComputePipelines(nullptr, { &creationParams, &creationParams + 1 }, &m_normalizationPipelines[key])); + case format_t::EF_R8_UNORM: [[fallthrough]]; + case format_t::EF_R8_SNORM: + 
baseBucketCount = 256; + break; + case format_t::EF_R16_SFLOAT: + baseBucketCount = 512; + break; + case format_t::EF_R16_UNORM: [[fallthrough]]; + case format_t::EF_R16_SNORM: [[fallthrough]]; + baseBucketCount = 1024; + break; + case format_t::EF_R32_SFLOAT: + baseBucketCount = 2048; + break; + default: + return 0; } - - return m_normalizationPipelines[key]; + // the absolute minimum needed to store a single pixel of a worst case format (precise, all 4 channels) + constexpr auto singlePixelStorage = 4*sizeof(hlsl::float32_t); + constexpr auto ratio = singlePixelStorage/sizeof(uint16_t); + const auto paddedAlphaBinCount = core::min(core::roundUp(baseBucketCount,workgroupSize),workgroupSize*ratio); + return paddedAlphaBinCount*layersToBlit; } - template - core::smart_refctd_ptr getBlitPipeline( - const asset::E_FORMAT outFormat, - const asset::IImage::E_TYPE imageType, - const core::vectorSIMDu32& inExtent, - const core::vectorSIMDu32& outExtent, - const asset::IBlitUtilities::E_ALPHA_SEMANTIC alphaSemantic, - const typename BlitUtilities::convolution_kernels_t& kernels, - const uint32_t workgroupSize = 256, - const uint32_t alphaBinCount = asset::IBlitUtilities::DefaultAlphaBinCount) - { - const auto paddedAlphaBinCount = getPaddedAlphaBinCount(core::vectorSIMDu32(workgroupSize, 1, 1, 1), alphaBinCount); - - const SBlitCacheKey key = - { - .wgSize = workgroupSize, - .imageType = imageType, - .alphaBinCount = paddedAlphaBinCount, - .outFormat = outFormat, - .smemSize = m_availableSharedMemory, - .coverageAdjustment = (alphaSemantic == asset::IBlitUtilities::EAS_REFERENCE_OR_COVERAGE) - }; - - if (m_blitPipelines.find(key) == m_blitPipelines.end()) - { - const auto blitType = (alphaSemantic == asset::IBlitUtilities::EAS_REFERENCE_OR_COVERAGE) ? 
EBT_COVERAGE_ADJUSTMENT : EBT_REGULAR; - - auto specShader = createBlitSpecializedShader( - outFormat, - imageType, - inExtent, - outExtent, - alphaSemantic, - kernels, - workgroupSize, - paddedAlphaBinCount); - - IGPUComputePipeline::SCreationParams creationParams; - creationParams.shader.shader = specShader.get(); - creationParams.shader.entryPoint = "main"; - creationParams.layout = m_blitPipelineLayout[blitType].get(); - m_device->createComputePipelines(nullptr, { &creationParams, &creationParams + 1 }, &m_blitPipelines[key]); - } - - return m_blitPipelines[key]; - } +#if 0 //! Returns the number of output texels produced by one workgroup, deciding factor is `m_availableSharedMemory`. //! @param outImageFormat is the format of output (of the blit step) image. @@ -368,152 +313,10 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted outDispatchInfo.wgCount[2] = workgroupCount[2]; } - static inline core::vectorSIMDu32 getDefaultWorkgroupDims(const asset::IImage::E_TYPE imageType) - { - switch (imageType) - { - case asset::IImage::ET_1D: - return core::vectorSIMDu32(256, 1, 1, 1); - case asset::IImage::ET_2D: - return core::vectorSIMDu32(16, 16, 1, 1); - case asset::IImage::ET_3D: - return core::vectorSIMDu32(8, 8, 4, 1); - default: - return core::vectorSIMDu32(1, 1, 1, 1); - } - } - - static inline size_t getCoverageAdjustmentScratchSize(const asset::IBlitUtilities::E_ALPHA_SEMANTIC alphaSemantic, const asset::IImage::E_TYPE imageType, const uint32_t alphaBinCount, const uint32_t layersToBlit) - { - if (alphaSemantic != asset::IBlitUtilities::EAS_REFERENCE_OR_COVERAGE) - return 0; - - const auto workgroupDims = getDefaultWorkgroupDims(imageType); - const auto paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount); - const auto requiredSize = (sizeof(uint32_t) + paddedAlphaBinCount * sizeof(uint32_t)) * layersToBlit; - return requiredSize; - } - - bool updateDescriptorSet( - video::IGPUDescriptorSet* blitDS, - 
video::IGPUDescriptorSet* kernelWeightsDS, - core::smart_refctd_ptr inImageView, - core::smart_refctd_ptr outImageView, - core::smart_refctd_ptr coverageAdjustmentScratchBuffer, - core::smart_refctd_ptr kernelWeightsUTB, - const asset::ISampler::E_TEXTURE_CLAMP wrapU = asset::ISampler::ETC_CLAMP_TO_EDGE, - const asset::ISampler::E_TEXTURE_CLAMP wrapV = asset::ISampler::ETC_CLAMP_TO_EDGE, - const asset::ISampler::E_TEXTURE_CLAMP wrapW = asset::ISampler::ETC_CLAMP_TO_EDGE, - const asset::ISampler::E_TEXTURE_BORDER_COLOR borderColor = asset::ISampler::ETBC_FLOAT_OPAQUE_BLACK) - { - constexpr auto MAX_DESCRIPTOR_COUNT = 3; - - auto updateDS = [this, coverageAdjustmentScratchBuffer](video::IGPUDescriptorSet* ds, video::IGPUDescriptorSet::SDescriptorInfo* infos) -> bool - { - const auto bindingCount = ds->getLayout()->getTotalBindingCount(); - if ((bindingCount == 3) && !coverageAdjustmentScratchBuffer) - return false; - - video::IGPUDescriptorSet::SWriteDescriptorSet writes[MAX_DESCRIPTOR_COUNT] = {}; - - uint32_t infoIdx = 0; - uint32_t writeCount = 0; - for (uint32_t t = 0; t < static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); ++t) - { - const auto type = static_cast(t); - const auto& redirect = ds->getLayout()->getDescriptorRedirect(type); - const auto declaredBindingCount = redirect.getBindingCount(); - - for (uint32_t i = 0; i < declaredBindingCount; ++i) - { - auto& write = writes[writeCount++]; - write.dstSet = ds; - write.binding = redirect.getBinding(IGPUDescriptorSetLayout::CBindingRedirect::storage_range_index_t{ i }).data; - write.arrayElement = 0u; - write.count = redirect.getCount(IGPUDescriptorSetLayout::CBindingRedirect::storage_range_index_t{ i }); - write.info = &infos[infoIdx]; - - infoIdx += write.count; - } - } - assert(writeCount == bindingCount); - m_device->updateDescriptorSets(writeCount, writes, 0u, nullptr); - - return true; - }; - - if (blitDS) - { - if (!inImageView || !outImageView) - return false; - - 
video::IGPUDescriptorSet::SDescriptorInfo infos[MAX_DESCRIPTOR_COUNT] = {}; - - if (!samplers[wrapU][wrapV][wrapW][borderColor]) - { - video::IGPUSampler::SParams params = {}; - params.TextureWrapU = wrapU; - params.TextureWrapV = wrapV; - params.TextureWrapW = wrapW; - params.BorderColor = borderColor; - params.MinFilter = asset::ISampler::ETF_NEAREST; - params.MaxFilter = asset::ISampler::ETF_NEAREST; - params.MipmapMode = asset::ISampler::ESMM_NEAREST; - params.AnisotropicFilter = 0u; - params.CompareEnable = 0u; - params.CompareFunc = asset::ISampler::ECO_ALWAYS; - - samplers[wrapU][wrapV][wrapW][borderColor] = m_device->createSampler(params); - if (!samplers[wrapU][wrapV][wrapW][borderColor]) - return false; - } - - infos[0].desc = inImageView; - infos[0].info.image.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; - infos[0].info.combinedImageSampler.sampler = samplers[wrapU][wrapV][wrapW][borderColor]; - - infos[1].desc = outImageView; - infos[1].info.image.imageLayout = asset::IImage::LAYOUT::GENERAL; - infos[1].info.combinedImageSampler.sampler = nullptr; - - if (coverageAdjustmentScratchBuffer) - { - infos[2].desc = coverageAdjustmentScratchBuffer; - infos[2].info.buffer.offset = 0; - infos[2].info.buffer.size = coverageAdjustmentScratchBuffer->getSize(); - } - - if (!updateDS(blitDS, infos)) - return false; - } - - if (kernelWeightsDS) - { - video::IGPUDescriptorSet::SDescriptorInfo info = {}; - info.desc = kernelWeightsUTB; - info.info.buffer.offset = 0ull; - info.info.buffer.size = kernelWeightsUTB->getUnderlyingBuffer()->getSize(); - - if (!updateDS(kernelWeightsDS, &info)) - return false; - } - - return true; - } - //! User is responsible for the memory barriers between previous writes and the first //! dispatch on the input image, and future reads of output image and the last dispatch. 
template inline void blit( - video::IGPUCommandBuffer* cmdbuf, - const asset::IBlitUtilities::E_ALPHA_SEMANTIC alphaSemantic, - video::IGPUDescriptorSet* alphaTestDS, - video::IGPUComputePipeline* alphaTestPipeline, - video::IGPUDescriptorSet* blitDS, - video::IGPUDescriptorSet* blitWeightsDS, - video::IGPUComputePipeline* blitPipeline, - video::IGPUDescriptorSet* normalizationDS, - video::IGPUComputePipeline* normalizationPipeline, const core::vectorSIMDu32& inImageExtent, const asset::IImage::E_TYPE inImageType, const asset::E_FORMAT inImageFormat, @@ -627,7 +430,7 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted } //! Query shared memory size for a given `outputTexelsPerWG`. - size_t getRequiredSharedMemorySize( + inline size_t getRequiredSharedMemorySize( const core::vectorSIMDu32& outputTexelsPerWG, const core::vectorSIMDu32& outExtent, const asset::IImage::E_TYPE imageType, @@ -641,16 +444,6 @@ class NBL_API2 CComputeBlit : public core::IReferenceCounted const size_t requiredSmem = (core::max(preloadRegion.x * preloadRegion.y * preloadRegion.z, outputTexelsPerWG.x * outputTexelsPerWG.y * preloadRegion.z) + outputTexelsPerWG.x * preloadRegion.y * preloadRegion.z) * channelCount * sizeof(float); return requiredSmem; }; - - static inline uint32_t getPaddedAlphaBinCount(const core::vectorSIMDu32& workgroupDims, const uint32_t oldAlphaBinCount) - { - // For the normalization shader, it should be that: - // alphaBinCount = k*workGroupSize, k is integer, k >= 1, - assert(workgroupDims.x != 0 && workgroupDims.y != 0 && workgroupDims.z != 0); - const auto wgSize = workgroupDims.x * workgroupDims.y * workgroupDims.z; - const auto paddedAlphaBinCount = core::roundUp(oldAlphaBinCount, wgSize); - return paddedAlphaBinCount; - } }; } diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 53ab53497..4dbd039b5 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -207,24 +207,6 @@ 
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/ext/DepthPyramidGenerator/com LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/ext/DepthPyramidGenerator/push_constants_struct_common.h") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/ext/DepthPyramidGenerator/depth_pyramid_generator_impl.glsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/ext/DepthPyramidGenerator/virtual_work_group.glsl") -# blit -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/formats_encode.glsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/parameters.glsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/multi_dimensional_array_addressing.glsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/default_compute_common.comp") - -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/default_compute_blit.comp") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/blit/blit.glsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/blit/descriptors.glsl") - -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/default_compute_alpha_test.comp") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/alpha_test/alpha_test.glsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/alpha_test/descriptors.glsl") - -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/default_compute_normalization.comp") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/normalization/normalization.glsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/normalization/descriptors.glsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "glsl/blit/normalization/shared_normalization.glsl") # HLSL LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/macros.h") diff --git a/src/nbl/video/utilities/CComputeBlit.cpp b/src/nbl/video/utilities/CComputeBlit.cpp index 1dd123952..2ad565600 100644 --- a/src/nbl/video/utilities/CComputeBlit.cpp +++ b/src/nbl/video/utilities/CComputeBlit.cpp @@ -23,36 +23,25 @@ CComputeBlit::CComputeBlit(smart_refctd_ptr&& 
logicalDevice, sma auto CComputeBlit::createAndCachePipelines(const SPipelinesCreateInfo& info) -> SPipelines { SPipelines retval; - core::vector> cpuPplns; - cpuPplns.reserve(tasks.size()); + + std::array,2> cpuPplns; const auto& limits = m_device->getPhysicalDevice()->getLimits(); - for (auto task : tasks) + retval.workgroupSize = 0x1u<std::string { - // adjust task default values - { - if (task.workgroupSizeLog2(task.outputFormat),3,1.f); - const auto precisionAt0 = getFormatPrecision(static_cast(task.outputFormat),3,0.f); - if (limits.workgroupMemoryExplicitLayout16BitAccess && limits.shaderFloat16 && precisionAt1>=std::exp2f(-11.f) && precisionAt0>=std::numeric_limits::min()) - useFloat16 = true; - } - // the absolute minimum needed to store a single pixel - const auto singlePixelStorage = channels*(useFloat16 ? sizeof(hlsl::float16_t):sizeof(hlsl::float32_t)); - // also slightly more memory is needed - task.sharedMemoryPerInvocation = core::max(singlePixelStorage*2,task.sharedMemoryPerInvocation); - } - const auto common = [&]()->std::string - { - std::ostringstream tmp; - tmp << R"===( + std::ostringstream tmp; + tmp << R"===( #include "nbl/builtin/hlsl/binding_info.hlsl" @@ -61,54 +50,58 @@ using namespace nbl::hlsl; struct ConstevalParameters { - NBL_CONSTEXPR_STATIC_INLINE uint32_t WorkGroupSize = )===" << (0x1u<getBindingInfoForHLSL() << R"===(; - using input_sampler_binding_t = )===" << serializeBindingInfo() << R"===(; - using input_image_binding_t = )===" << serializeBindingInfo() << R"===(; - using output_binding_t = )===" << serializeBindingInfo() << R"===(; - NBL_CONSTEXPR_STATIC_INLINE uint32_t uint32_t SharedMemoryDWORDs = )===" << task.sharedMemoryPerInvocation/sizeof(uint32_t) << R"===(; +NBL_CONSTEXPR_STATIC_INLINE uint32_t WorkGroupSize = )===" << retval.workgroupSize << R"===(; +using kernel_weight_binding_t = )===" << layout->getBindingInfoForHLSL({.binding=info.kernelWeights,.requiredStages=IShader::E_SHADER_STAGE::ESS_COMPUTE}) << R"===(; 
+using input_sampler_binding_t = )===" << layout->getBindingInfoForHLSL({.binding=info.samplers,.requiredStages=IShader::E_SHADER_STAGE::ESS_COMPUTE}) << R"===(; +using input_image_binding_t = )===" << layout->getBindingInfoForHLSL({.binding=info.inputs,.requiredStages=IShader::E_SHADER_STAGE::ESS_COMPUTE}) << R"===(; +using output_binding_t = )===" << layout->getBindingInfoForHLSL({.binding=info.outputs,.requiredStages=IShader::E_SHADER_STAGE::ESS_COMPUTE}) << R"===(; +NBL_CONSTEXPR_STATIC_INLINE uint32_t uint32_t SharedMemoryDWORDs = )===" << (sharedMemoryPerInvocation* retval.workgroupSize)/sizeof(uint32_t) << R"===(; }; )==="; - return tmp.str(); - }(); - auto createPipeline = [&limits,&common](const char* mainPath)->smart_refctd_ptr + return tmp.str(); + }(); + auto createPipeline = [&limits,layout,&common](const char* mainPath)->smart_refctd_ptr + { + auto shader = make_smart_refctd_ptr( + (common+"\n#include \""+mainPath+"\"\n").c_str(), + IShader::E_SHADER_STAGE::ESS_COMPUTE, + IShader::E_CONTENT_TYPE::ECT_HLSL, + mainPath + ); + // make sure there's a hash so asset converter doesn't fail { - auto shader = make_smart_refctd_ptr( - (common+"\n#include \""+mainPath+"\"\n").c_str(), - IShader::E_SHADER_STAGE::ESS_COMPUTE, - IShader::E_CONTENT_TYPE::ECT_HLSL, - mainPath - ); - - ICPUComputePipeline::SCreationParams params = {}; - params.layout = nullptr; // TODO - params.shader.entryPoint = "main"; - params.shader.shader = shader.get(); - params.shader.requiredSubgroupSize = static_cast(hlsl::findMSB(limits.maxSubgroupSize)); - // needed for the prefix and reductions to work - params.shader.requireFullSubgroups = true; - return ICPUComputePipeline::create(params); - }; - // create blit pipeline - cpuPplns.emplace_back(createPipeline("default_blit.comp.hlsl")); - cpuPplns.emplace_back(createPipeline("default_normalize.comp.hlsl")); - } + auto source = const_cast(shader->getContent()); + source->setContentHash(source->computeContentHash()); + } + + 
ICPUComputePipeline::SCreationParams params = {}; + params.layout = layout; + params.shader.entryPoint = "main"; + params.shader.shader = shader.get(); + params.shader.requiredSubgroupSize = static_cast(hlsl::findMSB(limits.maxSubgroupSize)); + // needed for the prefix and reductions to work + params.shader.requireFullSubgroups = true; + return ICPUComputePipeline::create(params); + }; + // create blit pipeline + cpuPplns[0] = createPipeline("nbl/builtin/hlsl/blit/default_blit.comp.hlsl"); + cpuPplns[1] = createPipeline("nbl/builtin/hlsl/blit/default_normalize.comp.hlsl"); CAssetConverter::SInputs inputs = {}; - inputs.readCache = converter; + inputs.readCache = info.converter; inputs.logger = m_logger.getRaw(); std::get>(inputs.assets) = {&cpuPplns.data()->get(),cpuPplns.size()}; inputs.readShaderCache = m_shaderCache.get(); inputs.writeShaderCache = m_shaderCache.get(); // no pipeline cache, because we only make the same pipeline once, ever - auto reserveResults = converter->reserve(inputs); + auto reserveResults = info.converter->reserve(inputs); assert(reserveResults.getRequiredQueueFlags().value==IQueue::FAMILY_FLAGS::NONE); + // copy over the results { auto rIt = reserveResults.getGPUObjects().data(); - // TODO: redo - for (size_t i=0; ivalue; + retval.blit = (rIt++)->value; + retval.coverage = (rIt++)->value; } // this just inserts the pipelines into the cache @@ -198,6 +191,53 @@ core::smart_refctd_ptr createBlitSpecializedShader( return gpuShader; } +template +core::smart_refctd_ptr getBlitPipeline( + const asset::E_FORMAT outFormat, + const asset::IImage::E_TYPE imageType, + const core::vectorSIMDu32& inExtent, + const core::vectorSIMDu32& outExtent, + const asset::IBlitUtilities::E_ALPHA_SEMANTIC alphaSemantic, + const typename BlitUtilities::convolution_kernels_t& kernels, + const uint32_t workgroupSize = 256, + const uint32_t alphaBinCount = asset::IBlitUtilities::DefaultAlphaBinCount) +{ + const auto paddedAlphaBinCount = 
getPaddedAlphaBinCount(core::vectorSIMDu32(workgroupSize, 1, 1, 1), alphaBinCount); + + const SBlitCacheKey key = + { + .wgSize = workgroupSize, + .imageType = imageType, + .alphaBinCount = paddedAlphaBinCount, + .outFormat = outFormat, + .smemSize = m_availableSharedMemory, + .coverageAdjustment = (alphaSemantic == asset::IBlitUtilities::EAS_REFERENCE_OR_COVERAGE) + }; + + if (m_blitPipelines.find(key) == m_blitPipelines.end()) + { + const auto blitType = (alphaSemantic == asset::IBlitUtilities::EAS_REFERENCE_OR_COVERAGE) ? EBT_COVERAGE_ADJUSTMENT : EBT_REGULAR; + + auto specShader = createBlitSpecializedShader( + outFormat, + imageType, + inExtent, + outExtent, + alphaSemantic, + kernels, + workgroupSize, + paddedAlphaBinCount); + + IGPUComputePipeline::SCreationParams creationParams; + creationParams.shader.shader = specShader.get(); + creationParams.shader.entryPoint = "main"; + creationParams.layout = m_blitPipelineLayout[blitType].get(); + m_device->createComputePipelines(nullptr, { &creationParams, &creationParams + 1 }, &m_blitPipelines[key]); + } + + return m_blitPipelines[key]; +} + core::smart_refctd_ptr CComputeBlit::createAlphaTestSpecializedShader(const asset::IImage::E_TYPE imageType, const uint32_t alphaBinCount) { const auto workgroupDims = getDefaultWorkgroupDims(imageType); @@ -233,6 +273,28 @@ core::smart_refctd_ptr CComputeBlit::createAlphaTestSpecializ auto cpuShader = core::make_smart_refctd_ptr(shaderSourceStream.str().c_str(), IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, IGPUShader::E_CONTENT_TYPE::ECT_HLSL, "CComputeBlitGLSLGLSL::createAlphaTestSpecializedShader"); } +core::smart_refctd_ptr getAlphaTestPipeline(const uint32_t alphaBinCount, const asset::IImage::E_TYPE imageType) +{ + const auto workgroupDims = getDefaultWorkgroupDims(imageType); + const auto paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount); + + assert(paddedAlphaBinCount >= asset::IBlitUtilities::MinAlphaBinCount); + const auto pipelineIndex = 
(paddedAlphaBinCount / asset::IBlitUtilities::MinAlphaBinCount) - 1; + + if (m_alphaTestPipelines[pipelineIndex][imageType]) + return m_alphaTestPipelines[pipelineIndex][imageType]; + + auto specShader = createAlphaTestSpecializedShader(imageType, paddedAlphaBinCount); + IGPUComputePipeline::SCreationParams creationParams; + creationParams.shader.shader = specShader.get(); + creationParams.shader.entryPoint = "main"; + creationParams.layout = m_blitPipelineLayout[EBT_COVERAGE_ADJUSTMENT].get(); + assert(m_device->createComputePipelines(nullptr, { &creationParams, &creationParams + 1 }, &m_alphaTestPipelines[pipelineIndex][imageType])); + + return m_alphaTestPipelines[pipelineIndex][imageType]; +} + +// @param `outFormat` dictates encoding. core::smart_refctd_ptr CComputeBlit::createNormalizationSpecializedShader(const asset::IImage::E_TYPE imageType, const uint32_t alphaBinCount) { const auto workgroupDims = getDefaultWorkgroupDims(imageType); @@ -276,4 +338,24 @@ core::smart_refctd_ptr CComputeBlit::createNormalizationSpeci auto cpuShader = core::make_smart_refctd_ptr(shaderSourceStream.str().c_str(), IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, IGPUShader::E_CONTENT_TYPE::ECT_HLSL, "CComputeBlitGLSL::createNormalizationSpecializedShader"); } + +core::smart_refctd_ptr getNormalizationPipeline(const asset::IImage::E_TYPE imageType, const asset::E_FORMAT outFormat, + const uint32_t alphaBinCount = asset::IBlitUtilities::DefaultAlphaBinCount) +{ + const auto workgroupDims = getDefaultWorkgroupDims(imageType); + const uint32_t paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount); + const SNormalizationCacheKey key = { imageType, paddedAlphaBinCount, outFormat }; + + if (m_normalizationPipelines.find(key) == m_normalizationPipelines.end()) + { + auto specShader = createNormalizationSpecializedShader(imageType, outFormat, paddedAlphaBinCount); + IGPUComputePipeline::SCreationParams creationParams; + creationParams.shader.shader = specShader.get(); + 
creationParams.shader.entryPoint = "main"; + creationParams.layout = m_blitPipelineLayout[EBT_COVERAGE_ADJUSTMENT].get(); + assert(m_device->createComputePipelines(nullptr, { &creationParams, &creationParams + 1 }, &m_normalizationPipelines[key])); + } + + return m_normalizationPipelines[key]; +} #endif \ No newline at end of file From e1a87e757995a1d531d5517c9a23e6d07b9dbafe Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 5 Nov 2024 17:16:24 +0100 Subject: [PATCH 12/14] stupid typos are the bane of my existence --- examples_tests | 2 +- include/nbl/asset/IPipelineLayout.h | 2 +- include/nbl/builtin/hlsl/blit/common.hlsl | 2 +- include/nbl/builtin/hlsl/blit/default_blit.comp.hlsl | 7 +++---- include/nbl/builtin/hlsl/blit/default_normalize.comp.hlsl | 6 ++---- src/nbl/video/utilities/CComputeBlit.cpp | 2 +- 6 files changed, 9 insertions(+), 12 deletions(-) diff --git a/examples_tests b/examples_tests index e95c56290..e77ed5d46 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit e95c56290e7f31f3f2a2b6e07ccafd7feb2e686e +Subproject commit e77ed5d468f929ac5e7f1909f728895c923eb2c4 diff --git a/include/nbl/asset/IPipelineLayout.h b/include/nbl/asset/IPipelineLayout.h index 7cc980290..0eaba46f7 100644 --- a/include/nbl/asset/IPipelineLayout.h +++ b/include/nbl/asset/IPipelineLayout.h @@ -183,7 +183,7 @@ class IPipelineLayout if (!found) return "#error \"Could not find `IPipelineLayout::SBindingKey::binding::binding` in `IPipelineLayout::SBindingKey::binding::set`'s layout!\""; } - if (redirect->getStageFlags(found).hasFlags(key.requiredStages)) + if (!redirect->getStageFlags(found).hasFlags(key.requiredStages)) return "#error \"Binding found in the layout doesn't have all the `IPipelineLayout::SBindingKey::binding::requiredStages` flags!\""; const auto count = redirect->getCount(found); assert(count); // this layout should have never passed validation diff --git a/include/nbl/builtin/hlsl/blit/common.hlsl 
b/include/nbl/builtin/hlsl/blit/common.hlsl index 6295e6870..93ed57931 100644 --- a/include/nbl/builtin/hlsl/blit/common.hlsl +++ b/include/nbl/builtin/hlsl/blit/common.hlsl @@ -12,7 +12,7 @@ namespace hlsl { namespace glsl { -uint32_t gl_WorkGroupSize() +uint32_t3 gl_WorkGroupSize() { return uint32_t3(ConstevalParameters::WorkGroupSize,1,1); } diff --git a/include/nbl/builtin/hlsl/blit/default_blit.comp.hlsl b/include/nbl/builtin/hlsl/blit/default_blit.comp.hlsl index ad2749904..c9184d016 100644 --- a/include/nbl/builtin/hlsl/blit/default_blit.comp.hlsl +++ b/include/nbl/builtin/hlsl/blit/default_blit.comp.hlsl @@ -1,12 +1,11 @@ // Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -//#include "nbl/builtin/hlsl/blit/common.hlsl" -//#include "nbl/builtin/hlsl/blit/parameters.hlsl" -//#include "nbl/builtin/hlsl/blit/compute_blit.hlsl" +#include "nbl/builtin/hlsl/blit/parameters.hlsl" +#include "nbl/builtin/hlsl/blit/common.hlsl" +//#include "nbl/builtin/hlsl/blit/compute_blit.hlsl" -groupshared uint32_t sMem[ConstevalParameters::SharedMemoryDWORDs]; /* struct HistogramAccessor { diff --git a/include/nbl/builtin/hlsl/blit/default_normalize.comp.hlsl b/include/nbl/builtin/hlsl/blit/default_normalize.comp.hlsl index 589f370c0..8e2f4beb2 100644 --- a/include/nbl/builtin/hlsl/blit/default_normalize.comp.hlsl +++ b/include/nbl/builtin/hlsl/blit/default_normalize.comp.hlsl @@ -1,11 +1,9 @@ // Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h -#include "nbl/builtin/hlsl/blit/common.hlsl" - +#include "nbl/builtin/hlsl/blit/parameters.hlsl" - -//#include "nbl/builtin/hlsl/blit/parameters.hlsl" +#include "nbl/builtin/hlsl/blit/common.hlsl" //#include "nbl/builtin/hlsl/blit/compute_blit.hlsl" using namespace nbl::hlsl::blit; diff --git a/src/nbl/video/utilities/CComputeBlit.cpp b/src/nbl/video/utilities/CComputeBlit.cpp index 2ad565600..c3ceb6667 100644 --- a/src/nbl/video/utilities/CComputeBlit.cpp +++ b/src/nbl/video/utilities/CComputeBlit.cpp @@ -55,7 +55,7 @@ using kernel_weight_binding_t = )===" << layout->getBindingInfoForHLSL({.binding using input_sampler_binding_t = )===" << layout->getBindingInfoForHLSL({.binding=info.samplers,.requiredStages=IShader::E_SHADER_STAGE::ESS_COMPUTE}) << R"===(; using input_image_binding_t = )===" << layout->getBindingInfoForHLSL({.binding=info.inputs,.requiredStages=IShader::E_SHADER_STAGE::ESS_COMPUTE}) << R"===(; using output_binding_t = )===" << layout->getBindingInfoForHLSL({.binding=info.outputs,.requiredStages=IShader::E_SHADER_STAGE::ESS_COMPUTE}) << R"===(; -NBL_CONSTEXPR_STATIC_INLINE uint32_t uint32_t SharedMemoryDWORDs = )===" << (sharedMemoryPerInvocation* retval.workgroupSize)/sizeof(uint32_t) << R"===(; +NBL_CONSTEXPR_STATIC_INLINE uint32_t SharedMemoryDWORDs = )===" << (sharedMemoryPerInvocation* retval.workgroupSize)/sizeof(uint32_t) << R"===(; }; )==="; return tmp.str(); From c700f67ccc1dc13f7bb4127ace5c6449987ce03e Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 5 Nov 2024 19:13:48 +0100 Subject: [PATCH 13/14] Updated DXC --- 3rdparty/dxc/dxc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/dxc/dxc b/3rdparty/dxc/dxc index 7acfe6f4f..5adc27f9e 160000 --- a/3rdparty/dxc/dxc +++ b/3rdparty/dxc/dxc @@ -1 +1 @@ -Subproject commit 7acfe6f4fc724265db8026256fad18afeb282b97 +Subproject commit 5adc27f9e42de7681d65a98873048af661b9b367 From 
0c0b9ab86beec5595e09ae594a58e529a3f4cbb7 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 5 Nov 2024 19:22:27 +0100 Subject: [PATCH 14/14] change last place's usage of `impl::declval` to `experimental::declval` --- include/nbl/builtin/hlsl/sort/counting.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/sort/counting.hlsl b/include/nbl/builtin/hlsl/sort/counting.hlsl index 12da2e9d1..1cd916ccc 100644 --- a/include/nbl/builtin/hlsl/sort/counting.hlsl +++ b/include/nbl/builtin/hlsl/sort/counting.hlsl @@ -22,7 +22,7 @@ template< typename ValueAccessor, typename HistogramAccessor, typename SharedAccessor, - typename key_t = decltype(impl::declval().get(0)), + typename key_t = decltype(experimental::declval().get(0)), bool robust=false > struct counting