Skip to content

Commit

Permalink
Merge pull request #770 from Devsh-Graphics-Programming/bindless_blit
Browse files Browse the repository at this point in the history
Pseudo-Concepts for HLSL
  • Loading branch information
devshgraphicsprogramming authored Nov 5, 2024
2 parents 45db070 + 0c0b9ab commit c586e4b
Show file tree
Hide file tree
Showing 24 changed files with 776 additions and 522 deletions.
2 changes: 1 addition & 1 deletion 3rdparty/dxc/dxc
Submodule dxc updated 222 files
2 changes: 1 addition & 1 deletion examples_tests
6 changes: 3 additions & 3 deletions include/nbl/asset/ICPUComputePipeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class ICPUComputePipeline : public ICPUPipeline<IPipeline<ICPUPipelineLayout>,1>
{
if (!params.layout)
return nullptr;
auto retval = new ICPUComputePipeline(core::smart_refctd_ptr<ICPUPipelineLayout>(params.layout));
auto retval = new ICPUComputePipeline(core::smart_refctd_ptr<const ICPUPipelineLayout>(params.layout));
if (!retval->setSpecInfo(params.shader))
{
retval->drop();
Expand All @@ -48,7 +48,7 @@ class ICPUComputePipeline : public ICPUPipeline<IPipeline<ICPUPipelineLayout>,1>
using base_t::base_t;
virtual ~ICPUComputePipeline() = default;

base_t* clone_impl(core::smart_refctd_ptr<ICPUPipelineLayout>&& layout) const override
base_t* clone_impl(core::smart_refctd_ptr<const ICPUPipelineLayout>&& layout) const override
{
return new ICPUComputePipeline(std::move(layout));
}
Expand All @@ -57,7 +57,7 @@ class ICPUComputePipeline : public ICPUPipeline<IPipeline<ICPUPipelineLayout>,1>
{
if (ix!=0)
return m_stages[0].shader.get();
return m_layout.get();
return const_cast<ICPUPipelineLayout*>(m_layout.get());
}

inline int8_t stageToIndex(const ICPUShader::E_SHADER_STAGE stage) const override
Expand Down
4 changes: 2 additions & 2 deletions include/nbl/asset/ICPUGraphicsPipeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class ICPUGraphicsPipeline final : public ICPUPipeline<IGraphicsPipeline<ICPUPip
using base_t::base_t;
~ICPUGraphicsPipeline() = default;

base_t* clone_impl(core::smart_refctd_ptr<ICPUPipelineLayout>&& layout) const override
base_t* clone_impl(core::smart_refctd_ptr<const ICPUPipelineLayout>&& layout) const override
{
std::array<ICPUShader::SSpecInfo,GRAPHICS_SHADER_STAGE_COUNT> _shaders;
for (auto i=0; i<GRAPHICS_SHADER_STAGE_COUNT; i++)
Expand All @@ -80,7 +80,7 @@ class ICPUGraphicsPipeline final : public ICPUPipeline<IGraphicsPipeline<ICPUPip
inline IAsset* getDependant_impl(const size_t ix) override
{
if (ix==0)
return m_layout.get();
return const_cast<ICPUPipelineLayout*>(m_layout.get());
if (ix==1)
return m_renderpass.get();
size_t stageCount = 0;
Expand Down
6 changes: 3 additions & 3 deletions include/nbl/asset/ICPUPipeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,11 @@ class ICPUPipeline : public IAsset, public PipelineNonAssetBase
ICPUPipelineLayout* getLayout()
{
assert(isMutable());
return PipelineNonAssetBase::m_layout.get();
return const_cast<ICPUPipelineLayout*>(PipelineNonAssetBase::m_layout.get());
}
const ICPUPipelineLayout* getLayout() const { return PipelineNonAssetBase::m_layout.get(); }

inline void setLayout(core::smart_refctd_ptr<ICPUPipelineLayout>&& _layout)
inline void setLayout(core::smart_refctd_ptr<const ICPUPipelineLayout>&& _layout)
{
assert(isMutable());
PipelineNonAssetBase::m_layout = std::move(_layout);
Expand Down Expand Up @@ -117,7 +117,7 @@ class ICPUPipeline : public IAsset, public PipelineNonAssetBase
using PipelineNonAssetBase::PipelineNonAssetBase;
virtual ~ICPUPipeline() = default;

virtual this_t* clone_impl(core::smart_refctd_ptr<ICPUPipelineLayout>&& layout) const = 0;
virtual this_t* clone_impl(core::smart_refctd_ptr<const ICPUPipelineLayout>&& layout) const = 0;
virtual int8_t stageToIndex(const ICPUShader::E_SHADER_STAGE stage) const = 0;

struct ShaderStage {
Expand Down
2 changes: 1 addition & 1 deletion include/nbl/asset/IGraphicsPipeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ class IGraphicsPipeline : public IPipeline<PipelineLayoutType>, public IGraphics

protected:
explicit IGraphicsPipeline(const SCreationParams& _params) :
IPipeline<PipelineLayoutType>(core::smart_refctd_ptr<PipelineLayoutType>(_params.layout)),
IPipeline<PipelineLayoutType>(core::smart_refctd_ptr<const PipelineLayoutType>(_params.layout)),
m_params(_params.cached), m_renderpass(core::smart_refctd_ptr<renderpass_t>(_params.renderpass)) {}

SCachedCreationParams m_params;
Expand Down
6 changes: 3 additions & 3 deletions include/nbl/asset/IPipeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class IPipeline
struct SCreationParams
{
public:
PipelineLayout* layout = nullptr;
const PipelineLayout* layout = nullptr;

protected:
// This is not public to make sure that different pipelines only get the enums they support
Expand Down Expand Up @@ -107,9 +107,9 @@ class IPipeline
inline const PipelineLayout* getLayout() const {return m_layout.get();}

protected:
inline IPipeline(core::smart_refctd_ptr<PipelineLayout>&& _layout) : m_layout(std::move(_layout)) {}
inline IPipeline(core::smart_refctd_ptr<const PipelineLayout>&& _layout) : m_layout(std::move(_layout)) {}

core::smart_refctd_ptr<PipelineLayout> m_layout;
core::smart_refctd_ptr<const PipelineLayout> m_layout;
};

}
Expand Down
56 changes: 53 additions & 3 deletions include/nbl/asset/IPipelineLayout.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
#ifndef _NBL_ASSET_I_PIPELINE_LAYOUT_H_INCLUDED_
#define _NBL_ASSET_I_PIPELINE_LAYOUT_H_INCLUDED_

#include "nbl/macros.h"
#include "nbl/core/declarations.h"

#include <algorithm>
#include <array>

#include "nbl/macros.h"
#include "nbl/core/declarations.h"
#include "nbl/asset/IDescriptorSetLayout.h"
#include "nbl/builtin/hlsl/binding_info.hlsl"


namespace nbl::asset
Expand All @@ -21,7 +23,7 @@ namespace nbl::asset
however they serve as a fast path with regard to data upload from the
CPU and data access from the GPU.
Note that IrrlichtBaW limits push constant size to 128 bytes.
Note that Nabla limits push constant size to 128 bytes.
Push Constants are an alternative to a UBO for the cases where a UBO performs really poorly,
mostly very small and very frequent updates. Examples of which are:
Expand Down Expand Up @@ -140,6 +142,54 @@ class IPipelineLayout
return static_cast<int32_t>(i)-1;
}

// utility function for when you compile shaders for specific layouts, rather than create layouts from given shaders
// Lookup key for `getBindingInfoForHLSL`: names a (set, binding) slot together with the constraints the slot must satisfy.
struct SBindingKey
{
// One bit per `IDescriptor::E_TYPE` value (the bitset is `ET_COUNT` bits wide), indexed by the enum's integer value.
using type_bitset_t = std::bitset<static_cast<size_t>(IDescriptor::E_TYPE::ET_COUNT)>;

// Descriptor set index and binding number to look up; both fields are zero by default.
hlsl::SBindingInfo binding = {};
// Shader stages the binding found in the layout must have; defaults to `ESS_UNKNOWN`.
// NOTE(review): presumably `ESS_UNKNOWN` carries no stage bits and therefore imposes no requirement - confirm.
core::bitflag<IShader::E_SHADER_STAGE> requiredStages = IShader::E_SHADER_STAGE::ESS_UNKNOWN;
// Descriptor types the binding may be; the default sets the low `ET_COUNT` bits, i.e. every type is allowed.
// could have just initialized with `~type_bitset_t()` in C++23
type_bitset_t allowedTypes = type_bitset_t((0x1u<<static_cast<size_t>(IDescriptor::E_TYPE::ET_COUNT))-1);
};
// TODO: add constraints for stage and creation flags, or just return the storage index & redirect?
// Builds the HLSL type name `::nbl::hlsl::ConstevalBindingInfo<set,binding,count>` for the binding
// identified by `key`, with `count` read from this pipeline layout's descriptor set layout.
//
// The lookup walks the descriptor redirects of every type enabled in `key.allowedTypes` (in enum order)
// and, if nothing was found and samplers are allowed, the immutable sampler redirect as well.
//
// On any failure the returned string is an HLSL `#error "..."` directive instead of a type name,
// so the problem surfaces when the generated shader source is compiled. Failure cases:
//  - `key.binding.set` is not below `DESCRIPTOR_SET_COUNT`
//  - this layout has no descriptor set layout for that set
//  - the binding number is not present for any allowed descriptor type
//  - the binding does not have all of `key.requiredStages`
core::string getBindingInfoForHLSL(const SBindingKey& key) const
{
	if (key.binding.set>=DESCRIPTOR_SET_COUNT)
		return "#error \"IPipelineLayout::SBindingKey::binding::set out of range!\"";
	const auto* layout = m_descSetLayouts[key.binding.set].get();
	if (!layout)
		return "#error \"IPipelineLayout::SBindingKey::binding::set layout is nullptr!\"";
	//
	using redirect_t = IDescriptorSetLayoutBase::CBindingRedirect;
	using storage_range_index_t = redirect_t::storage_range_index_t;
	// Stays null when no allowed type was ever examined (e.g. an empty `allowedTypes`),
	// so we never dereference an indeterminate pointer further down.
	const redirect_t* redirect = nullptr;
	// NOTE(review): presumably a default-constructed index tests as "not found" - confirm;
	// the null check on `redirect` below keeps us safe either way.
	storage_range_index_t found;
	{
		const redirect_t::binding_number_t binding(key.binding.binding);
		for (auto t=0u; t<static_cast<size_t>(IDescriptor::E_TYPE::ET_COUNT); t++)
		{
			if (!key.allowedTypes.test(t))
				continue;
			redirect = &layout->getDescriptorRedirect(static_cast<IDescriptor::E_TYPE>(t));
			found = redirect->findBindingStorageIndex(binding);
			if (found)
				break;
		}
		// samplers can additionally live in the separate immutable sampler redirect
		if (!found && key.allowedTypes.test(static_cast<size_t>(IDescriptor::E_TYPE::ET_SAMPLER)))
		{
			redirect = &layout->getImmutableSamplerRedirect();
			found = redirect->findBindingStorageIndex(binding);
		}
		if (!redirect || !found)
			return "#error \"Could not find `IPipelineLayout::SBindingKey::binding::binding` in `IPipelineLayout::SBindingKey::binding::set`'s layout!\"";
	}
	if (!redirect->getStageFlags(found).hasFlags(key.requiredStages))
		return "#error \"Binding found in the layout doesn't have all the `IPipelineLayout::SBindingKey::binding::requiredStages` flags!\"";
	const auto count = redirect->getCount(found);
	assert(count); // this layout should have never passed validation
	return "::nbl::hlsl::ConstevalBindingInfo<"+std::to_string(key.binding.set)+","+std::to_string(key.binding.binding)+","+std::to_string(count)+">";
}

protected:
IPipelineLayout(
const std::span<const asset::SPushConstantRange> _pcRanges,
Expand Down
33 changes: 33 additions & 0 deletions include/nbl/builtin/hlsl/binding_info.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright (C) 2024 - DevSH Graphics Programming Sp. z O.O.
// This file is part of the "Nabla Engine".
// For conditions of distribution and use, see copyright notice in nabla.h
#ifndef _NBL_BUILTIN_HLSL_BINDING_INFO_INCLUDED_
#define _NBL_BUILTIN_HLSL_BINDING_INFO_INCLUDED_

#include "nbl/builtin/hlsl/cpp_compat.hlsl"

namespace nbl
{
namespace hlsl
{

// Compile-time description of a descriptor binding, carried purely in template arguments.
// Template parameters: `set` is the descriptor set number, `ix` the binding index within that set,
// and `count` the number of array elements (defaults to 1).
// The values are re-exposed as the static constants below so code can read them off the type.
template<uint32_t set, uint32_t ix, uint32_t count=1>
struct ConstevalBindingInfo
{
// descriptor set number, copied from the `set` template argument
NBL_CONSTEXPR_STATIC_INLINE uint32_t Set = set;
// binding index, copied from the `ix` template argument
NBL_CONSTEXPR_STATIC_INLINE uint32_t Index = ix;
// element count, copied from the `count` template argument
NBL_CONSTEXPR_STATIC_INLINE uint32_t Count = count;
};

// used for descriptor set layout lookups
// Runtime (set, binding) pair; the two bit-fields add up to 29 + 3 = 32 bits.
struct SBindingInfo
{
//! binding index for a given resource (29 bits wide, so at most 2^29-1)
uint32_t binding : 29;
//! descriptor set index for a resource (3 bits wide, so 0..7)
uint32_t set : 3;
};

}
}
#endif
107 changes: 50 additions & 57 deletions include/nbl/builtin/hlsl/blit/common.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -4,84 +4,77 @@
#ifndef _NBL_BUILTIN_HLSL_BLIT_COMMON_INCLUDED_
#define _NBL_BUILTIN_HLSL_BLIT_COMMON_INCLUDED_

#include <nbl/builtin/hlsl/cpp_compat.hlsl>
#include <nbl/builtin/hlsl/binding_info.hlsl>

namespace nbl
{
namespace hlsl
{
namespace blit
namespace glsl
{
namespace impl
// Returns the vector (ConstevalParameters::WorkGroupSize, 1, 1).
// NOTE(review): `ConstevalParameters` is not declared in this header; presumably the including shader must define it beforehand - confirm.
uint32_t3 gl_WorkGroupSize()
{
return uint32_t3(ConstevalParameters::WorkGroupSize,1,1);
}
}
}
}

template <uint32_t Dimension>
struct dim_to_image_properties { };
using namespace nbl::hlsl;

// Shader resources declared by this header. For each one, the binding index, descriptor set and array size
// are read from the `Index`, `Set` and `Count` members of a binding-info type nested in `ConstevalParameters`.
// kernel weights, exposed as a typed buffer of `float32_t4`
[[vk::binding(ConstevalParameters::kernel_weight_binding_t::Index,ConstevalParameters::kernel_weight_binding_t::Set)]]
Buffer<float32_t4> kernelWeights[ConstevalParameters::kernel_weight_binding_t::Count];
// sampler state(s) declared for the input
[[vk::binding(ConstevalParameters::input_sampler_binding_t::Index,ConstevalParameters::input_sampler_binding_t::Set)]]
SamplerState inSamp[ConstevalParameters::input_sampler_binding_t::Count];
// aliased
// (the three input declarations below all use the same `input_image_binding_t` index and set, differing only in texture type)
[[vk::binding(ConstevalParameters::input_image_binding_t::Index,ConstevalParameters::input_image_binding_t::Set)]]
Texture1DArray<float4> inAs1DArray[ConstevalParameters::input_image_binding_t::Count];
[[vk::binding(ConstevalParameters::input_image_binding_t::Index,ConstevalParameters::input_image_binding_t::Set)]]
Texture2DArray<float4> inAs2DArray[ConstevalParameters::input_image_binding_t::Count];
[[vk::binding(ConstevalParameters::input_image_binding_t::Index,ConstevalParameters::input_image_binding_t::Set)]]
Texture3D<float4> inAs3D[ConstevalParameters::input_image_binding_t::Count];
// aliased
// (the three output declarations below all use the same `output_binding_t` index and set, each with image format "unknown")
[[vk::binding(ConstevalParameters::output_binding_t::Index,ConstevalParameters::output_binding_t::Set)]] [[vk::image_format("unknown")]]
RWTexture1DArray<float4> outAs1DArray[ConstevalParameters::output_binding_t::Count];
[[vk::binding(ConstevalParameters::output_binding_t::Index,ConstevalParameters::output_binding_t::Set)]] [[vk::image_format("unknown")]]
RWTexture2DArray<float4> outAs2DArray[ConstevalParameters::output_binding_t::Count];
[[vk::binding(ConstevalParameters::output_binding_t::Index,ConstevalParameters::output_binding_t::Set)]] [[vk::image_format("unknown")]]
RWTexture3D<float4> outAs3D[ConstevalParameters::output_binding_t::Count];

template <>
struct dim_to_image_properties<1>
{
using combined_sampler_t = Texture1DArray<float4>;
using image_t = RWTexture1DArray<float4>;

template <typename T>
static vector<T, 2> getIndexCoord(vector<T, 3> coords, uint32_t layer)
groupshared uint32_t sMem[ConstevalParameters::SharedMemoryDWORDs];
/*
struct HistogramAccessor
{
void atomicAdd(uint32_t wgID, uint32_t bucket, uint32_t v)
{
return vector<T, 2>(coords.x, layer);
InterlockedAdd(statsBuff[wgID * (ConstevalParameters::AlphaBinCount + 1) + bucket], v);
}
};

template <>
struct dim_to_image_properties<2>
struct SharedAccessor
{
using combined_sampler_t = Texture2DArray<float4>;
using image_t = RWTexture2DArray<float4>;

template <typename T>
static vector<T,3> getIndexCoord(vector<T, 3> coords, uint32_t layer)
float32_t get(float32_t idx)
{
return vector<T, 3>(coords.xy, layer);
return sMem[idx];
}
void set(float32_t idx, float32_t val)
{
sMem[idx] = val;
}
};

template <>
struct dim_to_image_properties<3>
struct InCSAccessor
{
using combined_sampler_t = Texture3D<float4>;
using image_t = RWTexture3D<float4>;

template <typename T>
static vector<T, 3> getIndexCoord(vector<T, 3> coords, uint32_t layer)
float32_t4 get(float32_t3 c, uint32_t l)
{
return vector<T,3>(coords);
return inCS.SampleLevel(inSamp, blit::impl::dim_to_image_properties<ConstevalParameters::BlitDimCount>::getIndexCoord<float32_t>(c, l), 0);
}
};

}


template<
uint32_t _WorkGroupSizeX,
uint32_t _WorkGroupSizeY,
uint32_t _WorkGroupSizeZ,
uint32_t _SMemFloatsPerChannel,
uint32_t _BlitOutChannelCount,
uint32_t _BlitDimCount,
uint32_t _AlphaBinCount>
struct consteval_parameters_t
struct OutImgAccessor
{
NBL_CONSTEXPR_STATIC_INLINE uint32_t SMemFloatsPerChannel = _SMemFloatsPerChannel;
NBL_CONSTEXPR_STATIC_INLINE uint32_t BlitOutChannelCount = _BlitOutChannelCount;
NBL_CONSTEXPR_STATIC_INLINE uint32_t BlitDimCount = _BlitDimCount;
NBL_CONSTEXPR_STATIC_INLINE uint32_t AlphaBinCount = _AlphaBinCount;
NBL_CONSTEXPR_STATIC_INLINE uint32_t WorkGroupSizeX = _WorkGroupSizeX;
NBL_CONSTEXPR_STATIC_INLINE uint32_t WorkGroupSizeY = _WorkGroupSizeY;
NBL_CONSTEXPR_STATIC_INLINE uint32_t WorkGroupSizeZ = _WorkGroupSizeZ;
NBL_CONSTEXPR_STATIC_INLINE uint32_t WorkGroupSize = WorkGroupSizeX * WorkGroupSizeY * WorkGroupSizeZ;
void set(int32_t3 c, uint32_t l, float32_t4 v)
{
outImg[blit::impl::dim_to_image_properties<ConstevalParameters::BlitDimCount>::getIndexCoord<int32_t>(c, l)] = v;
}
};

}
}
}

*/
#endif
Loading

0 comments on commit c586e4b

Please sign in to comment.