diff --git a/64_FFT/CMakeLists.txt b/64_FFT/CMakeLists.txt
new file mode 100644
index 00000000..a434ff32
--- /dev/null
+++ b/64_FFT/CMakeLists.txt
@@ -0,0 +1,24 @@
+include(common RESULT_VARIABLE RES)
+if(NOT RES)
+	message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory")
+endif()
+
+nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}")
+
+if(NBL_EMBED_BUILTIN_RESOURCES)
+	set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData)
+	set(RESOURCE_DIR "app_resources")
+
+	get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE)
+	get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE)
+	get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE)
+
+	file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*")
+	foreach(RES_FILE ${BUILTIN_RESOURCE_FILES})
+		LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}")
+	endforeach()
+
+	ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}")
+
+	LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_})
+endif()
\ No newline at end of file
diff --git a/64_FFT/app_resources/common.hlsl b/64_FFT/app_resources/common.hlsl
new file mode 100644
index 00000000..c6fadb8e
--- /dev/null
+++ b/64_FFT/app_resources/common.hlsl
@@ -0,0 +1,14 @@
+#include "nbl/builtin/hlsl/cpp_compat.hlsl"
+
+using scalar_t = nbl::hlsl::float32_t;
+
+struct PushConstantData
+{
+	uint64_t inputAddress;
+	uint64_t outputAddress;
+	uint32_t dataElementCount;
+};
+
+NBL_CONSTEXPR uint32_t WorkgroupSize = 64;
+NBL_CONSTEXPR uint32_t ElementsPerThread = 8;
+NBL_CONSTEXPR uint32_t complexElementCount = WorkgroupSize * ElementsPerThread;
\ No newline at end of file
diff --git a/64_FFT/app_resources/shader.comp.hlsl b/64_FFT/app_resources/shader.comp.hlsl
new file mode 100644
index 00000000..968fcb30
--- /dev/null
+++ b/64_FFT/app_resources/shader.comp.hlsl
@@ -0,0 +1,62 @@
+#include "common.hlsl"
+#include "nbl/builtin/hlsl/workgroup/fft.hlsl"
+#include "nbl/builtin/hlsl/bda/legacy_bda_accessor.hlsl"
+
+[[vk::push_constant]] PushConstantData pushConstants;
+
+using namespace nbl::hlsl;
+
+// careful: change size according to Scalar type
+groupshared uint32_t sharedmem[ workgroup::fft::SharedMemoryDWORDs<scalar_t, WorkgroupSize> ];
+
+// Users MUST define this method for FFT to work
+uint32_t3 glsl::gl_WorkGroupSize() { return uint32_t3(WorkgroupSize, 1, 1); }
+
+struct SharedMemoryAccessor
+{
+	void set(uint32_t idx, uint32_t value)
+	{
+		sharedmem[idx] = value;
+	}
+
+	void get(uint32_t idx, NBL_REF_ARG(uint32_t) value)
+	{
+		value = sharedmem[idx];
+	}
+
+	void workgroupExecutionAndMemoryBarrier()
+	{
+		glsl::barrier();
+	}
+
+};
+
+struct Accessor : DoubleLegacyBdaAccessor< complex_t<scalar_t> >
+{
+	static Accessor create(const uint64_t inputAddress, const uint64_t outputAddress)
+	{
+		Accessor accessor;
+		accessor.inputAddress = inputAddress;
+		accessor.outputAddress = outputAddress;
+		return accessor;
+	}
+
+	void memoryBarrier()
+	{
+		// only one workgroup is touching any memory it wishes to trade
+		spirv::memoryBarrier(spv::ScopeWorkgroup, spv::MemorySemanticsAcquireReleaseMask | spv::MemorySemanticsUniformMemoryMask);
+	}
+};
+
+[numthreads(WorkgroupSize,1,1)]
+void main(uint32_t3 ID : SV_DispatchThreadID)
+{
+	Accessor accessor = Accessor::create(pushConstants.inputAddress, pushConstants.outputAddress);
+	SharedMemoryAccessor sharedmemAccessor;
+
+	// FFT
+
+	workgroup::FFT<ElementsPerThread, false, scalar_t>::template __call(accessor, sharedmemAccessor);
+	accessor.memoryBarrier(); // only memory visibility is needed between the forward and inverse passes, the FFT does its own execution barriers
+	workgroup::FFT<ElementsPerThread, true, scalar_t>::template __call(accessor, sharedmemAccessor);
+}
\ No newline at end of file
diff --git a/64_FFT/config.json.template b/64_FFT/config.json.template
new file mode 100644
index 00000000..717d05d5
--- /dev/null
+++ b/64_FFT/config.json.template
@@ -0,0 +1,28 @@
+{
+	"enableParallelBuild": true,
+	"threadsPerBuildProcess" : 2,
+	"isExecuted": false,
+	"scriptPath": "",
+	"cmake": {
+		"configurations": [ "Release", "Debug", "RelWithDebInfo" ],
+		"buildModes": [],
+		"requiredOptions": []
+	},
+	"profiles": [
+		{
+			"backend": "vulkan", // should be none
+			"platform": "windows",
+			"buildModes": [],
+			"runConfiguration": "Release", // we also need to run in Debug and RWDI because it's a foundational example
+			"gpuArchitectures": []
+		}
+	],
+	"dependencies": [],
+	"data": [
+		{
+			"dependencies": [],
+			"command": [""],
+			"outputs": []
+		}
+	]
+}
\ No newline at end of file
diff --git a/64_FFT/main.cpp b/64_FFT/main.cpp
new file mode 100644
index 00000000..3b9b53c9
--- /dev/null
+++ b/64_FFT/main.cpp
@@ -0,0 +1,329 @@
+// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+
+// I've moved out a tiny part of this example into a shared header for reuse, please open and read it.
+#include "nbl/application_templates/MonoDeviceApplication.hpp"
+#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp"
+
+
+using namespace nbl;
+using namespace core;
+using namespace system;
+using namespace asset;
+using namespace video;
+
+
+#include "app_resources/common.hlsl"
+#include "nbl/builtin/hlsl/bit.hlsl"
+#include "nbl/builtin/hlsl/random/xoroshiro.hlsl"
+
+
+// Simple showcase of how to run an FFT on a 1D array
+class FFT_Test final : public application_templates::MonoDeviceApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication
+{
+	using device_base_t = application_templates::MonoDeviceApplication;
+	using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication;
+
+	smart_refctd_ptr<IGPUComputePipeline> m_pipeline;
+
+	smart_refctd_ptr<nbl::video::IUtilities> m_utils;
+
+	nbl::video::StreamingTransientDataBufferMT<>* m_upStreamingBuffer;
+	StreamingTransientDataBufferMT<>* m_downStreamingBuffer;
+	smart_refctd_ptr<IGPUBuffer> m_deviceLocalBuffer;
+
+	// These are Buffer Device Addresses
+	uint64_t m_upStreamingBufferAddress;
+	uint64_t m_downStreamingBufferAddress;
+	uint64_t m_deviceLocalBufferAddress;
+
+	// You can ask the `nbl::core::GeneralpurposeAddressAllocator` used internally by the Streaming Buffers to give out offsets aligned to a certain multiple (not only a Power of Two!)
+	uint32_t m_alignment;
+
+	// This example really lets the advantages of a timeline semaphore shine through!
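+	// A rough sketch of the timeline idiom used below, with hypothetical counter values (the real code keeps the counter in `semaphoreValue`):
+	//   m_timeline = m_device->createSemaphore(0); // counter starts at 0
+	//   queue->submit(...);                        // the submit signals the counter to 1 once all its commands finish
+	//   wait/latch on { m_timeline, 1 };           // host-side waits and latched frees key off that same value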
+	smart_refctd_ptr<ISemaphore> m_timeline;
+	uint64_t semaphoreValue = 0;
+
+public:
+	// Yay thanks to multiple inheritance we cannot forward ctors anymore
+	FFT_Test(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) :
+		system::IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {}
+
+	// we stuff all our work here because it's a "single shot" app
+	bool onAppInitialized(smart_refctd_ptr<ISystem>&& system) override
+	{
+		// Remember to call the base class initialization!
+		if (!device_base_t::onAppInitialized(smart_refctd_ptr(system)))
+			return false;
+		if (!asset_base_t::onAppInitialized(std::move(system)))
+			return false;
+
+		// this time we load a shader directly from a file
+		smart_refctd_ptr<IGPUShader> shader;
+		{
+			IAssetLoader::SAssetLoadParams lp = {};
+			lp.logger = m_logger.get();
+			lp.workingDirectory = ""; // virtual root
+			auto assetBundle = m_assetMgr->getAsset("app_resources/shader.comp.hlsl", lp);
+			const auto assets = assetBundle.getContents();
+			if (assets.empty())
+				return logFail("Could not load shader!");
+
+			// Cast down the asset to its proper type
+			auto source = IAsset::castDown<ICPUShader>(assets[0]);
+			// The down-cast should not fail!
+			assert(source);
+
+			// Compile directly to IGPUShader
+			shader = m_device->createShader(source.get());
+			if (!shader)
+				return logFail("Creation of a GPU Shader from CPU Shader source failed!");
+		}
+
+		// Create massive upload/download buffers
+		constexpr uint32_t DownstreamBufferSize = sizeof(scalar_t) << 23;
+		constexpr uint32_t UpstreamBufferSize = sizeof(scalar_t) << 23;
+
+		m_utils = make_smart_refctd_ptr<nbl::video::IUtilities>(smart_refctd_ptr(m_device), smart_refctd_ptr(m_logger), DownstreamBufferSize, UpstreamBufferSize);
+		if (!m_utils)
+			return logFail("Failed to create Utilities!");
+		m_upStreamingBuffer = m_utils->getDefaultUpStreamingBuffer();
+		m_downStreamingBuffer = m_utils->getDefaultDownStreamingBuffer();
+		m_upStreamingBufferAddress = m_upStreamingBuffer->getBuffer()->getDeviceAddress();
+		m_downStreamingBufferAddress = m_downStreamingBuffer->getBuffer()->getDeviceAddress();
+
+		// Create device-local buffer
+		{
+			const uint32_t scalarElementCount = 2 * complexElementCount;
+			IGPUBuffer::SCreationParams deviceLocalBufferParams = {};
+
+			IQueue* const queue = getComputeQueue();
+			uint32_t queueFamilyIndex = queue->getFamilyIndex();
+
+			deviceLocalBufferParams.queueFamilyIndexCount = 1;
+			deviceLocalBufferParams.queueFamilyIndices = &queueFamilyIndex;
+			deviceLocalBufferParams.size = sizeof(scalar_t) * scalarElementCount;
+			deviceLocalBufferParams.usage = nbl::asset::IBuffer::E_USAGE_FLAGS::EUF_TRANSFER_SRC_BIT | nbl::asset::IBuffer::E_USAGE_FLAGS::EUF_TRANSFER_DST_BIT | nbl::asset::IBuffer::E_USAGE_FLAGS::EUF_SHADER_DEVICE_ADDRESS_BIT;
+
+			m_deviceLocalBuffer = m_device->createBuffer(std::move(deviceLocalBufferParams));
+			auto mreqs = m_deviceLocalBuffer->getMemoryReqs();
+			mreqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits();
+			auto gpubufMem = m_device->allocate(mreqs, m_deviceLocalBuffer.get(), IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_DEVICE_ADDRESS_BIT);
+
+			m_deviceLocalBufferAddress = m_deviceLocalBuffer->getDeviceAddress();
+		}
+
+		const nbl::asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0, .size = sizeof(PushConstantData) };
+
+		{
+			auto layout = m_device->createPipelineLayout({ &pcRange, 1 });
+			IGPUComputePipeline::SCreationParams params = {};
+			params.layout = layout.get();
+			params.shader.shader = shader.get();
+			params.shader.requiredSubgroupSize = static_cast<IGPUShader::SSpecInfo::SUBGROUP_SIZE>(hlsl::findMSB(m_physicalDevice->getLimits().maxSubgroupSize));
+			params.shader.requireFullSubgroups = true;
+			if (!m_device->createComputePipelines(nullptr, { &params, 1 }, &m_pipeline))
+				return logFail("Failed to create compute pipeline!\n");
+		}
+
+		const auto& deviceLimits = m_device->getPhysicalDevice()->getLimits();
+		// The ranges of non-coherent mapped memory you flush or invalidate need to be aligned. You'll often see a value of 64 reported by devices
+		// which just happens to coincide with a CPU cache line size. So we ask our streaming buffers during allocation to give us properly aligned offsets.
+		// Sidenote: For SSBOs, UBOs, BufferViews, Vertex Buffer Bindings, Acceleration Structure BDAs, Shader Binding Tables, Descriptor Buffers, etc.
+		// there is also a requirement to bind buffers at offsets which have a certain alignment. Memory binding to Buffers and Images also has those.
+		// We'll align to the max of the coherent atom size even if the memory is coherent,
+		// and we also need to take into account that BDA shader loads need to be aligned to the type being loaded.
+		m_alignment = core::max(deviceLimits.nonCoherentAtomSize, alignof(float));
+
+		// Semaphore used here to know the FFT is done before download
+		m_timeline = m_device->createSemaphore(semaphoreValue);
+
+		IQueue* const queue = getComputeQueue();
+
+		// Note that I'm using the same struct with methods that have identical code which compiles as both C++ and HLSL
+		auto rng = nbl::hlsl::Xoroshiro64StarStar::construct({ semaphoreValue ^ 0xdeadbeefu, std::hash<string>()(_NBL_APP_NAME_) });
+
+		const uint32_t scalarElementCount = 2 * complexElementCount;
+		const uint32_t inputSize = sizeof(scalar_t) * scalarElementCount;
+
+		// Just need a single suballocation in this example
+		const uint32_t AllocationCount = 1;
+
+		// It comes with a certain drawback in that you need to remember to initialize your "yet unallocated" offsets to the Invalid value;
+		// this is to allow a set of allocations to fail, and let you re-try after doing something to free up space, without repacking args.
+		auto inputOffset = m_upStreamingBuffer->invalid_value;
+
+		// We always just wait till an allocation becomes possible (during allocation previous "latched" frees get their latch conditions polled)
+		// Freeing of Streaming Buffer Allocations can and should be deferred until an associated polled event signals done (more on that later).
+		std::chrono::steady_clock::time_point waitTill(std::chrono::years(45));
+		// note that the API takes a time-point, not a duration, because there are multiple waits and preemptions possible, so the durations wouldn't add up properly
+		m_upStreamingBuffer->multi_allocate(waitTill, AllocationCount, &inputOffset, &inputSize, &m_alignment);
+
+		// Generate our data in-place on the allocated staging buffer. Packing is interleaved in this example!
+		{
+			auto* const inputPtr = reinterpret_cast<scalar_t*>(reinterpret_cast<uint8_t*>(m_upStreamingBuffer->getBufferPointer()) + inputOffset);
+			std::cout << "Begin array CPU\n";
+			for (auto j = 0u; j < complexElementCount; j++)
+			{
+				// Random array:
+				//scalar_t x = rng() / scalar_t(nbl::hlsl::numeric_limits<uint32_t>::max), y = rng() / scalar_t(nbl::hlsl::numeric_limits<uint32_t>::max);
+
+				// FFT( (1,0), (0,0), (0,0), ... ) = (1,0), (1,0), (1,0), ...
+				scalar_t x = j > 0 ? 0.f : 1.f;
+				scalar_t y = 0;
+
+				// FFT( (c,0), (c,0), (c,0), ... ) = (Nc,0), (0,0), (0,0), ...
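+				// Worked example with our sizes, assuming the two identities above and no normalization:
+				// N = complexElementCount = WorkgroupSize * ElementsPerThread = 64 * 8 = 512, so the constant
+				// input (1,0) repeated N times would print as (512, 0) followed by 511 zeros, while the delta
+				// input used above should print (1, 0) in every bin.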
+
+				/*
+				scalar_t x = 1.f;
+				scalar_t y = 0.f;
+				*/
+
+				inputPtr[2 * j] = x;
+				inputPtr[2 * j + 1] = y;
+				std::cout << "(" << x << ", " << y << "), ";
+			}
+			std::cout << "\nEnd array CPU\n";
+			// Always remember to flush!
+			if (m_upStreamingBuffer->needsManualFlushOrInvalidate())
+			{
+				const auto bound = m_upStreamingBuffer->getBuffer()->getBoundMemory();
+				const ILogicalDevice::MappedMemoryRange range(bound.memory, bound.offset + inputOffset, inputSize);
+				m_device->flushMappedMemoryRanges(1, &range);
+			}
+		}
+
+		// finally allocate our output range
+		const uint32_t outputSize = inputSize;
+
+		auto outputOffset = m_downStreamingBuffer->invalid_value;
+		m_downStreamingBuffer->multi_allocate(waitTill, AllocationCount, &outputOffset, &outputSize, &m_alignment);
+
+		smart_refctd_ptr<IGPUCommandBuffer> cmdbuf;
+		{
+			smart_refctd_ptr<nbl::video::IGPUCommandPool> cmdpool = m_device->createCommandPool(queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT);
+			if (!cmdpool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { &cmdbuf, 1 }, core::smart_refctd_ptr(m_logger)))
+				return logFail("Failed to create Command Buffers!\n");
+			cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT);
+			cmdbuf->bindComputePipeline(m_pipeline.get());
+			// This is the new fun part, pushing constants
+			const PushConstantData pc = {
+				.inputAddress = m_deviceLocalBufferAddress,
+				.outputAddress = m_deviceLocalBufferAddress,
+				.dataElementCount = scalarElementCount
+			};
+			// Upload: copy the staged input into the device-local buffer, remembering our staging suballocation's offset
+			IGPUCommandBuffer::SBufferCopy copyInfo = {};
+			copyInfo.srcOffset = inputOffset;
+			copyInfo.dstOffset = 0;
+			copyInfo.size = m_deviceLocalBuffer->getSize();
+			cmdbuf->copyBuffer(m_upStreamingBuffer->getBuffer(), m_deviceLocalBuffer.get(), 1, &copyInfo);
+
+			// Pipeline barrier: wait for the upload copy to be done before the FFT shader reads the buffer
+			IGPUCommandBuffer::SPipelineBarrierDependencyInfo uploadBarrierInfo = {};
+
+			decltype(uploadBarrierInfo)::buffer_barrier_t uploadBarrier = {};
+			uploadBarrierInfo.bufBarriers = { &uploadBarrier, 1u };
+
+			uploadBarrier.range.buffer = m_deviceLocalBuffer;
+
+			uploadBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT;
+			uploadBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::MEMORY_WRITE_BITS;
+			uploadBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT;
+			uploadBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::MEMORY_READ_BITS;
+
+			cmdbuf->pipelineBarrier(asset::E_DEPENDENCY_FLAGS(0), uploadBarrierInfo);
+
+			cmdbuf->pushConstants(m_pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0u, sizeof(pc), &pc);
+			// Remember we do a single workgroup per 1D array in these parts
+			cmdbuf->dispatch(1, 1, 1);
+
+			// Pipeline barrier: wait for FFT shader to be done before copying to downstream buffer
+			IGPUCommandBuffer::SPipelineBarrierDependencyInfo pipelineBarrierInfo = {};
+
+			decltype(pipelineBarrierInfo)::buffer_barrier_t barrier = {};
+			pipelineBarrierInfo.bufBarriers = { &barrier, 1u };
+
+			barrier.range.buffer = m_deviceLocalBuffer;
+
+			barrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT;
+			barrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::MEMORY_WRITE_BITS;
+			barrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT;
+			barrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::MEMORY_READ_BITS;
+
+			cmdbuf->pipelineBarrier(asset::E_DEPENDENCY_FLAGS(0), pipelineBarrierInfo);
+
+			// Download: copy the FFT output back out to the staging buffer, at our output suballocation's offset
+			copyInfo.srcOffset = 0;
+			copyInfo.dstOffset = outputOffset;
+			cmdbuf->copyBuffer(m_deviceLocalBuffer.get(), m_downStreamingBuffer->getBuffer(), 1, &copyInfo);
+			cmdbuf->end();
+		}
+
+		semaphoreValue++;
+		{
+			const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufInfo =
+			{
+				.cmdbuf = cmdbuf.get()
+			};
+			const IQueue::SSubmitInfo::SSemaphoreInfo signalInfo =
+			{
+				.semaphore = m_timeline.get(),
+				.value = semaphoreValue,
+				// signal only after the copy to the downstream buffer, the last command we recorded, is done
+				.stageMask = asset::PIPELINE_STAGE_FLAGS::COPY_BIT
+			};
+
+			const IQueue::SSubmitInfo submitInfo = {
+				.waitSemaphores = {},
+				.commandBuffers = { &cmdbufInfo, 1 },
+				.signalSemaphores = { &signalInfo, 1 }
+			};
+
+			queue->startCapture();
+			queue->submit({ &submitInfo, 1 });
+			queue->endCapture();
+		}
+
+		// We let all latches know what semaphore and counter value has to be passed for the functors to execute
+		const ISemaphore::SWaitInfo futureWait = { m_timeline.get(), semaphoreValue };
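+
+		// The deferred-free pattern used twice below, sketched (same signatures as the actual calls that follow):
+		//   multi_deallocate(AllocationCount, &offset, &size, futureWait);                // plain deferred free
+		//   multi_deallocate(AllocationCount, &offset, &size, futureWait, &latchedEvent); // also runs a latched callback
+		// Neither the offsets nor the latched object get released until m_timeline reaches semaphoreValue.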
+		// As promised, we can defer an upstreaming buffer deallocation until a fence is signalled
+		// You can also attach an additional, optional IReferenceCounted-derived object to hold onto until deallocation.
+		m_upStreamingBuffer->multi_deallocate(AllocationCount, &inputOffset, &inputSize, futureWait);
+
+		// Now a new and even more advanced usage of the latched events: we make our own refcounted object with a custom destructor, and latch that like we did the commandbuffer.
+		// Instead of making our own and duplicating logic, we'll use one from IUtilities meant for down-staging memory.
+		// It's nice because it will also remember to invalidate our memory mapping if it's not coherent.
+		auto latchedConsumer = make_smart_refctd_ptr<IUtilities::CDownstreamingDataConsumer>(
+			IDeviceMemoryAllocation::MemoryRange(outputOffset, outputSize),
+			// Note the use of capture by-value [=] and not by-reference [&] because this lambda will be called asynchronously whenever the event signals
+			[=](const size_t dstOffset, const void* bufSrc, const size_t size)->void
+			{
+				// The unused variable is used for letting the consumer know the subsection of the output we've managed to download
+				// But here we're sure we can get the whole thing in one go because we allocated the whole range ourselves.
+				assert(dstOffset == 0 && size == outputSize);
+
+				std::cout << "Begin array GPU\n";
+				const scalar_t* const data = reinterpret_cast<const scalar_t*>(bufSrc);
+				for (auto i = 0u; i < complexElementCount; i++) {
+					std::cout << "(" << data[2 * i] << ", " << data[2 * i + 1] << "), ";
+				}
+				std::cout << "\nEnd array GPU\n";
+			},
+			// It's also necessary to hold onto the commandbuffer, even though we take care to not reset the parent pool, because if it
+			// hits its destructor, our automated reference counting will drop all references to objects used in the recorded commands.
+			// It could also be latched in the upstreaming deallocate, because it's the same fence.
+			std::move(cmdbuf), m_downStreamingBuffer
+		);
+		// We put a function we want to execute
+		m_downStreamingBuffer->multi_deallocate(AllocationCount, &outputOffset, &outputSize, futureWait, &latchedConsumer.get());
+
+		return true;
+	}
+
+	// One-shot App
+	bool keepRunning() override { return false; }
+
+	// One-shot App
+	void workLoopBody() override {}
+
+	// Cleanup
+	bool onAppTerminated() override
+	{
+		// Need to make sure that there are no events outstanding if we want all lambdas to eventually execute before `onAppTerminated`
+		// (the destructors of the Command Pool Cache and Streaming buffers will still wait for all lambda events to drain)
+		while (m_downStreamingBuffer->cull_frees()) {}
+		return device_base_t::onAppTerminated();
+	}
+};
+
+
+NBL_MAIN_FUNC(FFT_Test)
\ No newline at end of file
diff --git a/64_FFT/pipeline.groovy b/64_FFT/pipeline.groovy
new file mode 100644
index 00000000..1a7b043a
--- /dev/null
+++ b/64_FFT/pipeline.groovy
@@ -0,0 +1,50 @@
+import org.DevshGraphicsProgramming.Agent
+import org.DevshGraphicsProgramming.BuilderInfo
+import org.DevshGraphicsProgramming.IBuilder
+
+class CFFTBuilder extends IBuilder
+{
+	public CFFTBuilder(Agent _agent, _info)
+	{
+		super(_agent, _info)
+	}
+
+	@Override
+	public boolean prepare(Map axisMapping)
+	{
+		return true
+	}
+
+	@Override
+	public boolean build(Map axisMapping)
+	{
+		IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION")
+		IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE")
+
+		def nameOfBuildDirectory = getNameOfBuildDirectory(buildType)
+		def nameOfConfig = getNameOfConfig(config)
+
+		agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v")
+
+		return true
+	}
+
+	@Override
+	public boolean test(Map axisMapping)
+	{
+		return true
+	}
+
+	@Override
+	public boolean install(Map axisMapping)
+	{
+		return true
+	}
+}
+
+def create(Agent _agent, _info)
+{
+	return new CFFTBuilder(_agent, _info)
+}
+
+return this
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9bc4ffc2..0d485d3e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -64,5 +64,6 @@ if(NBL_BUILD_EXAMPLES)
 	add_subdirectory(61_UI EXCLUDE_FROM_ALL)
 	add_subdirectory(62_CAD EXCLUDE_FROM_ALL)
 	add_subdirectory(62_SchusslerTest EXCLUDE_FROM_ALL)
+	add_subdirectory(64_FFT EXCLUDE_FROM_ALL)
 	add_subdirectory(0_ImportanceSamplingEnvMaps EXCLUDE_FROM_ALL) #TODO: integrate back into 42
 endif()