From d956d3db1f4c8a8b751267a5e2d30cb5e2744838 Mon Sep 17 00:00:00 2001 From: Erdal Mutlu Date: Fri, 28 Jun 2024 15:36:46 -0700 Subject: [PATCH] updated local tensor implementation --- src/tamm/CMakeLists.txt | 1 + src/tamm/local_tensor.hpp | 354 ++++++++++++++++++++++++++++++++ src/tamm/tamm.hpp | 1 + src/tamm/tensor.hpp | 188 ----------------- tests/tamm/Test_LocalTensor.cpp | 235 +++++++++++++++++++-- 5 files changed, 576 insertions(+), 203 deletions(-) create mode 100644 src/tamm/local_tensor.hpp diff --git a/src/tamm/CMakeLists.txt b/src/tamm/CMakeLists.txt index 31def5a65..b33863116 100644 --- a/src/tamm/CMakeLists.txt +++ b/src/tamm/CMakeLists.txt @@ -45,6 +45,7 @@ set(TAMM_INCLUDES range.hpp ops.hpp scheduler.hpp + local_tensor.hpp tensor.hpp tensor_impl.hpp tensor_base.hpp diff --git a/src/tamm/local_tensor.hpp b/src/tamm/local_tensor.hpp new file mode 100644 index 000000000..0b8506d56 --- /dev/null +++ b/src/tamm/local_tensor.hpp @@ -0,0 +1,354 @@ +#pragma once + +#include "tamm/tensor.hpp" + +namespace tamm { + +// template +// class LabeledTensor; + +/// @brief Tensor whose dimensions are each a single tile spanning the whole extent, with element-wise set()/get() access through the local buffer +/// @tparam T Data type of the tensor elements +template +class LocalTensor: public Tensor { // move to another hpp +public: + LocalTensor() = default; + LocalTensor(LocalTensor&&) = default; + LocalTensor(const LocalTensor&) = default; + LocalTensor& operator=(LocalTensor&&) = default; + LocalTensor& operator=(const LocalTensor&) = default; + ~LocalTensor() = default; + + // LocalTensor(Tensor dist_tensor): dist_tensor_(dist_tensor) { construct_local_tensor(); } + + LocalTensor(std::initializer_list tiss): + Tensor(construct_local_tis_vec(TiledIndexSpaceVec(tiss))) {} + + LocalTensor(std::vector tiss): Tensor(construct_local_tis_vec(tiss)) {} + + LocalTensor(std::initializer_list tis_labels): + Tensor(construct_local_tis_vec(IndexLabelVec(tis_labels))) {} + + LocalTensor(std::initializer_list dim_sizes): 
Tensor(construct_tis_vec(std::vector(dim_sizes))) {} + + LocalTensor(std::vector dim_sizes): Tensor(construct_tis_vec(dim_sizes)) {} + + /// @brief Builds a LabeledTensor from this tensor and the forwarded arguments + /// @tparam ...Args Types of the forwarded arguments + /// @param ...rest Arguments passed on to the LabeledTensor constructor after this tensor + /// @return The constructed LabeledTensor + template + LabeledTensor operator()(Args&&... rest) const { + return LabeledTensor{*this, std::forward(rest)...}; + } + + // void write_back_to_dist() { fill_distributed_tensor(); } + + /// @brief Assigns val to the whole tensor by executing an assignment through a Scheduler on this tensor's execution context + /// @param val Value to assign; the tensor has to be allocated + void init(T val) { + EXPECTS_STR(this->is_allocated(), "LocalTensor has to be allocated"); + + auto ec = this->execution_context(); + Scheduler{*ec}((*this)() = val).execute(); + } + + /// @brief Writes val to the element at the given multi-dimensional index + /// @param indices One index per dimension; taken by const reference so element-wise loops do not copy the vector + /// @param val Value to store + void set(const std::vector& indices, T val) { + EXPECTS_STR(this->is_allocated(), "LocalTensor has to be allocated"); + EXPECTS_STR(indices.size() == this->num_modes(), + "Number of indices must match the number of dimensions"); + size_t linearIndex = compute_linear_index(indices); + + this->access_local_buf()[linearIndex] = val; + } + + /// @brief Reads the element at the given multi-dimensional index + /// @param indices One index per dimension + /// @return Copy of the stored value; like set(), requires the tensor to be allocated + T get(const std::vector& indices) const { + EXPECTS_STR(this->is_allocated(), "LocalTensor has to be allocated"); + EXPECTS_STR(indices.size() == this->num_modes(), + "Number of indices must match the number of dimensions"); + size_t linearIndex = compute_linear_index(indices); + return this->access_local_buf()[linearIndex]; + } + + /// @brief Variadic form of get() taking one index argument per dimension + /// @tparam ...Args Types of the index arguments + /// @param ...args Indices in dimension order + /// @return Copy of the stored value; like set(), requires the tensor to be allocated + template + T get(Args... args) { + EXPECTS_STR(this->is_allocated(), "LocalTensor has to be allocated"); + std::vector indices; + unpack(indices, args...); + EXPECTS_STR(indices.size() == this->num_modes(), + "Number of indices must match the number of dimensions"); + size_t linearIndex = compute_linear_index(indices); + return this->access_local_buf()[linearIndex]; + } + + /// @brief Variadic form of resize() taking one new size argument per dimension + /// @param ...args New sizes in dimension order + template + void resize(Args... 
args) { + std::vector new_sizes; + unpack(new_sizes, args...); + EXPECTS_STR(new_sizes.size() == (*this).num_modes(), + "Number of new sizes must match the number of dimensions"); + resize(std::vector{new_sizes}); + } + + /// @brief Replaces this tensor with one of the requested shape: equal sizes return immediately, sizes that all fit inside the current shape are cut out with block() from offset zero, anything else gets a new allocation filled by copy_to_bigger(); the previous tensor is deallocated afterwards. NOTE(review): a request that grows some dimensions while shrinking others also takes the copy_to_bigger() path; confirm that copy_to_bigger() stays inside the destination's bounds + /// @param new_sizes New size of each dimension; needs exactly one non-zero entry per dimension + void resize(const std::vector& new_sizes) { + EXPECTS_STR((*this).is_allocated(), "LocalTensor has to be allocated!"); + auto num_dims = (*this).num_modes(); + EXPECTS_STR(num_dims == new_sizes.size(), + "Number of new sizes must match the number of dimensions."); + + for(size_t i = 0; i < new_sizes.size(); i++) { + EXPECTS_STR(new_sizes[i] != 0, "New size should be larger than 0."); + } + + LocalTensor resizedTensor; + + auto dimensions = (*this).dim_sizes(); + + if(dimensions == new_sizes) return; + + if(isWithinOldDimensions(new_sizes)) { + std::vector offsets(new_sizes.size(), 0); + resizedTensor = (*this).block(offsets, new_sizes); + } + else { + resizedTensor = LocalTensor{new_sizes}; + resizedTensor.allocate((*this).execution_context()); + (*this).copy_to_bigger(resizedTensor); + } + + auto old_tensor = (*this); + (*this) = resizedTensor; + old_tensor.deallocate(); + } + + // /// @brief + // /// @param sbuf + // /// @param block_dims + // /// @param block_offset + // /// @param copy_to_local + // void patch_copy_local(std::vector& sbuf, const std::vector& block_dims, + // const std::vector& block_offset, bool copy_to_local) { + // auto num_dims = local_tensor_.num_modes(); + // // Compute the total number of elements to copy + // size_t total_elements = 1; + // for(size_t dim: block_dims) { total_elements *= dim; } + + // // Initialize indices to the starting offset + // std::vector indices(block_offset); + + // for(size_t c = 0; c < total_elements; ++c) { + // // Access the tensor element at the current indices + // if(copy_to_local) (*this)(indices) = sbuf[c]; + // else sbuf[c] = (*this)(indices); + + // // Increment indices + // for(int dim = num_dims - 1; dim >= 0; --dim) { + // if(++indices[dim] < 
block_offset[dim] + block_dims[dim]) { break; } + // indices[dim] = block_offset[dim]; + // } + // } + // } + + /// @brief Copies the elements of this tensor into the same index positions of another tensor + /// @param bigger_tensor Allocated destination with the same number of dimensions; only the index range present in both tensors is written + void copy_to_bigger(LocalTensor& bigger_tensor) const { + auto extents = (*this).dim_sizes(); + const auto target_dims = bigger_tensor.dim_sizes(); + EXPECTS_STR(extents.size() == target_dims.size(), + "Both tensors must have the same number of dimensions."); + + // Restrict the copy to the index range both tensors share: resize() also sends requests + // that grow some dimensions while shrinking others through this method, and visiting + // every source index would then write past the end of the destination buffer. + for(size_t d = 0; d < extents.size(); ++d) { + if(target_dims[d] < extents[d]) extents[d] = target_dims[d]; + } + + // Step through the shared region like an odometer with the last dimension moving fastest + decltype(extents) indices(extents.size(), 0); + bool done = false; + while(!done) { + bigger_tensor.set(indices, (*this).get(indices)); + done = true; + for(size_t d = extents.size(); d-- > 0;) { + if(++indices[d] < extents[d]) { + done = false; + break; + } + indices[d] = 0; + } + } + } + + /// @brief Copies a rectangular sub-region of this tensor into a new LocalTensor allocated on the same execution context + /// @param start_offsets Index of the first element of the region in each dimension + /// @param span_sizes Number of elements to take in each dimension + /// @return Tensor of shape span_sizes holding the copied elements + LocalTensor block(const std::vector& start_offsets, + const std::vector& span_sizes) const { + EXPECTS_STR((*this).is_allocated(), "LocalTensor has to be allocated!"); + auto num_dims = (*this).num_modes(); + EXPECTS_STR(num_dims == start_offsets.size(), + "Number of start offsets should match the number of dimensions."); + EXPECTS_STR(num_dims == span_sizes.size(), + "Number of span sizes should match the number of dimensions."); + + // Reject empty or out-of-range regions before anything is allocated or copied + const auto dims = (*this).dim_sizes(); + for(size_t d = 0; d < num_dims; ++d) { + EXPECTS_STR(start_offsets[d] < dims[d] && span_sizes[d] != 0 && + span_sizes[d] <= dims[d] - start_offsets[d], + "Requested block must be non-empty and lie within the tensor dimensions."); + } + + // Create a local tensor for the block + LocalTensor blockTensor{span_sizes}; + blockTensor.allocate(this->execution_context()); + + // Iterate over all dimensions to copy the block + std::vector indices(num_dims, 0); + std::vector source_indices = start_offsets; + bool done = false; + while(!done) { + blockTensor.set(indices, (*this).get(source_indices)); + done = true; + for(size_t i = 0; i < num_dims; ++i) { + 
if(++indices[i] < span_sizes[i]) { + ++source_indices[i]; + done = false; + break; + } + else { + indices[i] = 0; + source_indices[i] = start_offsets[i]; + } + } + } + + return blockTensor; + } + + /// @brief + /// @param x_offset + /// @param y_offset + /// @param x_span + /// @param y_span + /// @return + LocalTensor block(size_t x_offset, size_t y_offset, size_t x_span, size_t y_span) const { + auto num_dims = (*this).num_modes(); + EXPECTS_STR(num_dims == 2, "This block method only works for 2-D tensors!"); + + return block({x_offset, y_offset}, {x_span, y_span}); + } + + /// @brief + /// @return + std::vector dim_sizes() const { + std::vector dimensions; + + for(const auto& tis: (*this).tiled_index_spaces()) { + dimensions.push_back(tis.max_num_indices()); + } + + return dimensions; + } + +private: + /// @brief + /// @param tiss + /// @return + TiledIndexSpaceVec construct_local_tis_vec(std::vector tiss) { + std::vector dim_sizes; + + for(const auto& tis: tiss) { dim_sizes.push_back(tis.max_num_indices()); } + + return construct_tis_vec(dim_sizes); + } + + /// @brief + /// @param tis_labels + /// @return + TiledIndexSpaceVec construct_local_tis_vec(std::vector tis_labels) { + std::vector dim_sizes; + + for(const auto& tis_label: tis_labels) { + dim_sizes.push_back(tis_label.tiled_index_space().max_num_indices()); + } + + return construct_tis_vec(dim_sizes); + } + + /// @brief + /// @param dim_sizes + /// @return + TiledIndexSpaceVec construct_tis_vec(std::vector dim_sizes) { + TiledIndexSpaceVec local_tis_vec; + for(const auto& dim_size: dim_sizes) { + local_tis_vec.push_back(TiledIndexSpace{IndexSpace{range(dim_size)}, dim_size}); + } + + return local_tis_vec; + } + + /// @brief Method for constructing the linearized index for a given location on the local tensor + /// @param indices The index for the corresponding location wanted to be accessed + /// @return The linear position to the local memory manager + size_t compute_linear_index(const std::vector& 
indices) const { + auto num_modes = this->num_modes(); + const auto dims = (*this).dim_sizes(); + size_t index = 0; + size_t stride = 1; + for(size_t i = 0; i < num_modes; ++i) { + const size_t mode = num_modes - 1 - i; // the last mode has stride 1, so start there + EXPECTS_STR(indices[mode] < dims[mode], "Index is out of range for its dimension"); + index += indices[mode] * stride; + stride *= dims[mode]; + } + return index; + } + + /// @brief Checks that no requested size exceeds the current size of the same dimension + /// @param indices Requested dimension sizes; must not have more entries than this tensor has dimensions + /// @return false as soon as an entry is larger than the current size, true otherwise + bool isWithinOldDimensions(const std::vector& indices) const { + std::vector dimensions = (*this).dim_sizes(); + + for(size_t i = 0; i < indices.size(); ++i) { + if(indices[i] > dimensions[i]) { return false; } + } + return true; + } + + /// @brief Terminal overload of the helper that collects the variadic arguments of get() and resize() + /// @param indices A reference to the vector being filled + /// @param index The last value of the argument pack + void unpack(std::vector& indices, size_t index) { indices.push_back(index); } + + /// @brief Recursive overload of the helper that collects the variadic arguments of get() and resize() + /// @tparam ...Args Types of the arguments that are still to be unpacked + /// @param indices A reference to the vector being filled + /// @param next Value appended by this call + /// @param ...rest The remaining arguments, unpacked by the recursive calls + template + void unpack(std::vector& indices, size_t next, Args... 
rest) { + indices.push_back(next); + unpack(indices, rest...); + } +}; + +} // namespace tamm diff --git a/src/tamm/tamm.hpp b/src/tamm/tamm.hpp index c24093916..27635bf70 100644 --- a/src/tamm/tamm.hpp +++ b/src/tamm/tamm.hpp @@ -19,6 +19,7 @@ #include "tamm/execution_context.hpp" #include "tamm/index_space.hpp" #include "tamm/labeled_tensor.hpp" +#include "tamm/local_tensor.hpp" #include "tamm/ops.hpp" #include "tamm/rmm_memory_pool.hpp" #include "tamm/scheduler.hpp" diff --git a/src/tamm/tensor.hpp b/src/tamm/tensor.hpp index 494edfda0..b824ebf52 100644 --- a/src/tamm/tensor.hpp +++ b/src/tamm/tensor.hpp @@ -662,192 +662,4 @@ class IndexedTensor: public std::pair, IndexVector> { template IndexedTensor(Tensor, IndexVector) -> IndexedTensor; -/// @brief Creates a local copy of the distributed tensor -/// @tparam T Data type for the tensor being made local -template -class LocalTensor { -public: - LocalTensor() = default; - LocalTensor(LocalTensor&&) = default; - LocalTensor(const LocalTensor&) = default; - LocalTensor& operator=(LocalTensor&&) = default; - LocalTensor& operator=(const LocalTensor&) = default; - ~LocalTensor() = default; - - LocalTensor(Tensor dist_tensor): dist_tensor_(dist_tensor) { construct_local_tensor(); } - - /// @brief Overload for the parenthesis operation that gets a variadic template input for the - /// accessing indices - /// @tparam ...Args Variadic template for the indices to be access - /// @param ...args Input indices for accessing - /// @return A mutable reference to the value for the corresponding index in the local memory - /// region - template - T& operator()(Args... 
args) { - std::vector indices; - unpack(indices, args...); - EXPECTS_STR(indices.size() == local_tensor_.num_modes(), - "Number of indices must match the number of dimensions"); - size_t linearIndex = compute_linear_index(indices); - return local_tensor_.access_local_buf()[linearIndex]; - } - - /// @brief Overload for the parenthesis operation that gets a variadic template input for the - /// accessing indices - /// @tparam ...Args Variadic template for the indices to be access - /// @param ...args Input indices for accessing - /// @return - template - const T& operator()(Args... args) const { - std::vector indices; - unpack(indices, args...); - EXPECTS_STR(indices.size() == local_tensor_.num_modes(), - "Number of indices must match the number of dimensions"); - size_t linearIndex = compute_linear_index(indices); - return local_tensor_.access_local_buf()[linearIndex]; - } - - /// @brief Overload for the parenthesis operation that gets an index vector for the access - /// @param indices Vector of indices to be access - /// @return A mutable reference to the value for the corresponding index in the local memory - /// region - T& operator()(const std::vector& indices) { - EXPECTS_STR(indices.size() == local_tensor_.num_modes(), - "Number of indices must match the number of dimensions"); - size_t linearIndex = compute_linear_index(indices); - return local_tensor_.access_local_buf()[linearIndex]; - } - - /// @brief Overload for the parenthesis operation that gets an index vector for the access - /// @param indices Vector of indices to be access - /// @return An immutable reference to the value for the corresponding index in the local memory - /// region - template - const T& operator()(const std::vector& indices) const { - EXPECTS_STR(indices.size() == local_tensor_.num_modes(), - "Number of indices must match the number of dimensions"); - size_t linearIndex = compute_linear_index(indices); - return local_tensor_.access_local_buf()[linearIndex]; - } - - void 
write_back_to_dist() { fill_distributed_tensor(); } - -private: - /// @brief reference to the source distributed tensor - Tensor dist_tensor_; - /// @brief reference to the local tensor created from the source tensor - Tensor local_tensor_; - - /// @brief Method for constructing the local copy of the source tensor using local memory manager - /// The construction start with constructing new tiled index spaces from the original - /// tensor and constructing a new local tensor that uses local memory manager - void construct_local_tensor() { - auto tiss = dist_tensor_.tiled_index_spaces(); - TiledIndexSpaceVec local_tiss; - for(const auto& tis: tiss) { - local_tiss.push_back(TiledIndexSpace{tis.index_space(), tis.index_space().max_num_indices()}); - } - - EXPECTS(dist_tensor_.is_allocated()); - auto ec = dist_tensor_.execution_context(); - - ExecutionContext local_ec{ec->pg(), DistributionKind::nw, MemoryManagerKind::local}; - - local_tensor_ = Tensor{local_tiss}; - local_tensor_.allocate(ec); - fill_local_tensor(); - } - - /// @brief Method for constructing the linearized index for a given location on the local tensor - /// @param indices The index for the corresponding location wanted to be accessed - /// @return The linear position to the local memory manager - size_t compute_linear_index(const std::vector& indices) const { - auto num_modes = local_tensor_.num_modes(); - std::vector dims; - for(auto tis: local_tensor_.tiled_index_spaces()) { dims.push_back(tis.max_num_indices()); } - size_t index = 0; - size_t stride = 1; - - for(size_t i = 0; i < num_modes; ++i) { - index += indices[num_modes - 1 - i] * stride; - stride *= dims[num_modes - 1 - i]; - } - - return index; - } - - /// @brief Method for filling the local tensor data with the original distributed tensor. 
- /// We first construct a loop nest and to a get on all blocks that are then written to the - /// corresponding place in the new local tensor - void fill_local_tensor() { - for(const auto& blockid: dist_tensor_.loop_nest()) { - const tamm::TAMM_SIZE size = dist_tensor_.block_size(blockid); - std::vector buf(size); - dist_tensor_.get(blockid, buf); - auto block_dims = dist_tensor_.block_dims(blockid); - auto block_offset = dist_tensor_.block_offsets(blockid); - patch_copy_local(buf, block_dims, block_offset, true); - } - } - - /// @brief Method for filling the original distributed tensor data with the local tensor. - /// We first construct a loop nest and to a get on all blocks that are then written to the - /// corresponding place in the distributed tensor - void fill_distributed_tensor() { - for(const auto& blockid: dist_tensor_.loop_nest()) { - const tamm::TAMM_SIZE size = dist_tensor_.block_size(blockid); - std::vector buf(size); - dist_tensor_.get(blockid, buf); - auto block_dims = dist_tensor_.block_dims(blockid); - auto block_offset = dist_tensor_.block_offsets(blockid); - patch_copy_local(buf, block_dims, block_offset, false); - dist_tensor_.put(blockid, buf); - } - } - - /// @brief A helper method that copy a block of that to a corresponding patch in the local copy - /// @param sbuf Block data that wants to be copied - /// @param block_dims Block dimensions to find the accurate location in the linearized local - /// tensor - /// @param block_offset The offsets of the input data from the original multidimensional tensor - void patch_copy_local(std::vector& sbuf, const std::vector& block_dims, - const std::vector& block_offset, bool copy_to_local) { - auto num_dims = local_tensor_.num_modes(); - // Compute the total number of elements to copy - size_t total_elements = 1; - for(size_t dim: block_dims) { total_elements *= dim; } - - // Initialize indices to the starting offset - std::vector indices(block_offset); - - for(size_t c = 0; c < total_elements; ++c) 
{ - // Access the tensor element at the current indices - if(copy_to_local) (*this)(indices) = sbuf[c]; - else sbuf[c] = (*this)(indices); - - // Increment indices - for(int dim = num_dims - 1; dim >= 0; --dim) { - if(++indices[dim] < block_offset[dim] + block_dims[dim]) { break; } - indices[dim] = block_offset[dim]; - } - } - } - - /// @brief Helper method that will unpack the variadic template for operator() - /// @param indices A reference to the vector of indices - /// @param index The last index that is provided to the operator() - void unpack(std::vector& indices, size_t index) { indices.push_back(index); } - - /// @brief Helper method that will unpack the variadic template for operator() - /// @tparam ...Args The variadic template from the arguments to the operator() - /// @param indices A reference to the vector of indices - /// @param next Unpacked index for the operator() - /// @param ...rest The rest of the variadic template that will be unpacked in the recursive calls - template - void unpack(std::vector& indices, size_t next, Args... 
rest) { - indices.push_back(next); - unpack(indices, rest...); - } -}; - } // namespace tamm diff --git a/tests/tamm/Test_LocalTensor.cpp b/tests/tamm/Test_LocalTensor.cpp index 619bcd719..267823062 100644 --- a/tests/tamm/Test_LocalTensor.cpp +++ b/tests/tamm/Test_LocalTensor.cpp @@ -4,8 +4,56 @@ using namespace tamm; +bool check_local_tis_sizes(const TiledIndexSpace& l_tis, size_t expected_size) { + return (l_tis.max_num_indices() == expected_size && l_tis.tile_size(0) == expected_size && + l_tis.input_tile_size() == expected_size); +} + template -void test_local_tensor(Scheduler& sch, size_t N, Tile tilesize) { +bool check_local_tensor_sizes(const LocalTensor& l_tensor, + const std::vector& expected_sizes) { + EXPECTS_STR(l_tensor.num_modes() == expected_sizes.size(), + "Expected sizes should be same as the dimensions of the input LocalTensor."); + auto tis_vec = l_tensor.tiled_index_spaces(); + bool result = true; + for(size_t i = 0; i < tis_vec.size(); i++) { + if(!check_local_tis_sizes(tis_vec.at(i), expected_sizes.at(i))) { + result = false; + break; + } + } + + return result; +} + +template +bool check_local_tensor_values(const LocalTensor& l_tensor, T value) { + EXPECTS_STR(l_tensor.is_allocated(), "LocalTensor should be allocated to check the values."); + + bool result = true; + auto tis_sizes = l_tensor.dim_sizes(); + + size_t num_elements = 1; // size_t: an int would narrow the product of the sizes and mix signedness with the loop index below + + for(auto tis_sz: tis_sizes) { num_elements *= tis_sz; } + auto* local_buf = l_tensor.access_local_buf(); + for(size_t i = 0; i < num_elements; i++) { + if(local_buf[i] != value) { + result = false; + break; + } + } + return result; +} + +template +void test_local_tensor_constructors(Scheduler& sch, size_t N, Tile tilesize) { + // LocalTensor construction + // - TIS list + // - TIS vec + // - Labels + // - Sizes + TiledIndexSpace tis1{IndexSpace{range(N)}, tilesize}; auto [i, j, k, l, m] = tis1.labels<5>("all"); @@ -14,25 +62,178 @@ void test_local_tensor(Scheduler& sch, size_t N, Tile tilesize) { Tensor B{k, l}; 
Tensor C{i, j, l}; - sch.allocate(A, B, C)(A() = 1.0)(B() = 2.0)(C() = 3.0).execute(); + sch.allocate(A, B, C).execute(); + EXPECTS_STR(A.is_allocated() && B.is_allocated() && C.is_allocated(), + "All distributed tensors should be able to allocate!"); + + ExecutionContext local_ec{sch.ec().pg(), DistributionKind::nw, MemoryManagerKind::local}; + Scheduler sch_local{local_ec}; + + LocalTensor local_A{tis1, tis1, tis1}; + LocalTensor local_B{B.tiled_index_spaces()}; + LocalTensor local_C{i, j, l}; + LocalTensor local_D{N, N, N}; + LocalTensor local_E{10, 10, 10}; + + sch_local.allocate(local_A, local_B, local_C, local_D, local_E).execute(); + + EXPECTS_STR(local_A.is_allocated() && local_B.is_allocated() && local_C.is_allocated() && + local_D.is_allocated() && local_E.is_allocated(), + "All local tensors should be able to allocate!"); + + EXPECTS_STR(check_local_tensor_sizes(local_A, {N, N, N}), "Local_A is not correctly created!"); + EXPECTS_STR(check_local_tensor_sizes(local_B, {N, N}), "Local_B is not correctly created!"); + EXPECTS_STR(check_local_tensor_sizes(local_C, {N, N, N}), "Local_C is not correctly created!"); + EXPECTS_STR(check_local_tensor_sizes(local_D, {N, N, N}), "Local_D is not correctly created!"); + EXPECTS_STR(check_local_tensor_sizes(local_E, {10, 10, 10}), "Local_E is not correctly created!"); +} + +template +void test_local_tensor_block(ExecutionContext& ec, size_t N) { + // Block + // - Tensor - various sizes, test with 0 for any dim size + // - Matrix - various sizes, test with 0 for any dim size + + ExecutionContext local_ec{ec.pg(), DistributionKind::nw, MemoryManagerKind::local}; + Scheduler sch_local{local_ec}; + + LocalTensor local_A{N, N, N}; + LocalTensor local_B{N, N}; + + sch_local.allocate(local_A, local_B)(local_A() = 42.0)(local_B() = 21.0).execute(); + + auto local_C = local_A.block({0, 0, 0}, {4, 4, 4}); + auto local_D = local_B.block(0, 0, 4, 4); + + EXPECTS_STR(check_local_tensor_sizes(local_C, {4, 4, 4}), "Local_C is not 
correctly created!"); + EXPECTS_STR(check_local_tensor_sizes(local_D, {4, 4}), "Local_D is not correctly created!"); + + EXPECTS_STR(check_local_tensor_values(local_C, 42.0), "Local_C doesn't have correct values!"); + EXPECTS_STR(check_local_tensor_values(local_D, 21.0), "Local_D doesn't have correct values!"); +} + +template +void test_local_tensor_resize(ExecutionContext& ec, size_t N) { + // Resize + // - Smaller + // - Larger + // - Same size + // - all 0 size + // - change dim? + + ExecutionContext local_ec{ec.pg(), DistributionKind::nw, MemoryManagerKind::local}; + Scheduler sch_local{local_ec}; - LocalTensor A_local{A}; - LocalTensor B_local{B}; - LocalTensor C_local{C}; + LocalTensor local_A{N, N, N}; + LocalTensor local_B{N, N}; - std::cout << "A_local" << std::endl; - for(size_t i_idx = 0; i_idx < N; i_idx++) { - for(size_t j_idx = 0; j_idx < N; j_idx++) { - for(size_t k_idx = 0; k_idx < N; k_idx++) { - std::cout << A_local(i_idx, j_idx, k_idx) << "\t"; - A_local(i_idx, j_idx, k_idx) = 42.0; + sch_local.allocate(local_A, local_B)(local_A() = 42.0)(local_B() = 21.0).execute(); + + local_A.resize(5, 5, 5); + EXPECTS_STR(check_local_tensor_sizes(local_A, {5, 5, 5}), "Local_A is not correctly created!"); + EXPECTS_STR(check_local_tensor_values(local_A, 42.0), "Local_A doesn't have correct values!"); + + auto* tensor_ptr = local_A.base_ptr(); + local_A.resize(5, 5, 5); + auto* tensor_resize_ptr = local_A.base_ptr(); + + EXPECTS_STR(tensor_ptr == tensor_resize_ptr, + "Resize into same size should return the old tensor!"); + + local_A.resize(N, N, N); + EXPECTS_STR(check_local_tensor_sizes(local_A, {N, N, N}), "Local_A is not correctly created!"); + EXPECTS_STR(check_local_tensor_values(local_A.block({0, 0, 0}, {5, 5, 5}), 42.0), + "Local_A doesn't have correct values!"); + + // local_A.resize(0,0,0); + + // local_A.resize(5,5); +} + +template +void test_local_tensor_accessor(ExecutionContext& ec, size_t N) { + // Set/Get + // - Single access + // - Looped 
access + + ExecutionContext local_ec{ec.pg(), DistributionKind::nw, MemoryManagerKind::local}; + Scheduler sch_local{local_ec}; + + LocalTensor local_A{N, N, N}; + LocalTensor local_B{N, N}; + + sch_local.allocate(local_A, local_B)(local_A() = 42.0)(local_B() = 21.0).execute(); + + EXPECTS_STR(local_A.get(0, 0, 0) == 42.0, "The get value doesn't match the expected value."); + + local_A.set({0, 0, 0}, 1.0); + EXPECTS_STR(local_A.get(0, 0, 0) == 1.0, "The get value doesn't match the expected value."); + local_A.set({0, 0, 0}, 42.0); + + for(size_t i = 0; i < N; i++) { + for(size_t j = 0; j < N; j++) { + for(size_t k = 0; k < N; k++) { + EXPECTS_STR(local_A.get(i, j, k) == 42.0, + "The get value doesn't match the expected value."); + local_A.set({i, j, k}, local_B.get(i, j)); + EXPECTS_STR(local_A.get(i, j, k) == 21.0, + "The get value doesn't match the expected value."); } } - std::cout << std::endl; } - A_local.write_back_to_dist(); +} + +template +void test_local_tensor(Scheduler& sch, size_t N, Tile tilesize) { + TiledIndexSpace tis1{IndexSpace{range(N)}, tilesize}; + + auto [i, j, k, l, m] = tis1.labels<5>("all"); + + Tensor A{i, j, k}; + Tensor B{k, l}; + Tensor C{i, j, l}; + + sch.allocate(A, B, C)(A() = 1.0)(B() = 2.0)(C() = 3.0).execute(); + + ExecutionContext local_ec{sch.ec().pg(), DistributionKind::nw, MemoryManagerKind::local}; + + Scheduler sch_local{local_ec}; + + LocalTensor new_local1{i, j, k}; + LocalTensor new_local2{tis1, tis1, tis1}; + LocalTensor new_local3{N, N, N}; + LocalTensor new_local4{A.tiled_index_spaces()}; + + sch_local + .allocate(new_local1, new_local2, new_local3, new_local4)(new_local1() = 42.0)( + new_local2() = 21.0)(new_local3() = 1.0)(new_local4() = 2.0) + + // .deallocate() + .execute(); + + // std::cout << "A_local" << std::endl; + new_local3.init(42.0); + + std::cout << "value at 5,5,5 - " << new_local3.get(5, 5, 5) << std::endl; + new_local3.set({5, 5, 5}, 1.0); // memset val + auto val = new_local3.get({5, 5, 5}); // memset 
val + + std::cout << "new value at 5,5,5 - " << new_local3.get(5, 5, 5) << std::endl; + std::cout << "new_local4* before resize - " << new_local4.base_ptr() << std::endl; + new_local4.resize(N, N, N); // vector.resize()? eigen.resize()? + + std::cout << "new_local4* after resize - " << new_local4.base_ptr() << std::endl; + std::cout << "----------------------------------------------------" << std::endl; + std::cout << "new_local4* before resize - " << new_local4.base_ptr() << std::endl; + new_local4.resize(N + 5, N + 5, N + 5); // vector.resize()? eigen.resize()? + std::cout << "new_local4* after resize - " << new_local4.base_ptr() << std::endl; + auto new_local5 = new_local3.block({5, 5, 5}, {4, 4, 4}); - print_tensor(A); + print_tensor(new_local1); + print_tensor(new_local2); + print_tensor(new_local3); + print_tensor(new_local4); + print_tensor(new_local5); } int main(int argc, char* argv[]) { @@ -66,7 +267,11 @@ int main(int argc, char* argv[]) { std::cout << std::endl << std::endl; } - test_local_tensor(sch, is_size, tile_size); + // test_local_tensor(sch, is_size, tile_size); + test_local_tensor_constructors(sch, is_size, tile_size); + test_local_tensor_block(ec, is_size); + test_local_tensor_resize(ec, is_size); + test_local_tensor_accessor(ec, is_size); tamm::finalize();