diff --git a/.github/workflows/c-cpp.yaml b/.github/workflows/c-cpp.yaml index 929756708..053a530b5 100644 --- a/.github/workflows/c-cpp.yaml +++ b/.github/workflows/c-cpp.yaml @@ -180,7 +180,8 @@ jobs: upcxx --version export CPATH=$CPATH:/usr/lib/x86_64-linux-gnu/openmpi/include export CPATH=$CPATH:$INSTALL_PATH/include/eigen3 - UPCXX_CODEMODE=O3 CXX=upcxx $GITHUB_WORKSPACE/cmake-3.22.6-linux-x86_64/bin/cmake -H. -Bbuild -DCMAKE_INSTALL_PREFIX=$INSTALL_PATH -DENABLE_COVERAGE=ON -DUSE_UPCXX=ON -DJOB_LAUNCH_CMD="upcxx-run" + echo "UPCXX_SHARED_HEAP_SIZE=MAX" >> $GITHUB_ENV + UPCXX_CODEMODE=O3 CXX=upcxx $GITHUB_WORKSPACE/cmake-3.22.6-linux-x86_64/bin/cmake -H. -Bbuild -DCMAKE_INSTALL_PREFIX=$INSTALL_PATH -DUSE_UPCXX=ON -DJOB_LAUNCH_CMD="upcxx-run" cd build UPCXX_CODEMODE=O3 make -j2 UPCXX_CODEMODE=O3 make install @@ -193,7 +194,7 @@ jobs: export PATH=$PATH:$GITHUB_WORKSPACE/install/bin $GITHUB_WORKSPACE/cmake-3.22.6-linux-x86_64/bin/ctest -VV - name: gcovr - if: ${{ matrix.backend == 'ga' && matrix.cc != 'clang-11' }} + if: ${{ matrix.backend == 'ga' && matrix.cc != 'clang-11' && matrix.cc != 'gcc-11' }} run: | cd $GITHUB_WORKSPACE/build gcovr --root ./stage/$INSTALL_PATH . --xml ../coverage.xml diff --git a/.github/workflows/format.yaml b/.github/workflows/format.yaml index 0c76918d5..6f2567ca4 100644 --- a/.github/workflows/format.yaml +++ b/.github/workflows/format.yaml @@ -24,8 +24,8 @@ jobs: inplace: True - uses: EndBug/add-and-commit@v4 with: - author_name: Clang Robot - author_email: robot@example.com + author_name: TAMM developers + author_email: exachem23@gmail.com message: 'Committing clang-format changes' env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/src/tamm/gpu_streams.hpp b/src/tamm/gpu_streams.hpp index ddae7cf86..c2240dab6 100644 --- a/src/tamm/gpu_streams.hpp +++ b/src/tamm/gpu_streams.hpp @@ -230,12 +230,12 @@ static inline void gpuEventSynchronize(gpuEvent_t event) { static inline bool gpuEventQuery(gpuEvent_t event) { #if defined(USE_DPCPP) - (event.get_info() == - sycl::info::event_command_status::complete); + return (event.get_info() == + sycl::info::event_command_status::complete); #elif defined(USE_HIP) - (hipEventQuery(event) == hipSuccess); + return (hipEventQuery(event) == hipSuccess); #elif defined(USE_CUDA) - (cudaEventQuery(event) == cudaSuccess); + return (cudaEventQuery(event) == cudaSuccess); #endif } diff --git a/src/tamm/kernels/multiply.hpp b/src/tamm/kernels/multiply.hpp index 8116b985a..3ab010375 100644 --- a/src/tamm/kernels/multiply.hpp +++ b/src/tamm/kernels/multiply.hpp @@ -493,8 +493,9 @@ void block_multiply( bsize.value(), binter_buf_dev); } + // This is where some recent commits to Complex is done! check it gemm_wrapper(hw, thandle, AR, BR, B, M, N, K, alpha, beta, abuf_complex, abuf_complex_dev, - binter_buf, binter_buf_dev, cinter_buf, cinter_tmp_buf_dev); + bbuf_complex, bbuf_complex_dev, cinter_buf, cinter_tmp_buf_dev); transpose_output(hw, thandle, gpu_trans, cinter_buf, cinter_dims, cinter_labels, cbuf, cdims, clabels, cinter_buf_dev, cinter_tmp_buf_dev, is_assign); diff --git a/tests/tamm/Test_CCSD.cpp b/tests/tamm/Test_CCSD.cpp new file mode 100644 index 000000000..7df906514 --- /dev/null +++ b/tests/tamm/Test_CCSD.cpp @@ -0,0 +1,498 @@ +#include "ccse_tensors.hpp" + +using CCEType = double; +TiledIndexSpace o_alpha, v_alpha, o_beta, v_beta; + +Tensor _a01V, _a02V, _a007V; +CCSE_Tensors _a01, _a02, _a03, _a04, _a05, _a06, _a001, _a004, _a006, _a008, _a009, _a017, + _a019, _a020, _a021, _a022; + +Tensor i0_temp, t2_aaaa_temp; // CS only + +template +std::tuple, Tensor, Tensor, Tensor, Tensor> +setupTensors_cs(ExecutionContext& ec, TiledIndexSpace& MO, Tensor d_f1) { + auto rank = ec.pg().rank(); + + const TiledIndexSpace& O = MO("occ"); + const TiledIndexSpace& V = MO("virt"); + + const int otiles = O.num_tiles(); + const int vtiles = V.num_tiles(); + const int oatiles = MO("occ_alpha").num_tiles(); + const int obtiles = MO("occ_beta").num_tiles(); + const int vatiles = MO("virt_alpha").num_tiles(); + const int vbtiles = MO("virt_beta").num_tiles(); + + TiledIndexSpace o_alpha, v_alpha, o_beta, v_beta; + o_alpha = {MO("occ"), range(oatiles)}; + v_alpha = {MO("virt"), range(vatiles)}; + o_beta = {MO("occ"), range(obtiles, otiles)}; + v_beta = {MO("virt"), range(vbtiles, vtiles)}; + + std::vector p_evl_sorted = tamm::diagonal(d_f1); + + Tensor d_r1{{v_alpha, o_alpha}, {1, 1}}; + Tensor d_r2{{v_alpha, v_beta, o_alpha, o_beta}, {2, 2}}; + + Tensor::allocate(&ec, d_r1, d_r2); + + Tensor d_t1{{v_alpha, o_alpha}, {1, 1}}; + Tensor d_t2{{v_alpha, v_beta, o_alpha, o_beta}, {2, 2}}; + + Tensor::allocate(&ec, d_t1, d_t2); + + return std::make_tuple(p_evl_sorted, d_t1, d_t2, d_r1, d_r2); +} + +template +void ccsd_e_cs(Scheduler& sch, const TiledIndexSpace& MO, const TiledIndexSpace& CI, Tensor& de, + const Tensor& t1_aa, const Tensor& t2_abab, const Tensor& t2_aaaa, + std::vector>& f1_se, std::vector>& chol3d_se) { + auto [cind] = CI.labels<1>("all"); + + auto [p1_va, p2_va] = v_alpha.labels<2>("all"); + auto [p1_vb] = v_beta.labels<1>("all"); + auto [h1_oa, h2_oa] = o_alpha.labels<2>("all"); + auto [h1_ob] = o_beta.labels<1>("all"); + + // f1_se = {f1_oo,f1_ov,f1_vv} + // chol3d_se = {chol3d_oo,chol3d_ov,chol3d_vv} + auto f1_ov = f1_se[1]; + auto chol3d_ov = chol3d_se[1]; + + // clang-format off + sch + (t2_aaaa_temp()=0) + .exact_copy(t2_aaaa(p1_va, p2_va, h1_oa, h2_oa), t2_abab(p1_va, p2_va, h1_oa, h2_oa)) + (t2_aaaa_temp() = t2_aaaa(), + "t2_aaaa_temp() = t2_aaaa()") + (t2_aaaa(p1_va,p2_va,h1_oa,h2_oa) += -1.0 * t2_aaaa_temp(p2_va,p1_va,h1_oa,h2_oa), + "t2_aaaa(p1_va,p2_va,h1_oa,h2_oa) += -1.0 * t2_aaaa_temp(p2_va,p1_va,h1_oa,h2_oa)") + (t2_aaaa_temp(p1_va,p2_va,h1_oa,h2_oa) += 1.0 * t2_aaaa(p2_va,p1_va,h2_oa,h1_oa), + "t2_aaaa_temp(p1_va,p2_va,h1_oa,h2_oa) += 1.0 * t2_aaaa(p2_va,p1_va,h2_oa,h1_oa)") + + (_a01V(cind) = t1_aa(p1_va, h1_oa) * chol3d_ov("aa")(h1_oa, p1_va, cind), + "_a01V(cind) = t1_aa(p1_va, h1_oa) * chol3d_ov( aa )(h1_oa, p1_va, cind)") + (_a02("aa")(h1_oa, h2_oa, cind) = t1_aa(p1_va, h1_oa) * chol3d_ov("aa")(h2_oa, p1_va, cind), + "_a02( aa )(h1_oa, h2_oa, cind) = t1_aa(p1_va, h1_oa) * chol3d_ov( aa )(h2_oa, p1_va, cind)") + (_a03("aa")(h2_oa, p2_va, cind) = t2_aaaa_temp(p2_va, p1_va, h2_oa, h1_oa) * chol3d_ov("aa")(h1_oa, p1_va, cind), + "_a03( aa )(h2_oa, p2_va, cind) = t2_aaaa_temp(p2_va, p1_va, h2_oa, h1_oa) * chol3d_ov( aa )(h1_oa, p1_va, cind)") + (de() = 2.0 * _a01V() * _a01V(), + "de() = 2.0 * _a01V() * _a01V()") + (de() += -1.0 * _a02("aa")(h1_oa, h2_oa, cind) * _a02("aa")(h2_oa, h1_oa, cind), + "de() += -1.0 * _a02( aa )(h1_oa, h2_oa, cind) * _a02( aa )(h2_oa, h1_oa, cind)") + (de() += 1.0 * _a03("aa")(h1_oa, p1_va, cind) * chol3d_ov("aa")(h1_oa, p1_va, cind), + "de() += 1.0 * _a03( aa )(h1_oa, p1_va, cind) * chol3d_ov( aa )(h1_oa, p1_va, cind)") + (de() += 2.0 * t1_aa(p1_va, h1_oa) * f1_ov("aa")(h1_oa, p1_va), + "de() += 2.0 * t1_aa(p1_va, h1_oa) * f1_ov( aa )(h1_oa, p1_va)") // NEW TERM + ; + // clang-format on +} + +template +void ccsd_t1_cs(Scheduler& sch, const TiledIndexSpace& MO, const TiledIndexSpace& CI, + Tensor& i0_aa, const Tensor& t1_aa, const Tensor& t2_abab, + std::vector>& f1_se, std::vector>& chol3d_se) { + auto [cind] = CI.labels<1>("all"); + auto [p2] = MO.labels<1>("virt"); + auto [h1] = MO.labels<1>("occ"); + + auto [p1_va, p2_va] = v_alpha.labels<2>("all"); + auto [p1_vb] = v_beta.labels<1>("all"); + auto [h1_oa, h2_oa] = o_alpha.labels<2>("all"); + auto [h1_ob] = o_beta.labels<1>("all"); + + // f1_se = {f1_oo,f1_ov,f1_vv} + // chol3d_se = {chol3d_oo,chol3d_ov,chol3d_vv} + auto f1_oo = f1_se[0]; + auto f1_ov = f1_se[1]; + auto f1_vv = f1_se[2]; + auto chol3d_oo = chol3d_se[0]; + auto chol3d_ov = chol3d_se[1]; + auto chol3d_vv = chol3d_se[2]; + + // clang-format off + sch + (i0_aa(p2_va, h1_oa) = 1.0 * f1_ov("aa")(h1_oa, p2_va), + "i0_aa(p2_va, h1_oa) = 1.0 * f1_ov( aa )(h1_oa, p2_va)") + (_a01("aa")(h2_oa, h1_oa, cind) = 1.0 * t1_aa(p1_va, h1_oa) * chol3d_ov("aa")(h2_oa, p1_va, cind), + "_a01( aa )(h2_oa, h1_oa, cind) = 1.0 * t1_aa(p1_va, h1_oa) * chol3d_ov( aa )(h2_oa, p1_va, cind)") // ovm + (_a02V(cind) = 2.0 * t1_aa(p1_va, h1_oa) * chol3d_ov("aa")(h1_oa, p1_va, cind), + "_a02V(cind) = 2.0 * t1_aa(p1_va, h1_oa) * chol3d_ov( aa )(h1_oa, p1_va, cind)") // ovm + // (_a02V(cind) = 2.0 * _a01("aa")(h1_oa, h1_oa, cind)) + (_a05("aa")(h2_oa, p1_va) = -1.0 * chol3d_ov("aa")(h1_oa, p1_va, cind) * _a01("aa")(h2_oa, h1_oa, cind), + "_a05( aa )(h2_oa, p1_va) = -1.0 * chol3d_ov( aa )(h1_oa, p1_va, cind) * _a01( aa )(h2_oa, h1_oa, cind)") // o2vm + (_a05("aa")(h2_oa, p1_va) += 1.0 * f1_ov("aa")(h2_oa, p1_va), + "_a05( aa )(h2_oa, p1_va) += 1.0 * f1_ov( aa )(h2_oa, p1_va)") // NEW TERM + // .exact_copy(_a05_bb(h1_ob,p1_vb),_a05_aa(h1_ob,p1_vb)) + + (_a06("aa")(p1_va, h1_oa, cind) = -1.0 * t2_aaaa_temp(p1_va, p2_va, h1_oa, h2_oa) * chol3d_ov("aa")(h2_oa, p2_va, cind), + "_a06( aa )(p1_va, h1_oa, cind) = -1.0 * t2_aaaa_temp(p1_va, p2_va, h1_oa, h2_oa) * chol3d_ov( aa )(h2_oa, p2_va, cind)") // o2v2m + (_a04("aa")(h2_oa, h1_oa) = -1.0 * f1_oo("aa")(h2_oa, h1_oa), + "_a04( aa )(h2_oa, h1_oa) = -1.0 * f1_oo( aa )(h2_oa, h1_oa)") // MOVED TERM + (_a04("aa")(h2_oa, h1_oa) += 1.0 * chol3d_ov("aa")(h2_oa, p1_va, cind) * _a06("aa")(p1_va, h1_oa, cind), + "_a04( aa )(h2_oa, h1_oa) += 1.0 * chol3d_ov( aa )(h2_oa, p1_va, cind) * _a06( aa )(p1_va, h1_oa, cind)") // o2vm + (_a04("aa")(h2_oa, h1_oa) += -1.0 * t1_aa(p1_va, h1_oa) * f1_ov("aa")(h2_oa, p1_va), + "_a04( aa )(h2_oa, h1_oa) += -1.0 * t1_aa(p1_va, h1_oa) * f1_ov( aa )(h2_oa, p1_va)") // NEW TERM + (i0_aa(p2_va, h1_oa) += 1.0 * t1_aa(p2_va, h2_oa) * _a04("aa")(h2_oa, h1_oa), + "i0_aa(p2_va, h1_oa) += 1.0 * t1_aa(p2_va, h2_oa) * _a04( aa )(h2_oa, h1_oa)") // o2v + (i0_aa(p1_va, h2_oa) += 1.0 * chol3d_ov("aa")(h2_oa, p1_va, cind) * _a02V(cind), + "i0_aa(p1_va, h2_oa) += 1.0 * chol3d_ov( aa )(h2_oa, p1_va, cind) * _a02V(cind)") // ovm + (i0_aa(p1_va, h2_oa) += 1.0 * t2_aaaa_temp(p1_va, p2_va, h2_oa, h1_oa) * _a05("aa")(h1_oa, p2_va), + "i0_aa(p1_va, h2_oa) += 1.0 * t2_aaaa_temp(p1_va, p2_va, h2_oa, h1_oa) * _a05( aa )(h1_oa, p2_va)") + (i0_aa(p2_va, h1_oa) += -1.0 * chol3d_vv("aa")(p2_va, p1_va, cind) * _a06("aa")(p1_va, h1_oa, cind), + "i0_aa(p2_va, h1_oa) += -1.0 * chol3d_vv( aa )(p2_va, p1_va, cind) * _a06( aa )(p1_va, h1_oa, cind)") // ov2m + (_a06("aa")(p2_va, h2_oa, cind) += -1.0 * t1_aa(p1_va, h2_oa) * chol3d_vv("aa")(p2_va, p1_va, cind), + "_a06( aa )(p2_va, h2_oa, cind) += -1.0 * t1_aa(p1_va, h2_oa) * chol3d_vv( aa )(p2_va, p1_va, cind)") // ov2m + (i0_aa(p1_va, h2_oa) += -1.0 * _a06("aa")(p1_va, h2_oa, cind) * _a02V(cind), + "i0_aa(p1_va, h2_oa) += -1.0 * _a06( aa )(p1_va, h2_oa, cind) * _a02V(cind)") // ovm + (_a06("aa")(p2_va, h1_oa, cind) += -1.0 * t1_aa(p2_va, h1_oa) * _a02V(cind), + "_a06( aa )(p2_va, h1_oa, cind) += -1.0 * t1_aa(p2_va, h1_oa) * _a02V(cind)") // ovm + (_a06("aa")(p2_va, h1_oa, cind) += 1.0 * t1_aa(p2_va, h2_oa) * _a01("aa")(h2_oa, h1_oa, cind), + "_a06( aa )(p2_va, h1_oa, cind) += 1.0 * t1_aa(p2_va, h2_oa) * _a01( aa )(h2_oa, h1_oa, cind)") // o2vm + (_a01("aa")(h2_oa, h1_oa, cind) += 1.0 * chol3d_oo("aa")(h2_oa, h1_oa, cind), + "_a01( aa )(h2_oa, h1_oa, cind) += 1.0 * chol3d_oo( aa )(h2_oa, h1_oa, cind)") // o2m + (i0_aa(p2_va, h1_oa) += 1.0 * _a01("aa")(h2_oa, h1_oa, cind) * _a06("aa")(p2_va, h2_oa, cind), + "i0_aa(p2_va, h1_oa) += 1.0 * _a01( aa )(h2_oa, h1_oa, cind) * _a06( aa )(p2_va, h2_oa, cind)") // o2vm + // (i0_aa(p2_va, h1_oa) += -1.0 * t1_aa(p2_va, h2_oa) * f1_oo("aa")(h2_oa, h1_oa), // MOVED ABOVE + // "i0_aa(p2_va, h1_oa) += -1.0 * t1_aa(p2_va, h2_oa) * f1_oo( aa )(h2_oa, h1_oa)") // o2v + (i0_aa(p2_va, h1_oa) += 1.0 * t1_aa(p1_va, h1_oa) * f1_vv("aa")(p2_va, p1_va), + "i0_aa(p2_va, h1_oa) += 1.0 * t1_aa(p1_va, h1_oa) * f1_vv( aa )(p2_va, p1_va)") // ov2 + ; + // clang-format on +} + +template +void ccsd_t2_cs(Scheduler& sch, const TiledIndexSpace& MO, const TiledIndexSpace& CI, + Tensor& i0_abab, const Tensor& t1_aa, Tensor& t2_abab, Tensor& t2_aaaa, + std::vector>& f1_se, std::vector>& chol3d_se) { + auto [cind] = CI.labels<1>("all"); + auto [p3, p4] = MO.labels<2>("virt"); + auto [h1, h2] = MO.labels<2>("occ"); + + auto [p1_va, p2_va, p3_va] = v_alpha.labels<3>("all"); + auto [p1_vb, p2_vb] = v_beta.labels<2>("all"); + auto [h1_oa, h2_oa, h3_oa] = o_alpha.labels<3>("all"); + auto [h1_ob, h2_ob] = o_beta.labels<2>("all"); + + // f1_se = {f1_oo,f1_ov,f1_vv} + // chol3d_se = {chol3d_oo,chol3d_ov,chol3d_vv} + auto f1_oo = f1_se[0]; + auto f1_ov = f1_se[1]; + auto f1_vv = f1_se[2]; + auto chol3d_oo = chol3d_se[0]; + auto chol3d_ov = chol3d_se[1]; + auto chol3d_vv = chol3d_se[2]; + + // clang-format off + sch + (_a017("aa")(p1_va, h2_oa, cind) = -1.0 * t2_aaaa_temp(p1_va, p2_va, h2_oa, h1_oa) * chol3d_ov("aa")(h1_oa, p2_va, cind), + "_a017( aa )(p1_va, h2_oa, cind) = -1.0 * t2_aaaa_temp(p1_va, p2_va, h2_oa, h1_oa) * chol3d_ov( aa )(h1_oa, p2_va, cind)") + (_a006("aa")(h2_oa, h1_oa) = -1.0 * chol3d_ov("aa")(h2_oa, p2_va, cind) * _a017("aa")(p2_va, h1_oa, cind), + "_a006( aa )(h2_oa, h1_oa) = -1.0 * chol3d_ov( aa )(h2_oa, p2_va, cind) * _a017( aa )(p2_va, h1_oa, cind)") + (_a007V(cind) = 2.0 * chol3d_ov("aa")(h1_oa, p1_va, cind) * t1_aa(p1_va, h1_oa), + "_a007V(cind) = 2.0 * chol3d_ov( aa )(h1_oa, p1_va, cind) * t1_aa(p1_va, h1_oa)") + (_a009("aa")(h1_oa, h2_oa, cind) = 1.0 * chol3d_ov("aa")(h1_oa, p1_va, cind) * t1_aa(p1_va, h2_oa), + "_a009( aa )(h1_oa, h2_oa, cind) = 1.0 * chol3d_ov( aa )(h1_oa, p1_va, cind) * t1_aa(p1_va, h2_oa)") + (_a021("aa")(p2_va, p1_va, cind) = -0.5 * chol3d_ov("aa")(h1_oa, p1_va, cind) * t1_aa(p2_va, h1_oa), + "_a021( aa )(p2_va, p1_va, cind) = -0.5 * chol3d_ov( aa )(h1_oa, p1_va, cind) * t1_aa(p2_va, h1_oa)") + (_a021("aa")(p2_va, p1_va, cind) += 0.5 * chol3d_vv("aa")(p2_va, p1_va, cind), + "_a021( aa )(p2_va, p1_va, cind) += 0.5 * chol3d_vv( aa )(p2_va, p1_va, cind)") + (_a017("aa")(p1_va, h2_oa, cind) += -2.0 * t1_aa(p2_va, h2_oa) * _a021("aa")(p1_va, p2_va, cind), + "_a017( aa )(p1_va, h2_oa, cind) += -2.0 * t1_aa(p2_va, h2_oa) * _a021( aa )(p1_va, p2_va, cind)") + (_a008("aa")(h2_oa, h1_oa, cind) = 1.0 * _a009("aa")(h2_oa, h1_oa, cind), + "_a008( aa )(h2_oa, h1_oa, cind) = 1.0 * _a009( aa )(h2_oa, h1_oa, cind)") + (_a009("aa")(h2_oa, h1_oa, cind) += 1.0 * chol3d_oo("aa")(h2_oa, h1_oa, cind), + "_a009( aa )(h2_oa, h1_oa, cind) += 1.0 * chol3d_oo( aa )(h2_oa, h1_oa, cind)") + // .exact_copy(_a009("bb")(h2_ob,h1_ob,cind),_a009("aa")(h2_ob,h1_ob,cind)) + // .exact_copy(_a021("bb")(p2_vb,p1_vb,cind),_a021("aa")(p2_vb,p1_vb,cind)) + (_a001("aa")(p1_va, p2_va) = -2.0 * _a021("aa")(p1_va, p2_va, cind) * _a007V(cind), + "_a001( aa )(p1_va, p2_va) = -2.0 * _a021( aa )(p1_va, p2_va, cind) * _a007V(cind)") + (_a001("aa")(p1_va, p2_va) += -1.0 * _a017("aa")(p1_va, h2_oa, cind) * chol3d_ov("aa")(h2_oa, p2_va, cind), + "_a001( aa )(p1_va, p2_va) += -1.0 * _a017( aa )(p1_va, h2_oa, cind) * chol3d_ov( aa )(h2_oa, p2_va, cind)") + (_a006("aa")(h2_oa, h1_oa) += 1.0 * _a009("aa")(h2_oa, h1_oa, cind) * _a007V(cind), + "_a006( aa )(h2_oa, h1_oa) += 1.0 * _a009( aa )(h2_oa, h1_oa, cind) * _a007V(cind)") + (_a006("aa")(h3_oa, h1_oa) += -1.0 * _a009("aa")(h2_oa, h1_oa, cind) * _a008("aa")(h3_oa, h2_oa, cind), + "_a006( aa )(h3_oa, h1_oa) += -1.0 * _a009( aa )(h2_oa, h1_oa, cind) * _a008( aa )(h3_oa, h2_oa, cind)") + (_a019("abab")(h2_oa, h1_ob, h1_oa, h2_ob) = 0.25 * _a009("aa")(h2_oa, h1_oa, cind) * _a009("bb")(h1_ob, h2_ob, cind), + "_a019( abab )(h2_oa, h1_ob, h1_oa, h2_ob) = 0.25 * _a009( aa )(h2_oa, h1_oa, cind) * _a009( bb )(h1_ob, h2_ob, cind)") + (_a020("aaaa")(p2_va, h2_oa, p1_va, h1_oa) = -2.0 * _a009("aa")(h2_oa, h1_oa, cind) * _a021("aa")(p2_va, p1_va, cind), + "_a020( aaaa )(p2_va, h2_oa, p1_va, h1_oa) = -2.0 * _a009( aa )(h2_oa, h1_oa, cind) * _a021( aa )(p2_va, p1_va, cind)") + // .exact_copy(_a020("baba")(p2_vb, h2_oa, p1_vb, h1_oa),_a020("aaaa")(p2_vb, h2_oa, p1_vb, h1_oa)) + (_a020("aaaa")(p1_va, h3_oa, p3_va, h2_oa) += 0.5 * _a004("aaaa")(p2_va, p3_va, h3_oa, h1_oa) * t2_aaaa(p1_va,p2_va,h1_oa,h2_oa), + "_a020( aaaa )(p1_va, h3_oa, p3_va, h2_oa) += 0.5 * _a004( aaaa )(p2_va, p3_va, h3_oa, h1_oa) * t2_aaaa(p1_va,p2_va,h1_oa,h2_oa)") + (_a020("baab")(p1_vb, h2_oa, p1_va, h2_ob) = -0.5 * _a004("aaaa")(p2_va, p1_va, h2_oa, h1_oa) * t2_abab(p2_va,p1_vb,h1_oa,h2_ob), + "_a020( baab )(p1_vb, h2_oa, p1_va, h2_ob) = -0.5 * _a004( aaaa )(p2_va, p1_va, h2_oa, h1_oa) * t2_abab(p2_va,p1_vb,h1_oa,h2_ob)") + (_a020("baba")(p1_vb, h1_oa, p2_vb, h2_oa) += 0.5 * _a004("abab")(p1_va, p2_vb, h1_oa, h1_ob) * t2_abab(p1_va,p1_vb,h2_oa,h1_ob), + "_a020( baba )(p1_vb, h1_oa, p2_vb, h2_oa) += 0.5 * _a004( abab )(p1_va, p2_vb, h1_oa, h1_ob) * t2_abab(p1_va,p1_vb,h2_oa,h1_ob)") + (_a017("aa")(p1_va, h2_oa, cind) += 1.0 * t1_aa(p1_va, h1_oa) * chol3d_oo("aa")(h1_oa, h2_oa, cind), + "_a017( aa )(p1_va, h2_oa, cind) += 1.0 * t1_aa(p1_va, h1_oa) * chol3d_oo( aa )(h1_oa, h2_oa, cind)") + (_a017("aa")(p1_va, h2_oa, cind) += -1.0 * chol3d_ov("aa")(h2_oa, p1_va, cind), + "_a017( aa )(p1_va, h2_oa, cind) += -1.0 * chol3d_ov( aa )(h2_oa, p1_va, cind)") + (_a001("aa")(p2_va, p1_va) += -1.0 * f1_vv("aa")(p2_va, p1_va), + "_a001( aa )(p2_va, p1_va) += -1.0 * f1_vv( aa )(p2_va, p1_va)") + (_a001("aa")(p2_va, p1_va) += 1.0 * t1_aa(p2_va, h1_oa) * f1_ov("aa")(h1_oa, p1_va), + "_a001( aa )(p2_va, p1_va) += 1.0 * t1_aa(p2_va, h1_oa) * f1_ov( aa )(h1_oa, p1_va)") // NEW TERM + (_a006("aa")(h2_oa, h1_oa) += 1.0 * f1_oo("aa")(h2_oa, h1_oa), + "_a006( aa )(h2_oa, h1_oa) += 1.0 * f1_oo( aa )(h2_oa, h1_oa)") + (_a006("aa")(h2_oa, h1_oa) += 1.0 * t1_aa(p1_va, h1_oa) * f1_ov("aa")(h2_oa, p1_va), + "_a006( aa )(h2_oa, h1_oa) += 1.0 * t1_aa(p1_va, h1_oa) * f1_ov( aa )(h2_oa, p1_va)") + // .exact_copy(_a017("bb")(p1_vb, h1_ob, cind), _a017("aa")(p1_vb, h1_ob, cind)) + // .exact_copy(_a006("bb")(h1_ob, h2_ob), _a006("aa")(h1_ob, h2_ob)) + // .exact_copy(_a001("bb")(p1_vb, p2_vb), _a001("aa")(p1_vb, p2_vb)) + // .exact_copy(_a021("bb")(p1_vb, p2_vb, cind), _a021("aa")(p1_vb, p2_vb, cind)) + // .exact_copy(_a020("bbbb")(p1_vb, h1_ob, p2_vb, h2_ob), _a020("aaaa")(p1_vb, h1_ob, p2_vb, h2_ob)) + + (i0_abab(p1_va, p2_vb, h2_oa, h1_ob) = 1.0 * _a020("bbbb")(p2_vb, h2_ob, p1_vb, h1_ob) * t2_abab(p1_va, p1_vb, h2_oa, h2_ob), + "i0_abab(p1_va, p2_vb, h2_oa, h1_ob) = 1.0 * _a020(bbbb)(p2_vb, h2_ob, p1_vb, h1_ob) * t2_abab(p1_va, p1_vb, h2_oa, h2_ob)") + (i0_abab(p2_va, p1_vb, h2_oa, h1_ob) += 1.0 * _a020("baab")(p1_vb, h1_oa, p1_va, h1_ob) * t2_aaaa(p2_va, p1_va, h2_oa, h1_oa), + "i0_abab(p2_va, p1_vb, h2_oa, h1_ob) += 1.0 * _a020(baab)(p1_vb, h1_oa, p1_va, h1_ob) * t2_aaaa(p2_va, p1_va, h2_oa, h1_oa)") + (i0_abab(p1_va, p1_vb, h2_oa, h1_ob) += 1.0 * _a020("baba")(p1_vb, h1_oa, p2_vb, h2_oa) * t2_abab(p1_va, p2_vb, h1_oa, h1_ob), + "i0_abab(p1_va, p1_vb, h2_oa, h1_ob) += 1.0 * _a020(baba)(p1_vb, h1_oa, p2_vb, h2_oa) * t2_abab(p1_va, p2_vb, h1_oa, h1_ob)") + // .exact_copy(i0_temp(p1_vb,p1_va,h2_ob,h1_oa),i0_abab(p1_vb,p1_va,h2_ob,h1_oa)) + (i0_abab(p1_va, p1_vb, h2_oa, h1_ob) += 1.0 * i0_temp(p1_vb, p1_va, h1_ob, h2_oa), + "i0_abab(p1_va, p1_vb, h2_oa, h1_ob) += 1.0 * i0_temp(p1_vb, p1_va, h1_ob, h2_oa)") + (i0_abab(p1_va, p1_vb, h1_oa, h2_ob) += 1.0 * _a017("aa")(p1_va, h1_oa, cind) * _a017("bb")(p1_vb, h2_ob, cind), + "i0_abab(p1_va, p1_vb, h1_oa, h2_ob) += 1.0 * _a017( aa )(p1_va, h1_oa, cind) * _a017( bb )(p1_vb, h2_ob, cind)") + (_a022("abab")(p1_va,p2_vb,p2_va,p1_vb) = 1.0 * _a021("aa")(p1_va,p2_va,cind) * _a021("bb")(p2_vb,p1_vb,cind), + "_a022( abab )(p1_va,p2_vb,p2_va,p1_vb) = 1.0 * _a021( aa )(p1_va,p2_va,cind) * _a021( bb )(p2_vb,p1_vb,cind)") + (i0_abab(p1_va, p2_vb, h1_oa, h2_ob) += 4.0 * _a022("abab")(p1_va, p2_vb, p2_va, p1_vb) * t2_abab(p2_va,p1_vb,h1_oa,h2_ob), + "i0_abab(p1_va, p2_vb, h1_oa, h2_ob) += 4.0 * _a022( abab )(p1_va, p2_vb, p2_va, p1_vb) * t2_abab(p2_va,p1_vb,h1_oa,h2_ob)") + (_a019("abab")(h2_oa, h1_ob, h1_oa, h2_ob) += 0.25 * _a004("abab")(p1_va, p2_vb, h2_oa, h1_ob) * t2_abab(p1_va,p2_vb,h1_oa,h2_ob), + "_a019( abab )(h2_oa, h1_ob, h1_oa, h2_ob) += 0.25 * _a004( abab )(p1_va, p2_vb, h2_oa, h1_ob) * t2_abab(p1_va,p2_vb,h1_oa,h2_ob)") + (i0_abab(p1_va, p1_vb, h1_oa, h2_ob) += 4.0 * _a019("abab")(h2_oa, h1_ob, h1_oa, h2_ob) * t2_abab(p1_va, p1_vb, h2_oa, h1_ob), + "i0_abab(p1_va, p1_vb, h1_oa, h2_ob) += 4.0 * _a019( abab )(h2_oa, h1_ob, h1_oa, h2_ob) * t2_abab(p1_va, p1_vb, h2_oa, h1_ob)") + (i0_abab(p1_va, p1_vb, h1_oa, h2_ob) += -1.0 * t2_abab(p1_va, p2_vb, h1_oa, h2_ob) * _a001("bb")(p1_vb, p2_vb), + "i0_abab(p1_va, p1_vb, h1_oa, h2_ob) += -1.0 * t2_abab(p1_va, p2_vb, h1_oa, h2_ob) * _a001( bb )(p1_vb, p2_vb)") + (i0_abab(p1_va, p1_vb, h1_oa, h2_ob) += -1.0 * t2_abab(p2_va, p1_vb, h1_oa, h2_ob) * _a001("aa")(p1_va, p2_va), + "i0_abab(p1_va, p1_vb, h1_oa, h2_ob) += -1.0 * t2_abab(p2_va, p1_vb, h1_oa, h2_ob) * _a001( aa )(p1_va, p2_va)") + (i0_abab(p1_va, p1_vb, h2_oa, h1_ob) += -1.0 * t2_abab(p1_va, p1_vb, h1_oa, h1_ob) * _a006("aa")(h1_oa, h2_oa), + "i0_abab(p1_va, p1_vb, h2_oa, h1_ob) += -1.0 * t2_abab(p1_va, p1_vb, h1_oa, h1_ob) * _a006( aa )(h1_oa, h2_oa)") + (i0_abab(p1_va, p1_vb, h1_oa, h2_ob) += -1.0 * t2_abab(p1_va, p1_vb, h1_oa, h1_ob) * _a006("bb")(h1_ob, h2_ob), + "i0_abab(p1_va, p1_vb, h1_oa, h2_ob) += -1.0 * t2_abab(p1_va, p1_vb, h1_oa, h1_ob) * _a006( bb )(h1_ob, h2_ob)") + ; + // clang-format on +} + +int main(int argc, char* argv[]) { + using T = double; + + tamm::initialize(argc, argv); + + if(argc < 5) { + tamm_terminate("Please provide occ_alpha, virt_alpha, cholesky-count and tile size"); + } + + size_t n_occ_alpha = atoi(argv[1]); + size_t n_vir_alpha = atoi(argv[2]); + size_t chol_count = atoi(argv[3]); + Tile tile_size = atoi(argv[4]); + + const auto nbf = n_occ_alpha + n_vir_alpha; + + ProcGroup pg = ProcGroup::create_world_coll(); + ExecutionContext ec{pg, DistributionKind::nw, MemoryManagerKind::ga}; + + ExecutionHW exhw = ec.exhw(); + + Scheduler sch{ec}; + + bool profile = false; + + if(ec.print()) { + std::cout << "basis functions: " << nbf << ", occ: " << n_occ_alpha << ", virt: " << n_vir_alpha + << ", chol-count: " << chol_count << ", tilesize: " << tile_size << std::endl; + } + + //----------------------------------- + + TAMM_SIZE n_occ_beta = n_occ_alpha; + Tile tce_tile = tile_size; + + TAMM_SIZE nmo = 2 * nbf; + TAMM_SIZE n_vir_beta = n_vir_alpha; + TAMM_SIZE nocc = 2 * n_occ_alpha; + + const TAMM_SIZE total_orbitals = nmo; + + // Construction of tiled index space MO + IndexSpace MO_IS{ + range(0, total_orbitals), + {{"occ", {range(0, nocc)}}, + {"occ_alpha", {range(0, n_occ_alpha)}}, + {"occ_beta", {range(n_occ_alpha, nocc)}}, + {"virt", {range(nocc, total_orbitals)}}, + {"virt_alpha", {range(nocc, nocc + n_vir_alpha)}}, + {"virt_beta", {range(nocc + n_vir_alpha, total_orbitals)}}}, + {{Spin{1}, {range(0, n_occ_alpha), range(nocc, nocc + n_vir_alpha)}}, + {Spin{2}, {range(n_occ_alpha, nocc), range(nocc + n_vir_alpha, total_orbitals)}}}}; + + std::vector mo_tiles; + + tamm::Tile est_nt = static_cast(std::ceil(1.0 * n_occ_alpha / tce_tile)); + for(tamm::Tile x = 0; x < est_nt; x++) + mo_tiles.push_back(n_occ_alpha / est_nt + (x < (n_occ_alpha % est_nt))); + + est_nt = static_cast(std::ceil(1.0 * n_occ_beta / tce_tile)); + for(tamm::Tile x = 0; x < est_nt; x++) + mo_tiles.push_back(n_occ_beta / est_nt + (x < (n_occ_beta % est_nt))); + + est_nt = static_cast(std::ceil(1.0 * n_vir_alpha / tce_tile)); + for(tamm::Tile x = 0; x < est_nt; x++) + mo_tiles.push_back(n_vir_alpha / est_nt + (x < (n_vir_alpha % est_nt))); + + est_nt = static_cast(std::ceil(1.0 * n_vir_beta / tce_tile)); + for(tamm::Tile x = 0; x < est_nt; x++) + mo_tiles.push_back(n_vir_beta / est_nt + (x < (n_vir_beta % est_nt))); + + TiledIndexSpace MO{MO_IS, mo_tiles}; + + //---------------------------------------------------- + + TiledIndexSpace N = MO("all"); + + Tensor d_f1{{N, N}, {1, 1}}; + Tensor::allocate(&ec, d_f1); + + std::vector p_evl_sorted; + Tensor t1_aa, t2_abab, r1_aa, r2_abab; + + std::tie(p_evl_sorted, t1_aa, t2_abab, r1_aa, r2_abab) = setupTensors_cs(ec, MO, d_f1); + + IndexSpace chol_is{range(0, chol_count)}; + TiledIndexSpace CI{chol_is, 1000}; + + // cholVpr = {{N, N, CI}, {SpinPosition::upper, SpinPosition::lower, SpinPosition::ignore}}; + // Tensor::allocate(&ec, cholVpr); + + const TiledIndexSpace& O = MO("occ"); + const TiledIndexSpace& V = MO("virt"); + auto [cind] = CI.labels<1>("all"); + + const int otiles = O.num_tiles(); + const int vtiles = V.num_tiles(); + const int oatiles = MO("occ_alpha").num_tiles(); + const int obtiles = MO("occ_beta").num_tiles(); + const int vatiles = MO("virt_alpha").num_tiles(); + const int vbtiles = MO("virt_beta").num_tiles(); + + o_alpha = {MO("occ"), range(oatiles)}; + v_alpha = {MO("virt"), range(vatiles)}; + o_beta = {MO("occ"), range(obtiles, otiles)}; + v_beta = {MO("virt"), range(vbtiles, vtiles)}; + + auto [p1_va, p2_va] = v_alpha.labels<2>("all"); + auto [p1_vb, p2_vb] = v_beta.labels<2>("all"); + auto [h3_oa, h4_oa] = o_alpha.labels<2>("all"); + auto [h3_ob, h4_ob] = o_beta.labels<2>("all"); + + Tensor d_e{}; + + Tensor t2_aaaa = {{v_alpha, v_alpha, o_alpha, o_alpha}, {2, 2}}; + + CCSE_Tensors f1_oo{MO, {O, O}, "f1_oo", {"aa", "bb"}}; + CCSE_Tensors f1_ov{MO, {O, V}, "f1_ov", {"aa", "bb"}}; + CCSE_Tensors f1_vv{MO, {V, V}, "f1_vv", {"aa", "bb"}}; + + CCSE_Tensors chol3d_oo{MO, {O, O, CI}, "chol3d_oo", {"aa", "bb"}}; + CCSE_Tensors chol3d_ov{MO, {O, V, CI}, "chol3d_ov", {"aa", "bb"}}; + CCSE_Tensors chol3d_vv{MO, {V, V, CI}, "chol3d_vv", {"aa", "bb"}}; + + std::vector> f1_se{f1_oo, f1_ov, f1_vv}; + std::vector> chol3d_se{chol3d_oo, chol3d_ov, chol3d_vv}; + + _a01V = {CI}; + _a02 = CCSE_Tensors{MO, {O, O, CI}, "_a02", {"aa"}}; + _a03 = CCSE_Tensors{MO, {O, V, CI}, "_a03", {"aa"}}; + _a004 = CCSE_Tensors{MO, {V, V, O, O}, "_a004", {"aaaa", "abab"}}; + + t2_aaaa_temp = {v_alpha, v_alpha, o_alpha, o_alpha}; + i0_temp = {v_beta, v_alpha, o_beta, o_alpha}; + + // Intermediates + // T1 + _a02V = {CI}; + _a01 = CCSE_Tensors{MO, {O, O, CI}, "_a01", {"aa"}}; + _a04 = CCSE_Tensors{MO, {O, O}, "_a04", {"aa"}}; + _a05 = CCSE_Tensors{MO, {O, V}, "_a05", {"aa", "bb"}}; + _a06 = CCSE_Tensors{MO, {V, O, CI}, "_a06", {"aa"}}; + + // T2 + _a007V = {CI}; + _a001 = CCSE_Tensors{MO, {V, V}, "_a001", {"aa", "bb"}}; + _a006 = CCSE_Tensors{MO, {O, O}, "_a006", {"aa", "bb"}}; + + _a008 = CCSE_Tensors{MO, {O, O, CI}, "_a008", {"aa"}}; + _a009 = CCSE_Tensors{MO, {O, O, CI}, "_a009", {"aa", "bb"}}; + _a017 = CCSE_Tensors{MO, {V, O, CI}, "_a017", {"aa", "bb"}}; + _a021 = CCSE_Tensors{MO, {V, V, CI}, "_a021", {"aa", "bb"}}; + + _a019 = CCSE_Tensors{MO, {O, O, O, O}, "_a019", {"abab"}}; + _a022 = CCSE_Tensors{MO, {V, V, V, V}, "_a022", {"abab"}}; + _a020 = CCSE_Tensors{MO, {V, O, V, O}, "_a020", {"aaaa", "baba", "baab", "bbbb"}}; + + sch.allocate(t2_aaaa); + sch.allocate(d_e, i0_temp, t2_aaaa_temp, _a01V); + CCSE_Tensors::allocate_list(sch, f1_oo, f1_ov, f1_vv, chol3d_oo, chol3d_ov, chol3d_vv); + CCSE_Tensors::allocate_list(sch, _a02, _a03); + + // allocate all intermediates + sch.allocate(_a02V, _a007V); + CCSE_Tensors::allocate_list(sch, _a004, _a01, _a04, _a05, _a06, _a001, _a006, _a008, _a009, + _a017, _a019, _a020, _a021, _a022); + sch.execute(); + + // // clang-format off + // sch + // (_a004("aaaa")(p1_va, p2_va, h4_oa, h3_oa) = 1.0 * chol3d_ov("aa")(h4_oa, p1_va, cind) * + // chol3d_ov("aa")(h3_oa, p2_va, cind)) .exact_copy(_a004("abab")(p1_va, p1_vb, h3_oa, h3_ob), + // _a004("aaaa")(p1_va, p1_vb, h3_oa, h3_ob)) + // ; + // // clang-format on + + // sch.execute(exhw); + + const auto timer_start = std::chrono::high_resolution_clock::now(); + + // ccsd_e_cs(sch, MO, CI, d_e, t1_aa, t2_abab, t2_aaaa, f1_se, chol3d_se); + // ccsd_t1_cs(sch, MO, CI, r1_aa, t1_aa, t2_abab, f1_se, chol3d_se); + ccsd_t2_cs(sch, MO, CI, r2_abab, t1_aa, t2_abab, t2_aaaa, f1_se, chol3d_se); + + sch.execute(exhw, profile); + + const auto timer_end = std::chrono::high_resolution_clock::now(); + auto iter_time = + std::chrono::duration_cast>((timer_end - timer_start)).count(); + + if(ec.print()) std::cout << "Tiem taken for closed-shell CD-CCSD: " << iter_time << std::endl; + + if(profile && ec.print()) { + std::string profile_csv = "ccsd_profile.csv"; + std::ofstream pds(profile_csv, std::ios::out); + if(!pds) std::cerr << "Error opening file " << profile_csv << std::endl; + std::string header = "ID;Level;OP;total_op_time_min;total_op_time_max;total_op_time_avg;"; + header += "get_time_min;get_time_max;get_time_avg;gemm_time_min;"; + header += "gemm_time_max;gemm_time_avg;acc_time_min;acc_time_max;acc_time_avg"; + pds << header << std::endl; + pds << ec.get_profile_data().str() << std::endl; + pds.close(); + } + + // deallocate all intermediates + sch.deallocate(_a02V, _a007V); + CCSE_Tensors::deallocate_list(sch, _a004, _a01, _a04, _a05, _a06, _a001, _a006, _a008, _a009, + _a017, _a019, _a020, _a021, _a022); + + sch.deallocate(d_e, i0_temp, t2_aaaa_temp, _a01V); + CCSE_Tensors::deallocate_list(sch, _a02, _a03); + CCSE_Tensors::deallocate_list(sch, f1_oo, f1_ov, f1_vv, chol3d_oo, chol3d_ov, chol3d_vv); + + sch.execute(); + + sch.deallocate(t1_aa, t2_abab, r1_aa, r2_abab, d_f1, t2_aaaa).execute(); + + tamm::finalize(); + + return 0; +} diff --git a/tests/tamm/Test_Mult_Ops.cpp b/tests/tamm/Test_Mult_Ops.cpp index 074e3b197..afcbff732 100644 --- a/tests/tamm/Test_Mult_Ops.cpp +++ b/tests/tamm/Test_Mult_Ops.cpp @@ -162,6 +162,21 @@ void test_3_dim_mult_op(Scheduler& sch, size_t N, Tile tilesize, ExecutionHW ex_ sch.deallocate(A, B, C).execute(); } +template +void norm_check(Tensor tensor, bool ci_check) { + if(!ci_check) return; + T tnorm = tamm::norm(tensor); + double tval = 0; + if constexpr(tamm::internal::is_complex_v) tval = tnorm.real(); + else tval = tnorm; + const bool mop_pass = (std::fabs(tval - 2.625e8) <= 1e-9); + if(!mop_pass) { + if(tensor.execution_context()->pg().rank() == 0) + std::cout << "norm value: " << tval << ", expected: 2.625e8" << std::endl; + EXPECTS(mop_pass); + } +} + template void test_4_dim_mult_op(Scheduler& sch, size_t N, Tile tilesize, ExecutionHW ex_hw, bool profile) { TiledIndexSpace tis1{IndexSpace{range(N)}, tilesize}; @@ -191,7 +206,7 @@ void test_4_dim_mult_op(Scheduler& sch, size_t N, Tile tilesize, ExecutionHW ex_ if(sch.ec().pg().rank() == 0) std::cout << "4-D Tensor contraction (R=RxR) with " << N << " indices tiled with " << tilesize << " : " << mult_time << std::endl; - + norm_check(C, N == 50); sch.deallocate(A, B, C).execute(); } @@ -217,6 +232,7 @@ void test_4_dim_mult_op(Scheduler& sch, size_t N, Tile tilesize, ExecutionHW ex_ std::cout << "4-D Tensor contraction (C=RxR) with " << N << " indices tiled with " << tilesize << " : " << mult_time << std::endl; + norm_check(C, N == 50); sch.deallocate(A, B, C).execute(); } @@ -242,6 +258,7 @@ void test_4_dim_mult_op(Scheduler& sch, size_t N, Tile tilesize, ExecutionHW ex_ std::cout << "4-D Tensor contraction (C=RxC) with " << N << " indices tiled with " << tilesize << " : " << mult_time << std::endl; + norm_check(C, N == 50); sch.deallocate(A, B, C).execute(); } @@ -267,6 +284,7 @@ void test_4_dim_mult_op(Scheduler& sch, size_t N, Tile tilesize, ExecutionHW ex_ std::cout << "4-D Tensor contraction (C=CxR) with " << N << " indices tiled with " << tilesize << " : " << mult_time << std::endl; + norm_check(C, N == 50); sch.deallocate(A, B, C).execute(); } @@ -292,6 +310,7 @@ void test_4_dim_mult_op(Scheduler& sch, size_t N, Tile tilesize, ExecutionHW ex_ std::cout << "4-D Tensor contraction (R=CxR) with " << N << " indices tiled with " << tilesize << " : " << mult_time << std::endl; + norm_check(C, N == 50); sch.deallocate(A, B, C).execute(); } @@ -317,6 +336,7 @@ void test_4_dim_mult_op(Scheduler& sch, size_t N, Tile tilesize, ExecutionHW ex_ std::cout << "4-D Tensor contraction (R=RxC) with " << N << " indices tiled with " << tilesize << " : " << mult_time << std::endl; + norm_check(C, N == 50); sch.deallocate(A, B, C).execute(); } @@ -342,6 +362,7 @@ void test_4_dim_mult_op(Scheduler& sch, size_t N, Tile tilesize, ExecutionHW ex_ std::cout << "4-D Tensor contraction (C=CxC) with " << N << " indices tiled with " << tilesize << " : " << mult_time << std::endl; + norm_check(C, N == 50); sch.deallocate(A, B, C).execute(); } } diff --git a/tests/tamm/ccse_tensors.hpp b/tests/tamm/ccse_tensors.hpp new file mode 100644 index 000000000..4cb29a3f2 --- /dev/null +++ b/tests/tamm/ccse_tensors.hpp @@ -0,0 +1,247 @@ +#pragma once + +#include +#include + +using namespace tamm; + +/** + * struct for managing CC spin-explicit tensors + * CCSE_Tensors cctens{MO,{V,O},"tensor_name",{"aa","bb"}}; + * CCSE_Tensors cctens{MO,{V,O,CI},"tensor_name",{"aa","bb"}}; + * CCSE_Tensors cctens{MO,{V,O,V,O},"tensor_name",{"aaaa","baba","baab","bbbb"}}; + */ +template +class CCSE_Tensors { + std::map> tmap; + std::vector> allocated_tensors; + + std::string tname; + bool is_mo_3d{}; // true only when all dims of a 3D tensor are MO + +public: + std::vector vblocks; + + void deallocate() { + ExecutionContext& ec = get_ec(allocated_tensors[0]()); + Scheduler sch{ec}; + for(auto x: allocated_tensors) sch.deallocate(x); + sch.execute(); + } + + T sum_tensor_sizes() { + T total_size{}; + for(auto x: allocated_tensors) + total_size += (compute_tensor_size(x) * 8) / (1024 * 1024 * 1024.0); + return total_size; + } + + Tensor operator()(std::string block) { + if(tmap.find(block) == tmap.end()) + tamm_terminate("Error: tensor [" + tname + "]: block [" + block + + "] requested does not exist"); + return tmap[block]; + } + + TiledIndexSpaceVec construct_tis(const TiledIndexSpace& MO, const TiledIndexSpaceVec tis, + const std::vector btype) { + const auto ndims = tis.size(); + + const TiledIndexSpace& O = MO("occ"); + const TiledIndexSpace& V = MO("virt"); + + const TiledIndexSpace o_alpha = MO("occ_alpha"); + const TiledIndexSpace o_beta = MO("occ_beta"); + const TiledIndexSpace v_alpha = MO("virt_alpha"); + const TiledIndexSpace v_beta = MO("virt_beta"); + + TiledIndexSpaceVec btis; + for(size_t x = 0; x < ndims; x++) { + // assuming only 3D tensor has an independent index space + if(tis[x] == O) btype[x] == 0 ? btis.push_back(o_alpha) : btis.push_back(o_beta); + else if(tis[x] == V) btype[x] == 0 ? btis.push_back(v_alpha) : btis.push_back(v_beta); + else if(ndims == 3 && !is_mo_3d) { btis.push_back(tis[x]); } + } + + return btis; + } + + void allocate(ExecutionContext& ec) { + Scheduler sch{ec}; + for(auto x: allocated_tensors) sch.allocate(x); + sch.execute(); + } + + CCSE_Tensors() {} + + /** + * @brief Construct a group of spin-explicit tensors to be used as a single tensor + * + * @param [in] MO the MO tiled index space + * @param [in] tis the dimensions specified using O,V tiled index spaces + * @param [in] tname tensor name as string + * @param [in] blocks specify the required blocks as strings + */ + + CCSE_Tensors(const TiledIndexSpace& MO, TiledIndexSpaceVec tis, std::string tensor_name, + std::vector blocks) { + tname = tensor_name; + vblocks = blocks; + + const auto ndims = tis.size(); + std::string err_msg = "Error in tensor [" + tname + "] declaration"; + if(ndims < 2 || ndims > 4) tamm_terminate(err_msg + ": Only 2,3,4D tensors are allowed"); + + is_mo_3d = true; + const TiledIndexSpace& O = MO("occ"); + const TiledIndexSpace& V = MO("virt"); + for(size_t x = 0; x < tis.size(); x++) { + if(tis[x] != O && tis[x] != V) { + if(ndims == 3) is_mo_3d = false; // assuming only 3D tensors have an independent index space + else tamm_terminate(err_msg + ": Only O,V tiled index spaces can be specified"); + } + } + + std::vector allowed_blocks = {"aa", "bb"}; + if(ndims == 3 && is_mo_3d) allowed_blocks = {"aaa", "baa", "abb", "bbb"}; + else if(ndims == 4) allowed_blocks = {"aaaa", "abab", "bbbb", "abba", "baab", "baba"}; + + if(blocks.size() == 0) + tamm_terminate(err_msg + ": Please specify the tensor blocks to be allocated"); + + for(auto x: blocks) { + if(std::find(allowed_blocks.begin(), allowed_blocks.end(), x) == allowed_blocks.end()) { + if(ndims == 2 || (ndims == 3 && !is_mo_3d)) + tamm_terminate(err_msg + ": Invalid block [" + x + + "] specified, allowed blocks are [aa|bb]"); + else if(ndims == 3 && is_mo_3d) + tamm_terminate(err_msg + ": Invalid block [" + x + + "] specified, allowed blocks are [aaa|baa|abb|bbb]"); + else + tamm_terminate(err_msg + ": Invalid block [" + x + + "] specified, allowed blocks are [aaaa|abab|bbbb|abba|baab|baba]"); + } + } + + // a=0,b=1 + if(ndims == 2 || (ndims == 3 && !is_mo_3d)) { + if(std::find(blocks.begin(), blocks.end(), "aa") != blocks.end()) { + Tensor aa{construct_tis(MO, tis, {0, 0})}; + tmap["aa"] = aa; + allocated_tensors.push_back(aa); + } + if(std::find(blocks.begin(), blocks.end(), "bb") != blocks.end()) { + Tensor bb{construct_tis(MO, tis, {1, 1})}; + tmap["bb"] = bb; + allocated_tensors.push_back(bb); + } + } + else if(ndims == 3 && is_mo_3d) { + if(std::find(blocks.begin(), blocks.end(), "aaa") != blocks.end()) { + Tensor aaa{construct_tis(MO, tis, {0, 0, 0})}; + tmap["aaa"] = aaa; + allocated_tensors.push_back(aaa); + } + if(std::find(blocks.begin(), blocks.end(), "baa") != blocks.end()) { + Tensor baa{construct_tis(MO, tis, {1, 0, 0})}; + tmap["baa"] = baa; + allocated_tensors.push_back(baa); + } + if(std::find(blocks.begin(), blocks.end(), "abb") != blocks.end()) { + Tensor abb{construct_tis(MO, tis, {0, 1, 1})}; + tmap["abb"] = abb; + allocated_tensors.push_back(abb); + } + if(std::find(blocks.begin(), blocks.end(), "bbb") != blocks.end()) { + Tensor bbb{construct_tis(MO, tis, {1, 1, 1})}; + tmap["bbb"] = bbb; + allocated_tensors.push_back(bbb); + } + } + else { + if(std::find(blocks.begin(), blocks.end(), "aaaa") != blocks.end()) { + Tensor aaaa{construct_tis(MO, tis, {0, 0, 0, 0})}; + tmap["aaaa"] = aaaa; + allocated_tensors.push_back(aaaa); + } + if(std::find(blocks.begin(), blocks.end(), "abab") != blocks.end()) { + Tensor abab{construct_tis(MO, tis, {0, 1, 0, 1})}; + tmap["abab"] = abab; + allocated_tensors.push_back(abab); + } + if(std::find(blocks.begin(), blocks.end(), "bbbb") != blocks.end()) { + Tensor bbbb{construct_tis(MO, tis, {1, 1, 1, 1})}; + tmap["bbbb"] = bbbb; + allocated_tensors.push_back(bbbb); + } + if(std::find(blocks.begin(), blocks.end(), "abba") != blocks.end()) { + Tensor abba{construct_tis(MO, tis, {0, 1, 1, 0})}; + tmap["abba"] = abba; + allocated_tensors.push_back(abba); + } + if(std::find(blocks.begin(), blocks.end(), "baab") != blocks.end()) { + Tensor baab{construct_tis(MO, tis, {1, 0, 0, 1})}; + tmap["baab"] = baab; + allocated_tensors.push_back(baab); + } + if(std::find(blocks.begin(), blocks.end(), "baba") != blocks.end()) { + Tensor baba{construct_tis(MO, tis, {1, 0, 1, 0})}; + tmap["baba"] = baba; + allocated_tensors.push_back(baba); + } + } + } + + // static + static void alloc_list(Scheduler& sch) {} + + template + static void alloc_list(Scheduler& sch, CCSE_Tensors& ccset, Args&... rest) { + for(auto x: ccset.allocated_tensors) sch.allocate(x); + alloc_list(sch, rest...); + } + + template + static void allocate_list(Scheduler& sch, CCSE_Tensors& ccset, Args&... rest) { + alloc_list(sch, ccset, rest...); + } + + static void dealloc_list(Scheduler& sch) {} + + template + static void dealloc_list(Scheduler& sch, CCSE_Tensors& ccset, Args&... rest) { + for(auto x: ccset.allocated_tensors) sch.deallocate(x); + dealloc_list(sch, rest...); + } + + template + static void deallocate_list(Scheduler& sch, CCSE_Tensors& ccset, Args&... rest) { + dealloc_list(sch, ccset, rest...); + } + + template + static auto sum_tensor_sizes_list(Args&... ccsetensor) { + return (ccsetensor.sum_tensor_sizes() + ...); + } + + static void copy(Scheduler& sch, CCSE_Tensors& src, CCSE_Tensors& dest, + bool update = false) { + for(auto x: src.vblocks) { + if(update) sch(dest(x)() += src(x)()); + else sch(dest(x)() = src(x)()); + } + } + + static void initialize_list(Scheduler& sch, T value) {} + + template + static void initialize_list(Scheduler& sch, T value, CCSE_Tensors& ccset, Args&... rest) { + for(auto x: ccset.vblocks) { sch(ccset(x)() = value); } + initialize_list(sch, value, rest...); + } + + template + static void initialize(Scheduler& sch, T value, CCSE_Tensors& ccset, Args&... rest) { + initialize_list(sch, value, ccset, rest...); + } +}; diff --git a/tests/tamm/test_tamm.cmake b/tests/tamm/test_tamm.cmake index bce881e6e..188d2803a 100644 --- a/tests/tamm/test_tamm.cmake +++ b/tests/tamm/test_tamm.cmake @@ -11,7 +11,7 @@ add_mpi_unit_test(Test_DependentSpace 2 "") # add_mpi_unit_test(Test_Eigen 2 "") # add_mpi_unit_test(Test_PG 2 "") add_mpi_unit_test(Test_Utils 2 "") -add_mpi_unit_test(Test_Mult_Ops 2 "10 2" ) +add_mpi_unit_test(Test_Mult_Ops 2 "50 20" ) add_mpi_unit_test(Test_DLPNO_Ops 2 "10 2" ) # add_mpi_unit_test(Test_OpDAG 2 "") add_mpi_unit_test(Test_Opmin 2 "") @@ -19,6 +19,7 @@ add_mpi_unit_test(Test_IO 2 "10 10" ) add_mpi_unit_test(Test_EVP 2 "10 10" ) add_mpi_unit_test(Test_Unit_Tiled_View_Tensor 2 "") add_mpi_unit_test(Test_Mem_Profiler 2 "") +# add_mpi_unit_test(Test_CCSD 2 "10 40 60 40") # add_mpi_unit_test(Test_ViewTensor 2 "") # add_mpi_unit_test(Test_QR 2 "")