From 6c8f59cf0198f7f14df26fcb9717d60efe7a70b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Wed, 9 Mar 2022 12:01:10 +0100 Subject: [PATCH 001/106] #1672: lb: do not loop over comm_data twice --- .../collection/balance/lb_invoke/lb_manager.cc | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index 5aa7af07c6..b2885b55e2 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -656,7 +656,13 @@ void LBManager::computeStatistics( } double comm_load = 0.0; + std::vector obj_comm; for (auto&& elm : *comm_data) { + // Only count object-to-object direct edges in the Object_comm statistics + if (elm.first.cat_ == elm::CommCategory::SendRecv and not elm.first.selfEdge()) { + obj_comm.emplace_back(LoadData{lb::Statistic::Object_comm, elm.second.bytes}); + } + if (not comm_collectives and isCollectiveComm(elm.first.cat_)) { continue; } @@ -668,17 +674,6 @@ void LBManager::computeStatistics( } lstats.emplace_back(LoadData{lb::Statistic::Rank_comm, comm_load}); - - std::vector obj_comm; - for (auto&& elm : *comm_data) { - // Only count object-to-object direct edges in the Object_comm statistics - if (elm.first.cat_ == elm::CommCategory::SendRecv and not elm.first.selfEdge()) { - obj_comm.emplace_back( - LoadData{lb::Statistic::Object_comm, elm.second.bytes} - ); - } - } - lstats.emplace_back(reduceVec( lb::Statistic::Object_comm, std::move(obj_comm) )); From cc431e36df9090745e72bdf5c6a7abcd62d68f43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Mon, 21 Feb 2022 13:24:55 +0100 Subject: [PATCH 002/106] #1672: lb: add alpha, beta and gamma parameters to TemperedLB --- .../balance/temperedlb/temperedlb.cc | 36 +++++++++++++++++-- .../balance/temperedlb/temperedlb.h | 3 ++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 8912463bb3..06d4239847 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -250,6 +250,33 @@ Default: false When an object load exceeds the processor-average load (i.e., we have a "long pole"), adjust the target load to be the maximum object load ("longest pole") instead of the processor-average load. +)" + }, + { + "alpha", + R"( +Values: +Default: 1.0 +Description: + Load part coefficient in affine combination of load and communication. +)" + }, + { + "beta", + R"( +Values: +Default: 0.0 +Description: + Communication part coefficient in affine combination of load and communication. +)" + }, + { + "gamma", + R"( +Values: +Default: 0.0 +Description: + ... )" }, }; @@ -353,6 +380,10 @@ void TemperedLB::inputParams(balance::SpecEntry* spec) { num_iters_ = spec->getOrDefault("iters", num_iters_); num_trials_ = spec->getOrDefault("trials", num_trials_); + alpha_ = spec->getOrDefault("alpha", alpha_); + beta_ = spec->getOrDefault("beta", beta_); + gamma_ = spec->getOrDefault("gamma", gamma_); + deterministic_ = spec->getOrDefault("deterministic", deterministic_); rollback_ = spec->getOrDefault("rollback", rollback_); target_pole_ = spec->getOrDefault("targetpole", target_pole_); @@ -499,9 +530,10 @@ void TemperedLB::doLBStages(TimeType start_imb) { cur_objs_.clear(); for (auto obj : *load_model_) { if (obj.isMigratable()) { - cur_objs_[obj] = load_model_->getWork( + // TODO: `beta_ * communication` component is still missing here + cur_objs_[obj] = alpha_ * load_model_->getWork( obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE} - ); + ) + gamma_; } } this_new_load_ = this_load; diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index f12f33ad3d..2032b42b0b 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -126,6 +126,9 @@ struct TemperedLB : BaseLB { uint8_t k_cur_ = 0; uint16_t iter_ = 0; uint16_t trial_ = 0; + double alpha_ = 1.0; + double beta_ = 0.0; + double gamma_ = 0.0; uint16_t num_iters_ = 4; /** * \brief Number of trials From 63c7ee10dd41e8d8fb910b949b1dfa9d0d352037 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Fri, 11 Mar 2022 14:35:19 +0100 Subject: [PATCH 003/106] #1672: lb: rename getWork to getLoad --- src/vt/vrt/collection/balance/baselb/load_sampler.cc | 2 +- src/vt/vrt/collection/balance/greedylb/greedylb.cc | 2 +- src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc | 2 +- src/vt/vrt/collection/balance/lb_common.cc | 4 ++-- src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc | 2 +- src/vt/vrt/collection/balance/model/comm_overhead.cc | 6 +++--- src/vt/vrt/collection/balance/model/comm_overhead.h | 2 +- src/vt/vrt/collection/balance/model/composed_model.cc | 4 ++-- src/vt/vrt/collection/balance/model/composed_model.h | 2 +- src/vt/vrt/collection/balance/model/linear_model.cc | 6 +++--- src/vt/vrt/collection/balance/model/linear_model.h | 2 +- src/vt/vrt/collection/balance/model/load_model.h | 4 ++-- src/vt/vrt/collection/balance/model/multiple_phases.cc | 6 +++--- src/vt/vrt/collection/balance/model/multiple_phases.h | 2 +- src/vt/vrt/collection/balance/model/naive_persistence.cc | 4 ++-- src/vt/vrt/collection/balance/model/naive_persistence.h | 2 +- src/vt/vrt/collection/balance/model/norm.cc | 8 ++++---- src/vt/vrt/collection/balance/model/norm.h | 2 +- src/vt/vrt/collection/balance/model/per_collection.cc | 6 +++--- src/vt/vrt/collection/balance/model/per_collection.h | 2 +- .../collection/balance/model/persistence_median_last_n.cc | 6 +++--- .../collection/balance/model/persistence_median_last_n.h | 2 +- .../vrt/collection/balance/model/proposed_reassignment.cc | 4 ++-- .../vrt/collection/balance/model/proposed_reassignment.h | 2 +- src/vt/vrt/collection/balance/model/raw_data.cc | 2 +- src/vt/vrt/collection/balance/model/raw_data.h | 3 +-- src/vt/vrt/collection/balance/model/select_subphases.cc | 6 +++--- src/vt/vrt/collection/balance/model/select_subphases.h | 2 +- src/vt/vrt/collection/balance/rotatelb/rotatelb.cc | 2 +- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 2 +- src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc | 2 +- tests/unit/collection/test_model_comm_overhead.nompi.cc | 4 ++-- tests/unit/collection/test_model_linear_model.nompi.cc | 4 ++-- tests/unit/collection/test_model_multiple_phases.nompi.cc | 4 ++-- .../unit/collection/test_model_naive_persistence.nompi.cc | 4 ++-- tests/unit/collection/test_model_norm.nompi.cc | 8 ++++---- .../unit/collection/test_model_per_collection.extended.cc | 4 ++-- .../test_model_persistence_median_last_n.nompi.cc | 4 ++-- tests/unit/collection/test_model_raw_data.nompi.cc | 4 ++-- .../unit/collection/test_model_select_subphases.nompi.cc | 6 +++--- 40 files changed, 72 insertions(+), 73 deletions(-) diff --git a/src/vt/vrt/collection/balance/baselb/load_sampler.cc b/src/vt/vrt/collection/balance/baselb/load_sampler.cc index 0a7425ce58..05dd76c80d 100644 --- a/src/vt/vrt/collection/balance/baselb/load_sampler.cc +++ b/src/vt/vrt/collection/balance/baselb/load_sampler.cc @@ -49,7 +49,7 @@ namespace vt { namespace vrt { namespace collection { namespace lb { void LoadSamplerBaseLB::buildHistogram() { for (auto obj : *load_model_) { - TimeTypeWrapper load = load_model_->getWork( + TimeTypeWrapper load = load_model_->getLoad( obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE} ); auto const& load_milli = loadMilli(load.seconds()); diff --git a/src/vt/vrt/collection/balance/greedylb/greedylb.cc b/src/vt/vrt/collection/balance/greedylb/greedylb.cc index 2905d53b11..810f7a134d 100644 --- a/src/vt/vrt/collection/balance/greedylb/greedylb.cc +++ b/src/vt/vrt/collection/balance/greedylb/greedylb.cc @@ -413,7 +413,7 @@ void GreedyLB::loadOverBin(ObjBinType bin, ObjBinListType& bin_list) { load_over[bin].push_back(obj_id); bin_list.pop_back(); - auto const& obj_time_milli = loadMilli(load_model_->getWork(obj_id, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE})); + auto const& obj_time_milli = loadMilli(load_model_->getLoad(obj_id, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE})); this_load -= obj_time_milli; diff --git a/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc b/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc index 773222cffd..f638bafd28 100644 --- a/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc +++ b/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc @@ -311,7 +311,7 @@ void HierarchicalLB::loadOverBin(ObjBinType bin, ObjBinListType& bin_list) { load_over[bin].push_back(obj_id); bin_list.pop_back(); - auto const& obj_time_milli = loadMilli(load_model_->getWork(obj_id, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE})); + auto const& obj_time_milli = loadMilli(load_model_->getLoad(obj_id, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE})); this_load -= obj_time_milli; diff --git a/src/vt/vrt/collection/balance/lb_common.cc b/src/vt/vrt/collection/balance/lb_common.cc index fadb75f356..dd50575542 100644 --- a/src/vt/vrt/collection/balance/lb_common.cc +++ b/src/vt/vrt/collection/balance/lb_common.cc @@ -61,11 +61,11 @@ LoadSummary getObjectLoads( LoadModel* model, ElementIDStruct object, PhaseOffset when ) { LoadSummary ret; - ret.whole_phase_load = model->getWork(object, {when.phases, PhaseOffset::WHOLE_PHASE}); + ret.whole_phase_load = model->getLoad(object, {when.phases, PhaseOffset::WHOLE_PHASE}); unsigned int subphases = model->getNumSubphases(); for (unsigned int i = 0; i < subphases; ++i) - ret.subphase_loads.push_back(model->getWork(object, {when.phases, i})); + ret.subphase_loads.push_back(model->getLoad(object, {when.phases, i})); return ret; } diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index b2885b55e2..fbddd73d31 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -608,7 +608,7 @@ void LBManager::computeStatistics( total_load_from_model = 0.; std::vector obj_load_model; for (auto elm : *model) { - auto work = model->getWork( + auto work = model->getLoad( elm, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE} ); obj_load_model.emplace_back( diff --git a/src/vt/vrt/collection/balance/model/comm_overhead.cc b/src/vt/vrt/collection/balance/model/comm_overhead.cc index 93bcce688b..27050fd086 100644 --- a/src/vt/vrt/collection/balance/model/comm_overhead.cc +++ b/src/vt/vrt/collection/balance/model/comm_overhead.cc @@ -60,8 +60,8 @@ void CommOverhead::setLoads(std::unordered_map const* pr ComposedModel::setLoads(proc_load, proc_comm); } -TimeType CommOverhead::getWork(ElementIDStruct object, PhaseOffset offset) { - auto work = ComposedModel::getWork(object, offset); +TimeType CommOverhead::getLoad(ElementIDStruct object, PhaseOffset offset) { + auto work = ComposedModel::getLoad(object, offset); auto phase = getNumCompletedPhases() + offset.phases; auto& comm = proc_comm_->at(phase); @@ -79,7 +79,7 @@ TimeType CommOverhead::getWork(ElementIDStruct object, PhaseOffset offset) { return work + overhead; } else { // @todo: we don't record comm costs for each subphase---split it proportionally - auto whole_phase_work = ComposedModel::getWork(object, PhaseOffset{offset.phases, PhaseOffset::WHOLE_PHASE}); + auto whole_phase_work = ComposedModel::getLoad(object, PhaseOffset{offset.phases, PhaseOffset::WHOLE_PHASE}); return work + overhead * ( static_cast(work)/whole_phase_work ); } } diff --git a/src/vt/vrt/collection/balance/model/comm_overhead.h b/src/vt/vrt/collection/balance/model/comm_overhead.h index 505f83d05e..3b23a3ae73 100644 --- a/src/vt/vrt/collection/balance/model/comm_overhead.h +++ b/src/vt/vrt/collection/balance/model/comm_overhead.h @@ -68,7 +68,7 @@ struct CommOverhead : public ComposedModel { void setLoads(std::unordered_map const* proc_load, std::unordered_map const* proc_comm) override; - TimeType getWork(ElementIDStruct object, PhaseOffset when) override; + TimeType getLoad(ElementIDStruct object, PhaseOffset when) override; private: std::unordered_map const* proc_comm_; /**< Underlying comm data */ diff --git a/src/vt/vrt/collection/balance/model/composed_model.cc b/src/vt/vrt/collection/balance/model/composed_model.cc index b39b7c95c3..1272564164 100644 --- a/src/vt/vrt/collection/balance/model/composed_model.cc +++ b/src/vt/vrt/collection/balance/model/composed_model.cc @@ -54,8 +54,8 @@ void ComposedModel::updateLoads(PhaseType last_completed_phase) { base_->updateLoads(last_completed_phase); } -TimeType ComposedModel::getWork(ElementIDStruct object, PhaseOffset when) { - return base_->getWork(object, when); +TimeType ComposedModel::getLoad(ElementIDStruct object, PhaseOffset when) { + return base_->getLoad(object, when); } bool ComposedModel::hasRawLoad() const { diff --git a/src/vt/vrt/collection/balance/model/composed_model.h b/src/vt/vrt/collection/balance/model/composed_model.h index 92ea2a6e86..f170075493 100644 --- a/src/vt/vrt/collection/balance/model/composed_model.h +++ b/src/vt/vrt/collection/balance/model/composed_model.h @@ -69,7 +69,7 @@ class ComposedModel : public LoadModel void updateLoads(PhaseType last_completed_phase) override; - TimeType getWork(ElementIDStruct object, PhaseOffset when) override; + TimeType getLoad(ElementIDStruct object, PhaseOffset when) override; bool hasRawLoad() const override; TimeType getRawLoad(ElementIDStruct object, PhaseOffset when) override; unsigned int getNumPastPhasesNeeded(unsigned int look_back) override; diff --git a/src/vt/vrt/collection/balance/model/linear_model.cc b/src/vt/vrt/collection/balance/model/linear_model.cc index 030fccffdb..b967484cfa 100644 --- a/src/vt/vrt/collection/balance/model/linear_model.cc +++ b/src/vt/vrt/collection/balance/model/linear_model.cc @@ -48,12 +48,12 @@ namespace vt { namespace vrt { namespace collection { namespace balance { -TimeType LinearModel::getWork(ElementIDStruct object, PhaseOffset when) { +TimeType LinearModel::getLoad(ElementIDStruct object, PhaseOffset when) { using util::stats::LinearRegression; // Retrospective queries don't call for a prediction if (when.phases < 0) - return ComposedModel::getWork(object, when); + return ComposedModel::getLoad(object, when); std::vector x; std::vector y; @@ -65,7 +65,7 @@ TimeType LinearModel::getWork(ElementIDStruct object, PhaseOffset when) { for (int i = -1 * static_cast(phases); i < 0; i++) { x.emplace_back(i); past_phase.phases = i; - y.emplace_back(ComposedModel::getWork(object, past_phase)); + y.emplace_back(ComposedModel::getLoad(object, past_phase)); } // should we re-create this every time? diff --git a/src/vt/vrt/collection/balance/model/linear_model.h b/src/vt/vrt/collection/balance/model/linear_model.h index e8b515d6a7..5b3a87eaef 100644 --- a/src/vt/vrt/collection/balance/model/linear_model.h +++ b/src/vt/vrt/collection/balance/model/linear_model.h @@ -69,7 +69,7 @@ struct LinearModel : ComposedModel { past_len_(in_past_len) { } - TimeType getWork(ElementIDStruct object, PhaseOffset when) override; + TimeType getLoad(ElementIDStruct object, PhaseOffset when) override; unsigned int getNumPastPhasesNeeded(unsigned int look_back) override; private: diff --git a/src/vt/vrt/collection/balance/model/load_model.h b/src/vt/vrt/collection/balance/model/load_model.h index aa362f7abb..7e53da69c5 100644 --- a/src/vt/vrt/collection/balance/model/load_model.h +++ b/src/vt/vrt/collection/balance/model/load_model.h @@ -206,7 +206,7 @@ struct LoadModel * * This would typically be called by LBManager collectively inside * an epoch that can be used for global communication in advance of - * any calls to getWork() + * any calls to getLoad() * * The `setLoads` method must have been called before any call to * this. @@ -224,7 +224,7 @@ struct LoadModel * The `updateLoads` method must have been called before any call to * this. */ - virtual TimeType getWork(ElementIDStruct object, PhaseOffset when) = 0; + virtual TimeType getLoad(ElementIDStruct object, PhaseOffset when) = 0; /** * \brief Whether or not the model is based on the RawData model diff --git a/src/vt/vrt/collection/balance/model/multiple_phases.cc b/src/vt/vrt/collection/balance/model/multiple_phases.cc index a7354e09be..1ba821b3d3 100644 --- a/src/vt/vrt/collection/balance/model/multiple_phases.cc +++ b/src/vt/vrt/collection/balance/model/multiple_phases.cc @@ -45,16 +45,16 @@ namespace vt { namespace vrt { namespace collection { namespace balance { -TimeType MultiplePhases::getWork(ElementIDStruct object, PhaseOffset when) { +TimeType MultiplePhases::getLoad(ElementIDStruct object, PhaseOffset when) { // Retrospective queries don't call for a prediction if (when.phases < 0) - return ComposedModel::getWork(object, when); + return ComposedModel::getLoad(object, when); TimeType sum = 0.0; for (int i = 0; i < future_phase_block_size_; ++i) { PhaseOffset p{future_phase_block_size_*when.phases + i, when.subphase}; - sum += ComposedModel::getWork(object, p); + sum += ComposedModel::getLoad(object, p); } return sum; diff --git a/src/vt/vrt/collection/balance/model/multiple_phases.h b/src/vt/vrt/collection/balance/model/multiple_phases.h index d8bb9ca7b1..ac01f997ad 100644 --- a/src/vt/vrt/collection/balance/model/multiple_phases.h +++ b/src/vt/vrt/collection/balance/model/multiple_phases.h @@ -79,7 +79,7 @@ struct MultiplePhases : ComposedModel { , future_phase_block_size_(in_future_phase_block_size) { } - TimeType getWork(ElementIDStruct object, PhaseOffset when) override; + TimeType getLoad(ElementIDStruct object, PhaseOffset when) override; private: int future_phase_block_size_ = 0; diff --git a/src/vt/vrt/collection/balance/model/naive_persistence.cc b/src/vt/vrt/collection/balance/model/naive_persistence.cc index 7a1de40a4e..2cd10186f0 100644 --- a/src/vt/vrt/collection/balance/model/naive_persistence.cc +++ b/src/vt/vrt/collection/balance/model/naive_persistence.cc @@ -50,12 +50,12 @@ NaivePersistence::NaivePersistence(std::shared_ptr base) : ComposedModel(base) { } -TimeType NaivePersistence::getWork(ElementIDStruct object, PhaseOffset offset) +TimeType NaivePersistence::getLoad(ElementIDStruct object, PhaseOffset offset) { if (offset.phases >= 0) offset.phases = -1; - return ComposedModel::getWork(object, offset); + return ComposedModel::getLoad(object, offset); } TimeType NaivePersistence::getRawLoad(ElementIDStruct object, PhaseOffset offset) diff --git a/src/vt/vrt/collection/balance/model/naive_persistence.h b/src/vt/vrt/collection/balance/model/naive_persistence.h index 79cfe53ad1..8a83b4e353 100644 --- a/src/vt/vrt/collection/balance/model/naive_persistence.h +++ b/src/vt/vrt/collection/balance/model/naive_persistence.h @@ -60,7 +60,7 @@ struct NaivePersistence : public ComposedModel { * \param[in] base: The source of underlying load numbers to return; must not be null */ explicit NaivePersistence(std::shared_ptr base); - TimeType getWork(ElementIDStruct object, PhaseOffset when) override; + TimeType getLoad(ElementIDStruct object, PhaseOffset when) override; TimeType getRawLoad(ElementIDStruct object, PhaseOffset offset) override; unsigned int getNumPastPhasesNeeded(unsigned int look_back) override; }; // class NaivePersistence diff --git a/src/vt/vrt/collection/balance/model/norm.cc b/src/vt/vrt/collection/balance/model/norm.cc index 51b9d3e047..dca85c6994 100644 --- a/src/vt/vrt/collection/balance/model/norm.cc +++ b/src/vt/vrt/collection/balance/model/norm.cc @@ -55,17 +55,17 @@ Norm::Norm(std::shared_ptr base, double power) vtAssert(power >= 0.0, "Reciprocal loads make no sense"); } -TimeType Norm::getWork(ElementIDStruct object, PhaseOffset offset) +TimeType Norm::getLoad(ElementIDStruct object, PhaseOffset offset) { if (offset.subphase != PhaseOffset::WHOLE_PHASE) - return ComposedModel::getWork(object, offset); + return ComposedModel::getLoad(object, offset); if (std::isfinite(power_)) { double sum = 0.0; for (int i = 0; i < getNumSubphases(); ++i) { offset.subphase = i; - auto t = ComposedModel::getWork(object, offset); + auto t = ComposedModel::getLoad(object, offset); sum += std::pow(t, power_); } @@ -76,7 +76,7 @@ TimeType Norm::getWork(ElementIDStruct object, PhaseOffset offset) for (int i = 0; i < getNumSubphases(); ++i) { offset.subphase = i; - auto t = ComposedModel::getWork(object, offset); + auto t = ComposedModel::getLoad(object, offset); max = std::max(max, t); } diff --git a/src/vt/vrt/collection/balance/model/norm.h b/src/vt/vrt/collection/balance/model/norm.h index b9ba8bd24d..8fee467fc9 100644 --- a/src/vt/vrt/collection/balance/model/norm.h +++ b/src/vt/vrt/collection/balance/model/norm.h @@ -64,7 +64,7 @@ class Norm : public ComposedModel { */ Norm(std::shared_ptr base, double power); - TimeType getWork(ElementIDStruct object, PhaseOffset when) override; + TimeType getLoad(ElementIDStruct object, PhaseOffset when) override; private: const double power_; diff --git a/src/vt/vrt/collection/balance/model/per_collection.cc b/src/vt/vrt/collection/balance/model/per_collection.cc index 170d5446ea..54cd37b9f1 100644 --- a/src/vt/vrt/collection/balance/model/per_collection.cc +++ b/src/vt/vrt/collection/balance/model/per_collection.cc @@ -68,14 +68,14 @@ void PerCollection::updateLoads(PhaseType last_completed_phase) { ComposedModel::updateLoads(last_completed_phase); } -TimeType PerCollection::getWork(ElementIDStruct object, PhaseOffset when) { +TimeType PerCollection::getLoad(ElementIDStruct object, PhaseOffset when) { // See if some specific model has been given for the object in question auto mi = models_.find(theNodeLBData()->getCollectionProxyForElement(object)); if (mi != models_.end()) - return mi->second->getWork(object, when); + return mi->second->getLoad(object, when); // Otherwise, default to the given base model - return ComposedModel::getWork(object, when); + return ComposedModel::getLoad(object, when); } bool PerCollection::hasRawLoad() const { diff --git a/src/vt/vrt/collection/balance/model/per_collection.h b/src/vt/vrt/collection/balance/model/per_collection.h index 2be8ac435e..bf180f8a44 100644 --- a/src/vt/vrt/collection/balance/model/per_collection.h +++ b/src/vt/vrt/collection/balance/model/per_collection.h @@ -78,7 +78,7 @@ struct PerCollection : public ComposedModel void updateLoads(PhaseType last_completed_phase) override; - TimeType getWork(ElementIDStruct object, PhaseOffset when) override; + TimeType getLoad(ElementIDStruct object, PhaseOffset when) override; bool hasRawLoad() const override; TimeType getRawLoad(ElementIDStruct object, PhaseOffset when) override; unsigned int getNumPastPhasesNeeded(unsigned int look_back) override; diff --git a/src/vt/vrt/collection/balance/model/persistence_median_last_n.cc b/src/vt/vrt/collection/balance/model/persistence_median_last_n.cc index 74aaa05c0c..14e8e2a974 100644 --- a/src/vt/vrt/collection/balance/model/persistence_median_last_n.cc +++ b/src/vt/vrt/collection/balance/model/persistence_median_last_n.cc @@ -54,17 +54,17 @@ PersistenceMedianLastN::PersistenceMedianLastN(std::shared_ptr base, vtAssert(n > 0, "Cannot take a median over no phases"); } -TimeType PersistenceMedianLastN::getWork(ElementIDStruct object, PhaseOffset when) +TimeType PersistenceMedianLastN::getLoad(ElementIDStruct object, PhaseOffset when) { // Retrospective queries don't call for a prospective calculation if (when.phases < 0) - return ComposedModel::getWork(object, when); + return ComposedModel::getLoad(object, when); unsigned int phases = std::min(n_, getNumCompletedPhases()); std::vector times(phases); for (unsigned int i = 1; i <= phases; ++i) { PhaseOffset p{-1*static_cast(i), when.subphase}; - TimeType t = ComposedModel::getWork(object, p); + TimeType t = ComposedModel::getLoad(object, p); times[i-1] = t; } diff --git a/src/vt/vrt/collection/balance/model/persistence_median_last_n.h b/src/vt/vrt/collection/balance/model/persistence_median_last_n.h index ff8e126ee4..aa13132bf3 100644 --- a/src/vt/vrt/collection/balance/model/persistence_median_last_n.h +++ b/src/vt/vrt/collection/balance/model/persistence_median_last_n.h @@ -65,7 +65,7 @@ struct PersistenceMedianLastN : public ComposedModel */ PersistenceMedianLastN(std::shared_ptr base, unsigned int n); - TimeType getWork(ElementIDStruct object, PhaseOffset when) override; + TimeType getLoad(ElementIDStruct object, PhaseOffset when) override; unsigned int getNumPastPhasesNeeded(unsigned int look_back) override; private: diff --git a/src/vt/vrt/collection/balance/model/proposed_reassignment.cc b/src/vt/vrt/collection/balance/model/proposed_reassignment.cc index 8c330b6e7e..2ffea77015 100644 --- a/src/vt/vrt/collection/balance/model/proposed_reassignment.cc +++ b/src/vt/vrt/collection/balance/model/proposed_reassignment.cc @@ -93,7 +93,7 @@ int ProposedReassignment::getNumObjects() return base - departing + arriving; } -TimeType ProposedReassignment::getWork(ElementIDStruct object, PhaseOffset when) +TimeType ProposedReassignment::getLoad(ElementIDStruct object, PhaseOffset when) { auto a = reassignment_->arrive_.find(object); if (a != reassignment_->arrive_.end()) { @@ -104,7 +104,7 @@ TimeType ProposedReassignment::getWork(ElementIDStruct object, PhaseOffset when) vtAssert(reassignment_->depart_.find(object) == reassignment_->depart_.end(), "Departing object should not appear as a load query subject"); - return ComposedModel::getWork(object, when); + return ComposedModel::getLoad(object, when); } TimeType ProposedReassignment::getRawLoad(ElementIDStruct object, PhaseOffset when) diff --git a/src/vt/vrt/collection/balance/model/proposed_reassignment.h b/src/vt/vrt/collection/balance/model/proposed_reassignment.h index b5b8e7c6db..9c3c066fc7 100644 --- a/src/vt/vrt/collection/balance/model/proposed_reassignment.h +++ b/src/vt/vrt/collection/balance/model/proposed_reassignment.h @@ -57,7 +57,7 @@ struct ProposedReassignment : public ComposedModel { ObjectIterator begin() override; int getNumObjects() override; - TimeType getWork(ElementIDStruct object, PhaseOffset when) override; + TimeType getLoad(ElementIDStruct object, PhaseOffset when) override; TimeType getRawLoad(ElementIDStruct object, PhaseOffset when) override; private: diff --git a/src/vt/vrt/collection/balance/model/raw_data.cc b/src/vt/vrt/collection/balance/model/raw_data.cc index 339629649b..4438ca17c3 100644 --- a/src/vt/vrt/collection/balance/model/raw_data.cc +++ b/src/vt/vrt/collection/balance/model/raw_data.cc @@ -94,7 +94,7 @@ int RawData::getNumSubphases() { return subphases; } -TimeType RawData::getWork(ElementIDStruct object, PhaseOffset offset) +TimeType RawData::getLoad(ElementIDStruct object, PhaseOffset offset) { return getRawLoad(object, offset); } diff --git a/src/vt/vrt/collection/balance/model/raw_data.h b/src/vt/vrt/collection/balance/model/raw_data.h index 858bf43f55..a751b6d06c 100644 --- a/src/vt/vrt/collection/balance/model/raw_data.h +++ b/src/vt/vrt/collection/balance/model/raw_data.h @@ -59,11 +59,10 @@ namespace vt { namespace vrt { namespace collection { namespace balance { struct RawData : public LoadModel { RawData() = default; void updateLoads(PhaseType last_completed_phase) override; - TimeType getWork(ElementIDStruct object, PhaseOffset when) override; + TimeType getLoad(ElementIDStruct object, PhaseOffset when) override; bool hasRawLoad() const override { return true; } TimeType getRawLoad(ElementIDStruct object, PhaseOffset when) override; - void setLoads(std::unordered_map const* proc_load, std::unordered_map const* proc_comm) override; diff --git a/src/vt/vrt/collection/balance/model/select_subphases.cc b/src/vt/vrt/collection/balance/model/select_subphases.cc index 78db0a9997..3d4128f1cc 100644 --- a/src/vt/vrt/collection/balance/model/select_subphases.cc +++ b/src/vt/vrt/collection/balance/model/select_subphases.cc @@ -58,18 +58,18 @@ SelectSubphases::SelectSubphases(std::shared_ptr base, std::vector base, std::vector subphases); - TimeType getWork(ElementIDStruct object, PhaseOffset when) override; + TimeType getLoad(ElementIDStruct object, PhaseOffset when) override; int getNumSubphases() override; std::vector subphases_; diff --git a/src/vt/vrt/collection/balance/rotatelb/rotatelb.cc b/src/vt/vrt/collection/balance/rotatelb/rotatelb.cc index 4b60afefaf..a3fee46c60 100644 --- a/src/vt/vrt/collection/balance/rotatelb/rotatelb.cc +++ b/src/vt/vrt/collection/balance/rotatelb/rotatelb.cc @@ -76,7 +76,7 @@ void RotateLB::runLB(TimeType) { } for (auto obj : *load_model_) { - TimeTypeWrapper const load = load_model_->getWork(obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE}); + TimeTypeWrapper const load = load_model_->getLoad(obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE}); vt_debug_print( terse, lb, "\t RotateLB::migrating object to: obj={}, load={}, to_node={}\n", diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 06d4239847..f2525e2404 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -531,7 +531,7 @@ void TemperedLB::doLBStages(TimeType start_imb) { for (auto obj : *load_model_) { if (obj.isMigratable()) { // TODO: `beta_ * communication` component is still missing here - cur_objs_[obj] = alpha_ * load_model_->getWork( + cur_objs_[obj] = alpha_ * load_model_->getLoad( obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE} ) + gamma_; } diff --git a/src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc b/src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc index 95c37fd41d..575be300c7 100644 --- a/src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc +++ b/src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc @@ -483,7 +483,7 @@ std::unique_ptr ZoltanLB::makeGraph() { { int idx = 0; for (auto&& obj : load_objs) { - auto load = load_model_->getWork( + auto load = load_model_->getLoad( obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE} ); diff --git a/tests/unit/collection/test_model_comm_overhead.nompi.cc b/tests/unit/collection/test_model_comm_overhead.nompi.cc index ff1549c740..598ac4a5ee 100644 --- a/tests/unit/collection/test_model_comm_overhead.nompi.cc +++ b/tests/unit/collection/test_model_comm_overhead.nompi.cc @@ -86,7 +86,7 @@ struct StubModel : LoadModel { void updateLoads(PhaseType) override {} - TimeType getWork(ElementIDStruct id, PhaseOffset phase) override { + TimeType getLoad(ElementIDStruct id, PhaseOffset phase) override { const auto work = proc_load_->at(0).at(id).whole_phase_load; if (phase.subphase == PhaseOffset::WHOLE_PHASE) { @@ -167,7 +167,7 @@ TEST_F(TestModelCommOverhead, test_model_comm_overhead_1) { ++objects_seen; const auto subphase = num_phases == 0 ? PhaseOffset::WHOLE_PHASE : 1; - auto work_val = test_model->getWork(obj, PhaseOffset{0, subphase}); + auto work_val = test_model->getLoad(obj, PhaseOffset{0, subphase}); EXPECT_EQ(work_val, expected_work[num_phases]) << fmt::format("For element={} on phase={}\n", obj, num_phases); } diff --git a/tests/unit/collection/test_model_linear_model.nompi.cc b/tests/unit/collection/test_model_linear_model.nompi.cc index 7aa4342fde..8aa2223b9e 100644 --- a/tests/unit/collection/test_model_linear_model.nompi.cc +++ b/tests/unit/collection/test_model_linear_model.nompi.cc @@ -79,7 +79,7 @@ struct StubModel : LoadModel { void updateLoads(PhaseType) override {} - TimeType getWork(ElementIDStruct id, PhaseOffset phase) override { + TimeType getLoad(ElementIDStruct id, PhaseOffset phase) override { // Most recent phase will be at the end of vector return proc_load_->at(num_phases + phase.phases).at(id).whole_phase_load; } @@ -149,7 +149,7 @@ TEST_F(TestLinearModel, test_model_linear_model_1) { ++num_phases; for (auto&& obj : *test_model) { - auto work_val = test_model->getWork(obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE}); + auto work_val = test_model->getLoad(obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE}); EXPECT_EQ( work_val, obj.id == 1 ? expected_data[iter].first : expected_data[iter].second) diff --git a/tests/unit/collection/test_model_multiple_phases.nompi.cc b/tests/unit/collection/test_model_multiple_phases.nompi.cc index 4c2d694897..9fd7c638ab 100644 --- a/tests/unit/collection/test_model_multiple_phases.nompi.cc +++ b/tests/unit/collection/test_model_multiple_phases.nompi.cc @@ -77,7 +77,7 @@ struct StubModel : LoadModel { void updateLoads(PhaseType) override {} - TimeType getWork(ElementIDStruct id, PhaseOffset phase) override { + TimeType getLoad(ElementIDStruct id, PhaseOffset phase) override { // Here we return predicted loads for future phases // For the sake of the test we use values from the past phases return proc_load_->at(phase.phases).at(id).whole_phase_load; @@ -119,7 +119,7 @@ TEST_F(TestModelMultiplePhases, test_model_multiple_phases_1) { test_model->updateLoads(3); for (auto&& obj : *test_model) { - auto work_val = test_model->getWork(obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE}); + auto work_val = test_model->getLoad(obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE}); EXPECT_EQ(work_val, obj.id == 1 ? TimeType{100} : TimeType{85}); } } diff --git a/tests/unit/collection/test_model_naive_persistence.nompi.cc b/tests/unit/collection/test_model_naive_persistence.nompi.cc index 48ab9afe35..1b3a1f88b0 100644 --- a/tests/unit/collection/test_model_naive_persistence.nompi.cc +++ b/tests/unit/collection/test_model_naive_persistence.nompi.cc @@ -81,7 +81,7 @@ struct StubModel : LoadModel { void updateLoads(PhaseType) override {} - TimeType getWork(ElementIDStruct id, PhaseOffset phase) override { + TimeType getLoad(ElementIDStruct id, PhaseOffset phase) override { EXPECT_LE(phase.phases, -1); return proc_load_->at(getIndexFromPhase(phase.phases)).at(id).whole_phase_load; } @@ -124,7 +124,7 @@ TEST_F(TestModelNaivePersistence, test_model_naive_persistence_1) { for (auto it = test_model->begin(); it != test_model->end(); ++it) { auto &&obj = *it; for (auto phase : {0, -1, -2, -3, -4}) { - auto work_val = test_model->getWork(obj, PhaseOffset{phase, 1}); + auto work_val = test_model->getLoad(obj, PhaseOffset{phase, 1}); EXPECT_EQ(work_val, proc_loads.at(getIndexFromPhase(phase)).at(obj).whole_phase_load); } } diff --git a/tests/unit/collection/test_model_norm.nompi.cc b/tests/unit/collection/test_model_norm.nompi.cc index 7d118e95d2..dfd22eb046 100644 --- a/tests/unit/collection/test_model_norm.nompi.cc +++ b/tests/unit/collection/test_model_norm.nompi.cc @@ -84,7 +84,7 @@ struct StubModel : LoadModel { void updateLoads(PhaseType) override {} - TimeType getWork(ElementIDStruct id, PhaseOffset phase) override { + TimeType getLoad(ElementIDStruct id, PhaseOffset phase) override { return proc_load_->at(0).at(id).subphase_loads.at(phase.subphase); } @@ -125,7 +125,7 @@ TEST_F(TestModelNorm, test_model_norm_1) { // offset.subphase != PhaseOffset::WHOLE_PHASE // expect work load value for given subphase - auto work_val = test_model->getWork(obj, PhaseOffset{0, iter}); + auto work_val = test_model->getLoad(obj, PhaseOffset{0, iter}); EXPECT_EQ(work_val, proc_load[0][obj].subphase_loads[iter]); } @@ -155,7 +155,7 @@ TEST_F(TestModelNorm, test_model_norm_2) { ++objects_seen; auto work_val = - test_model->getWork(obj, PhaseOffset{0, PhaseOffset::WHOLE_PHASE}); + test_model->getLoad(obj, PhaseOffset{0, PhaseOffset::WHOLE_PHASE}); EXPECT_NEAR(work_val, expected_norms[obj.id - 1], 0.001); } @@ -184,7 +184,7 @@ TEST_F(TestModelNorm, test_model_norm_3) { ++objects_seen; auto work_val = - test_model->getWork(obj, PhaseOffset{0, PhaseOffset::WHOLE_PHASE}); + test_model->getLoad(obj, PhaseOffset{0, PhaseOffset::WHOLE_PHASE}); EXPECT_EQ(work_val, expected_norms[obj.id - 1]); } diff --git a/tests/unit/collection/test_model_per_collection.extended.cc b/tests/unit/collection/test_model_per_collection.extended.cc index ebc2175372..68dc72d279 100644 --- a/tests/unit/collection/test_model_per_collection.extended.cc +++ b/tests/unit/collection/test_model_per_collection.extended.cc @@ -74,7 +74,7 @@ struct ConstantTestModel : ComposedModel { proxy_(in_proxy) { } - TimeType getWork(ElementIDStruct, PhaseOffset) override { + TimeType getLoad(ElementIDStruct, PhaseOffset) override { return static_cast(proxy_); } @@ -153,7 +153,7 @@ TEST_F(TestModelPerCollection, test_model_per_collection_1) { // model to function model->updateLoads(0); for (auto&& obj : *model) { - auto work_val = model->getWork(obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE}); + auto work_val = model->getLoad(obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE}); if (id_proxy_map.find(obj) != id_proxy_map.end()) { EXPECT_DOUBLE_EQ(work_val, static_cast(id_proxy_map[obj])); } diff --git a/tests/unit/collection/test_model_persistence_median_last_n.nompi.cc b/tests/unit/collection/test_model_persistence_median_last_n.nompi.cc index 16c33cf74b..d9cfdccf0a 100644 --- a/tests/unit/collection/test_model_persistence_median_last_n.nompi.cc +++ b/tests/unit/collection/test_model_persistence_median_last_n.nompi.cc @@ -79,7 +79,7 @@ struct StubModel : LoadModel { void updateLoads(PhaseType) override {} - TimeType getWork(ElementIDStruct id, PhaseOffset phase) override { + TimeType getLoad(ElementIDStruct id, PhaseOffset phase) override { // Most recent phase will be at the end of vector return proc_load_->at(num_phases + phase.phases).at(id).whole_phase_load; } @@ -148,7 +148,7 @@ TEST_F(TestModelPersistenceMedianLastN, test_model_persistence_median_last_n_1) ++num_phases; for (auto&& obj : *test_model) { - auto work_val = test_model->getWork(obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE}); + auto work_val = test_model->getLoad(obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE}); EXPECT_EQ( work_val, obj.id == 1 ? expected_medians[iter].first : expected_medians[iter].second) diff --git a/tests/unit/collection/test_model_raw_data.nompi.cc b/tests/unit/collection/test_model_raw_data.nompi.cc index 3a664075f9..916608d80e 100644 --- a/tests/unit/collection/test_model_raw_data.nompi.cc +++ b/tests/unit/collection/test_model_raw_data.nompi.cc @@ -101,10 +101,10 @@ TEST_F(TestRawData, test_model_raw_data_scalar) { EXPECT_TRUE(obj.id == 1 || obj.id == 2); objects_seen++; - auto work_val = test_model->getWork(obj, PhaseOffset{-1, PhaseOffset::WHOLE_PHASE}); + auto work_val = test_model->getLoad(obj, PhaseOffset{-1, PhaseOffset::WHOLE_PHASE}); EXPECT_EQ(work_val, load_holder[iter][obj].whole_phase_load); - auto sub_work_val = test_model->getWork(obj, PhaseOffset{-1, 0}); + auto sub_work_val = test_model->getLoad(obj, PhaseOffset{-1, 0}); EXPECT_EQ(sub_work_val, load_holder[iter][obj].subphase_loads[0]); auto raw_load_val = test_model->getRawLoad(obj, PhaseOffset{-1, PhaseOffset::WHOLE_PHASE}); diff --git a/tests/unit/collection/test_model_select_subphases.nompi.cc b/tests/unit/collection/test_model_select_subphases.nompi.cc index 2e920359fa..fbd271f9f0 100644 --- a/tests/unit/collection/test_model_select_subphases.nompi.cc +++ b/tests/unit/collection/test_model_select_subphases.nompi.cc @@ -83,7 +83,7 @@ struct StubModel : LoadModel { void updateLoads(PhaseType) override {} - TimeType getWork(ElementIDStruct id, PhaseOffset phase) override { + TimeType getLoad(ElementIDStruct id, PhaseOffset phase) override { return proc_load_->at(0).at(id).subphase_loads.at(phase.subphase); } @@ -146,7 +146,7 @@ TEST_F(TestModelSelectSubphases, test_model_select_subphases_1) { // offset.subphase != PhaseOffset::WHOLE_PHASE // expect work load value for given subphase - auto work_val = test_model->getWork(obj, PhaseOffset{0, iter}); + auto work_val = test_model->getLoad(obj, PhaseOffset{0, iter}); EXPECT_EQ(work_val, expected_values[obj][iter]); } @@ -187,7 +187,7 @@ TEST_F(TestModelSelectSubphases, test_model_select_subphases_2) { ++objects_seen; auto work_val = - test_model->getWork(obj, PhaseOffset{0, PhaseOffset::WHOLE_PHASE}); + test_model->getLoad(obj, PhaseOffset{0, PhaseOffset::WHOLE_PHASE}); EXPECT_EQ(work_val, expected_values[obj]); } From 26e478bbae56e75ddfd228ff78d2bd0f961f0291 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Fri, 11 Mar 2022 15:01:03 +0100 Subject: [PATCH 004/106] #1672: lb: add getTotalWork and getComm methods --- src/vt/elm/elm_comm.h | 1 + .../vrt/collection/balance/model/load_model.h | 39 +++++++++++++++++-- .../vrt/collection/balance/model/raw_data.cc | 22 +++++++++++ .../vrt/collection/balance/model/raw_data.h | 1 + 4 files changed, 60 insertions(+), 3 deletions(-) diff --git a/src/vt/elm/elm_comm.h b/src/vt/elm/elm_comm.h index a4440f794f..60be58c073 100644 --- a/src/vt/elm/elm_comm.h +++ b/src/vt/elm/elm_comm.h @@ -120,6 +120,7 @@ struct CommKey { ElementIDType fromNode() const { return nfrom_; } ElementIDType toNode() const { return nto_; } ElementIDStruct edgeID() const { return edge_id_; } + CommCategory commCategory() const { return cat_; } bool selfEdge() const { return cat_ == CommCategory::SendRecv and from_ == to_; } bool offNode() const { diff --git a/src/vt/vrt/collection/balance/model/load_model.h b/src/vt/vrt/collection/balance/model/load_model.h index 7e53da69c5..e911d58082 100644 --- a/src/vt/vrt/collection/balance/model/load_model.h +++ b/src/vt/vrt/collection/balance/model/load_model.h @@ -45,6 +45,7 @@ #define INCLUDED_VT_VRT_COLLECTION_BALANCE_MODEL_LOAD_MODEL_H #include "vt/config.h" +#include "vt/timing/timing_type.h" #include "vt/vrt/collection/balance/lb_common.h" #include "vt/elm/elm_comm.h" @@ -238,9 +239,6 @@ struct LoadModel * \param[in] when The interval in which the raw load is desired * * \return How much computation time the object required - * - * The `updateLoads` method must have been called before any call to - * this. */ virtual TimeType getRawLoad(ElementIDStruct object, PhaseOffset when) { vtAbort( @@ -249,6 +247,41 @@ struct LoadModel return 0.0; }; + /** + * \brief Provide an estimate of the communication cost for a given object + * during a specified interval + * + * \param[in] object The object whose communication is desired + * \param[in] when The interval in which the communication takes place + * + * \return How much communication time the object is estimated to require + * + * The `updateLoads` method must have been called before any call to + * this. + */ + virtual TimeType getComm(ElementIDStruct object, PhaseOffset when) { + return {}; + } + + /** + * \brief Provide an estimate of the total work for a given object during + * a specified interval + * + * \param[in] object The object whose total work is desired + * \param[in] when The interval in which the work takes place + * + * \return Estimated total time of work for the object + * + * The `updateLoads` method must have been called before any call to + * this. + */ + TimeType getTotalWork( + ElementIDStruct object, PhaseOffset when, + double alpha, double beta, double gamma + ) { + return alpha * getLoad(object, when) + beta * getComm(object, when) + gamma; + } + /** * \brief Compute how many phases of past load statistics need to be * kept availble for the model to use diff --git a/src/vt/vrt/collection/balance/model/raw_data.cc b/src/vt/vrt/collection/balance/model/raw_data.cc index 4438ca17c3..ec2050f2ab 100644 --- a/src/vt/vrt/collection/balance/model/raw_data.cc +++ b/src/vt/vrt/collection/balance/model/raw_data.cc @@ -107,6 +107,28 @@ TimeType RawData::getRawLoad(ElementIDStruct object, PhaseOffset offset) { return proc_load_->at(phase).at(object).get(offset); } +TimeType RawData::getComm(ElementIDStruct object, PhaseOffset when) { + auto phase = getNumCompletedPhases() + when.phases; + auto& comm = proc_comm_->at(phase); + + TimeType incoming = 0., outgoing = 0.; + for (auto&& c : comm) { + if (c.first.commCategory() == elm::CommCategory::SendRecv + and c.first.offNode()) { + if (c.first.toObj() == object) { + incoming += /*per_msg_weight_ * */ c.second.messages; + incoming += /*per_byte_weight_ **/ c.second.bytes; + } else if (c.first.fromObj() == object) { + outgoing += /*per_msg_weight_ * */ c.second.messages; + outgoing += /*per_byte_weight_ **/ c.second.bytes; + } + } + } + + // TODO: consider subphases (?) + return std::max(incoming, outgoing); +} + unsigned int RawData::getNumPastPhasesNeeded(unsigned int look_back) { return look_back; diff --git a/src/vt/vrt/collection/balance/model/raw_data.h b/src/vt/vrt/collection/balance/model/raw_data.h index a751b6d06c..818d323978 100644 --- a/src/vt/vrt/collection/balance/model/raw_data.h +++ b/src/vt/vrt/collection/balance/model/raw_data.h @@ -62,6 +62,7 @@ struct RawData : public LoadModel { TimeType getLoad(ElementIDStruct object, PhaseOffset when) override; bool hasRawLoad() const override { return true; } TimeType getRawLoad(ElementIDStruct object, PhaseOffset when) override; + TimeType getComm(ElementIDStruct object, PhaseOffset when) override; void setLoads(std::unordered_map const* proc_load, std::unordered_map const* proc_comm) override; From 35b554444c822e7a234ad369b127cf2ac61ea239 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Fri, 11 Mar 2022 15:16:54 +0100 Subject: [PATCH 005/106] #1672: lb: add TemperedWMin load balancer --- src/CMakeLists.txt | 1 + .../balance/lb_invoke/lb_manager.cc | 5 + src/vt/vrt/collection/balance/lb_type.h | 1 + .../vrt/collection/balance/model/load_model.h | 19 ---- .../balance/temperedlb/temperedlb.cc | 42 ++------- .../balance/temperedlb/temperedlb.h | 4 +- .../balance/temperedwmin/temperedwmin.cc | 94 +++++++++++++++++++ .../balance/temperedwmin/temperedwmin.h | 73 ++++++++++++++ 8 files changed, 182 insertions(+), 57 deletions(-) create mode 100644 src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc create mode 100644 src/vt/vrt/collection/balance/temperedwmin/temperedwmin.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8f50093f6d..656bb873c0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -86,6 +86,7 @@ set( vrt/collection/balance/greedylb vrt/collection/balance/rotatelb vrt/collection/balance/temperedlb + vrt/collection/balance/temperedwmin vrt/collection/balance/offlinelb vrt/collection/balance/zoltanlb vrt/collection/balance/randomlb diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index fbddd73d31..56e299b067 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -54,6 +54,7 @@ #include "vt/vrt/collection/balance/greedylb/greedylb.h" #include "vt/vrt/collection/balance/rotatelb/rotatelb.h" #include "vt/vrt/collection/balance/temperedlb/temperedlb.h" +#include "vt/vrt/collection/balance/temperedwmin/temperedwmin.h" #include "vt/vrt/collection/balance/offlinelb/offlinelb.h" #include "vt/vrt/collection/balance/lb_data_restart_reader.h" #include "vt/vrt/collection/balance/zoltanlb/zoltanlb.h" @@ -320,6 +321,7 @@ void LBManager::startLB( case LBType::ZoltanLB: lb_instances_["chosen"] = makeLB(); break; # endif case LBType::TestSerializationLB: lb_instances_["chosen"] = makeLB(); break; + // case LBType::TemperedWMin: lb_instances_["chosen"] = makeLB(); break; case LBType::NoLB: vtAssert(false, "LBType::NoLB is not a valid LB for collectiveImpl"); break; @@ -359,6 +361,9 @@ void LBManager::printLBArgsHelp(LBType lb) { case LBType::TemperedLB: help = lb::TemperedLB::getInputKeysWithHelp(); break; + case LBType::TemperedWMin: + help = lb::TemperedWMin::getInputKeysWithHelp(); + break; case LBType::RandomLB: help = lb::RandomLB::getInputKeysWithHelp(); break; diff --git a/src/vt/vrt/collection/balance/lb_type.h b/src/vt/vrt/collection/balance/lb_type.h index 8a467538b0..50d87aed43 100644 --- a/src/vt/vrt/collection/balance/lb_type.h +++ b/src/vt/vrt/collection/balance/lb_type.h @@ -64,6 +64,7 @@ enum struct LBType : int8_t { # endif , RandomLB = 7 , TestSerializationLB = 8 + , TemperedWMin = 9 }; }}}} /* end namespace vt::vrt::collection::balance */ diff --git a/src/vt/vrt/collection/balance/model/load_model.h b/src/vt/vrt/collection/balance/model/load_model.h index e911d58082..c6b78c2925 100644 --- a/src/vt/vrt/collection/balance/model/load_model.h +++ b/src/vt/vrt/collection/balance/model/load_model.h @@ -263,25 +263,6 @@ struct LoadModel return {}; } - /** - * \brief Provide an estimate of the total work for a given object during - * a specified interval - * - * \param[in] object The object whose total work is desired - * \param[in] when The interval in which the work takes place - * - * \return Estimated total time of work for the object - * - * The `updateLoads` method must have been called before any call to - * this. - */ - TimeType getTotalWork( - ElementIDStruct object, PhaseOffset when, - double alpha, double beta, double gamma - ) { - return alpha * getLoad(object, when) + beta * getComm(object, when) + gamma; - } - /** * \brief Compute how many phases of past load statistics need to be * kept availble for the model to use diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index f2525e2404..1b489fe7fa 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -250,33 +250,6 @@ Default: false When an object load exceeds the processor-average load (i.e., we have a "long pole"), adjust the target load to be the maximum object load ("longest pole") instead of the processor-average load. -)" - }, - { - "alpha", - R"( -Values: -Default: 1.0 -Description: - Load part coefficient in affine combination of load and communication. -)" - }, - { - "beta", - R"( -Values: -Default: 0.0 -Description: - Communication part coefficient in affine combination of load and communication. -)" - }, - { - "gamma", - R"( -Values: -Default: 0.0 -Description: - ... )" }, }; @@ -380,10 +353,6 @@ void TemperedLB::inputParams(balance::SpecEntry* spec) { num_iters_ = spec->getOrDefault("iters", num_iters_); num_trials_ = spec->getOrDefault("trials", num_trials_); - alpha_ = spec->getOrDefault("alpha", alpha_); - beta_ = spec->getOrDefault("beta", beta_); - gamma_ = spec->getOrDefault("gamma", gamma_); - deterministic_ = spec->getOrDefault("deterministic", deterministic_); rollback_ = spec->getOrDefault("rollback", rollback_); target_pole_ = spec->getOrDefault("targetpole", target_pole_); @@ -530,10 +499,7 @@ void TemperedLB::doLBStages(TimeType start_imb) { cur_objs_.clear(); for (auto obj : *load_model_) { if (obj.isMigratable()) { - // TODO: `beta_ * communication` component is still missing here - cur_objs_[obj] = alpha_ * load_model_->getLoad( - obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE} - ) + gamma_; + cur_objs_[obj] = getTotalWork(obj); } } this_new_load_ = this_load; @@ -1395,4 +1361,10 @@ void TemperedLB::migrate() { vtAssertExpr(false); } +TimeType TemperedLB::getTotalWork(const elm::ElementIDStruct& obj) { + return load_model_->getLoad( + obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE} + ); +} + }}}} /* end namespace vt::vrt::collection::lb */ diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index 2032b42b0b..f41d9bc92a 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -111,6 +111,7 @@ struct TemperedLB : BaseLB { ElementLoadType::iterator selectObject( LoadType size, ElementLoadType& load, std::set const& available ); + virtual TimeType getTotalWork(const elm::ElementIDStruct& obj); void lazyMigrateObjsTo(EpochType epoch, NodeType node, ObjsType const& objs); void inLazyMigrations(balance::LazyMigrationMsg* msg); @@ -126,9 +127,6 @@ struct TemperedLB : BaseLB { uint8_t k_cur_ = 0; uint16_t iter_ = 0; uint16_t trial_ = 0; - double alpha_ = 1.0; - double beta_ = 0.0; - double gamma_ = 0.0; uint16_t num_iters_ = 4; /** * \brief Number of trials diff --git a/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc b/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc new file mode 100644 index 0000000000..f411f61683 --- /dev/null +++ b/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc @@ -0,0 +1,94 @@ +/* +//@HEADER +// ***************************************************************************** +// +// temperedwmin.cc +// DARMA/vt => Virtual Transport +// +// Copyright 2019-2021 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#include "vt/vrt/collection/balance/temperedwmin/temperedwmin.h" + +#include "vt/vrt/collection/balance/lb_common.h" +#include "vt/vrt/collection/balance/model/load_model.h" + +namespace vt { namespace vrt { namespace collection { namespace lb { + +/*static*/ std::unordered_map +TemperedWMin::getInputKeysWithHelp() { + auto map = TemperedLB::getInputKeysWithHelp(); + map["alpha"] = + R"( +Values: +Default: 1.0 +Description: + Load part coefficient in affine combination of load and communication. +)"; + map["beta"] = + R"( +Values: +Default: 0.0 +Description: + Communication part coefficient in affine combination of load and communication. +)"; + map["gamma"] = + R"( +Values: +Default: 0.0 +Description: + Unspecified constant cost. +)"; + return map; +} + +void TemperedWMin::inputParams(balance::SpecEntry* spec) { + TemperedLB::inputParams(spec); + + alpha_ = spec->getOrDefault("alpha", alpha_); + beta_ = spec->getOrDefault("beta", beta_); + gamma_ = spec->getOrDefault("gamma", gamma_); +} + +TimeType TemperedWMin::getTotalWork(const elm::ElementIDStruct& obj) { + balance::PhaseOffset when = + {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE}; + + return alpha_ * load_model_->getLoad(obj, when) + + beta_ * load_model_->getComm(obj, when) + gamma_; +} + +}}}} // namespace vt::vrt::collection::lb diff --git a/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.h b/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.h new file mode 100644 index 0000000000..ec661c1b1b --- /dev/null +++ b/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.h @@ -0,0 +1,73 @@ +/* +//@HEADER +// ***************************************************************************** +// +// temperedwmin.h +// DARMA/vt => Virtual Transport +// +// Copyright 2019-2021 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#if !defined INCLUDED_VT_VRT_COLLECTION_BALANCE_TEMPEREDWMIN_TEMPEREDWMIN_H +#define INCLUDED_VT_VRT_COLLECTION_BALANCE_TEMPEREDWMIN_TEMPEREDWMIN_H + +#include "vt/vrt/collection/balance/temperedlb/temperedlb.h" + +namespace vt { namespace vrt { namespace collection { namespace lb { + +struct TemperedWMin : TemperedLB { + TemperedWMin() = default; + TemperedWMin(TemperedWMin const&) = delete; + + virtual ~TemperedWMin() { } + +public: + static std::unordered_map getInputKeysWithHelp(); + + void inputParams(balance::SpecEntry* spec) override; + +protected: + TimeType getTotalWork(const elm::ElementIDStruct& obj) override; + +private: + double alpha_ = 1.0; + double beta_ = 0.0; + double gamma_ = 0.0; +}; + +}}}} /* end namespace vt::vrt::collection::lb */ + +#endif /*INCLUDED_VT_VRT_COLLECTION_BALANCE_TEMPEREDWMIN_TEMPEREDWMIN_H*/ From 16a58cbd856ad38933b412981e250a14d4ed7f1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Tue, 22 Mar 2022 13:49:37 +0100 Subject: [PATCH 006/106] #1672: improve post-commit hooks --- scripts/post-commit | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/post-commit b/scripts/post-commit index ea8e3dd43e..8c1a0aa4a3 100755 --- a/scripts/post-commit +++ b/scripts/post-commit @@ -5,6 +5,12 @@ # every commit. # Prints the resulting changes if there are any. +# don't run the action during rebase +if ! [[ $(git branch --show-current) ]] +then + exit +fi + output=$(git clang-format HEAD~1) if [ "$output" != "" ] From e63c5407768afb6f3527e72897fd55aee570f04f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Tue, 22 Mar 2022 17:11:46 +0100 Subject: [PATCH 007/106] #1672: Revert 19b36419a "LB: remove comm aware flags" --- src/vt/vrt/collection/balance/baselb/baselb.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/vt/vrt/collection/balance/baselb/baselb.h b/src/vt/vrt/collection/balance/baselb/baselb.h index ac111ce9b3..c5ba7087ce 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.h +++ b/src/vt/vrt/collection/balance/baselb/baselb.h @@ -81,8 +81,9 @@ struct BaseLB { >; using ObjDestinationListType = std::vector>; - explicit BaseLB() - : pending_reassignment_(std::make_shared()) + explicit BaseLB(bool in_comm_aware = false) + : comm_aware_(in_comm_aware), + pending_reassignment_(std::make_shared()) { } BaseLB(BaseLB const &) = delete; @@ -153,6 +154,7 @@ struct BaseLB { std::unique_ptr spec_entry_ = nullptr; // Observer only - LBManager owns the instance balance::LoadModel* load_model_ = nullptr; + bool comm_aware_ = false; protected: /** From 1a2ee2e75b3f7b69cab3c46ff516c4159f482bf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Tue, 5 Apr 2022 18:07:30 +0200 Subject: [PATCH 008/106] #1672: rename `getLoad` to `getLoadMetric` --- src/vt/elm/elm_lb_data.cc | 6 +++--- src/vt/elm/elm_lb_data.h | 4 ++-- src/vt/vrt/collection/balance/baselb/load_sampler.cc | 2 +- src/vt/vrt/collection/balance/greedylb/greedylb.cc | 6 +++--- src/vt/vrt/collection/balance/greedylb/greedylb_msgs.h | 2 +- src/vt/vrt/collection/balance/greedylb/greedylb_types.h | 8 ++++---- src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc | 4 ++-- .../vrt/collection/balance/hierarchicallb/hierlb_msgs.h | 2 +- src/vt/vrt/collection/balance/lb_common.cc | 4 ++-- src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc | 2 +- src/vt/vrt/collection/balance/model/comm_overhead.cc | 6 +++--- src/vt/vrt/collection/balance/model/comm_overhead.h | 2 +- src/vt/vrt/collection/balance/model/composed_model.cc | 4 ++-- src/vt/vrt/collection/balance/model/composed_model.h | 2 +- src/vt/vrt/collection/balance/model/linear_model.cc | 6 +++--- src/vt/vrt/collection/balance/model/linear_model.h | 2 +- src/vt/vrt/collection/balance/model/load_model.h | 4 ++-- src/vt/vrt/collection/balance/model/multiple_phases.cc | 6 +++--- src/vt/vrt/collection/balance/model/multiple_phases.h | 2 +- src/vt/vrt/collection/balance/model/naive_persistence.cc | 4 ++-- src/vt/vrt/collection/balance/model/naive_persistence.h | 2 +- src/vt/vrt/collection/balance/model/norm.cc | 8 ++++---- src/vt/vrt/collection/balance/model/norm.h | 2 +- src/vt/vrt/collection/balance/model/per_collection.cc | 6 +++--- src/vt/vrt/collection/balance/model/per_collection.h | 2 +- .../collection/balance/model/persistence_median_last_n.cc | 6 +++--- .../collection/balance/model/persistence_median_last_n.h | 2 +- .../vrt/collection/balance/model/proposed_reassignment.cc | 4 ++-- .../vrt/collection/balance/model/proposed_reassignment.h | 2 +- src/vt/vrt/collection/balance/model/raw_data.cc | 2 +- src/vt/vrt/collection/balance/model/raw_data.h | 2 +- src/vt/vrt/collection/balance/model/select_subphases.cc | 6 +++--- src/vt/vrt/collection/balance/model/select_subphases.h | 2 +- src/vt/vrt/collection/balance/node_lb_data.cc | 2 +- src/vt/vrt/collection/balance/rotatelb/rotatelb.cc | 2 +- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 2 +- .../vrt/collection/balance/temperedwmin/temperedwmin.cc | 2 +- src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc | 2 +- tests/unit/collection/test_model_comm_overhead.nompi.cc | 4 ++-- tests/unit/collection/test_model_linear_model.nompi.cc | 4 ++-- tests/unit/collection/test_model_multiple_phases.nompi.cc | 4 ++-- .../unit/collection/test_model_naive_persistence.nompi.cc | 4 ++-- tests/unit/collection/test_model_norm.nompi.cc | 8 ++++---- .../unit/collection/test_model_per_collection.extended.cc | 4 ++-- .../test_model_persistence_median_last_n.nompi.cc | 4 ++-- tests/unit/collection/test_model_raw_data.nompi.cc | 4 ++-- .../unit/collection/test_model_select_subphases.nompi.cc | 6 +++--- 47 files changed, 88 insertions(+), 88 deletions(-) diff --git a/src/vt/elm/elm_lb_data.cc b/src/vt/elm/elm_lb_data.cc index 3496f48b19..be3468afbf 100644 --- a/src/vt/elm/elm_lb_data.cc +++ b/src/vt/elm/elm_lb_data.cc @@ -163,7 +163,7 @@ PhaseType ElementLBData::getPhase() const { return cur_phase_; } -TimeType ElementLBData::getLoad(PhaseType const& phase) const { +TimeType ElementLBData::getLoadMetric(PhaseType const& phase) const { auto iter = phase_timings_.find(phase); if (iter != phase_timings_.end()) { TimeTypeWrapper const total_load = phase_timings_.at(phase); @@ -180,9 +180,9 @@ TimeType ElementLBData::getLoad(PhaseType const& phase) const { } } -TimeType ElementLBData::getLoad(PhaseType phase, SubphaseType subphase) const { +TimeType ElementLBData::getLoadMetric(PhaseType phase, SubphaseType subphase) const { if (subphase == no_subphase) - return getLoad(phase); + return getLoadMetric(phase); auto const& subphase_loads = subphase_timings_.at(phase); diff --git a/src/vt/elm/elm_lb_data.h b/src/vt/elm/elm_lb_data.h index 554856f8d1..f5554d4eb3 100644 --- a/src/vt/elm/elm_lb_data.h +++ b/src/vt/elm/elm_lb_data.h @@ -84,8 +84,8 @@ struct ElementLBData { void updatePhase(PhaseType const& inc = 1); void resetPhase(); PhaseType getPhase() const; - TimeType getLoad(PhaseType const& phase) const; - TimeType getLoad(PhaseType phase, SubphaseType subphase) const; + TimeType getLoadMetric(PhaseType const& phase) const; + TimeType getLoadMetric(PhaseType phase, SubphaseType subphase) const; CommMapType const& getComm(PhaseType const& phase); std::vector const& getSubphaseComm(PhaseType phase); diff --git a/src/vt/vrt/collection/balance/baselb/load_sampler.cc b/src/vt/vrt/collection/balance/baselb/load_sampler.cc index 05dd76c80d..97a642bc0d 100644 --- a/src/vt/vrt/collection/balance/baselb/load_sampler.cc +++ b/src/vt/vrt/collection/balance/baselb/load_sampler.cc @@ -49,7 +49,7 @@ namespace vt { namespace vrt { namespace collection { namespace lb { void LoadSamplerBaseLB::buildHistogram() { for (auto obj : *load_model_) { - TimeTypeWrapper load = load_model_->getLoad( + TimeTypeWrapper load = load_model_->getLoadMetric( obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE} ); auto const& load_milli = loadMilli(load.seconds()); diff --git a/src/vt/vrt/collection/balance/greedylb/greedylb.cc b/src/vt/vrt/collection/balance/greedylb/greedylb.cc index 810f7a134d..8e675fea0c 100644 --- a/src/vt/vrt/collection/balance/greedylb/greedylb.cc +++ b/src/vt/vrt/collection/balance/greedylb/greedylb.cc @@ -273,10 +273,10 @@ void GreedyLB::runBalancer( "recs_={}, max_rec: obj={}, time={}\n", min_node.node_, TimeTypeWrapper(min_node.load_ / 1000), min_node.recs_.size(), max_rec.getObj(), - TimeTypeWrapper(max_rec.getLoad() / 1000) + TimeTypeWrapper(max_rec.getLoadMetric() / 1000) ); min_node.recs_.push_back(max_rec.getObj()); - min_node.load_ += max_rec.getLoad(); + min_node.load_ += max_rec.getLoadMetric(); nodes.push_back(min_node); std::push_heap(nodes.begin(), nodes.end(), CompProcType()); } @@ -413,7 +413,7 @@ void GreedyLB::loadOverBin(ObjBinType bin, ObjBinListType& bin_list) { load_over[bin].push_back(obj_id); bin_list.pop_back(); - auto const& obj_time_milli = loadMilli(load_model_->getLoad(obj_id, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE})); + auto const& obj_time_milli = loadMilli(load_model_->getLoadMetric(obj_id, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE})); this_load -= obj_time_milli; diff --git a/src/vt/vrt/collection/balance/greedylb/greedylb_msgs.h b/src/vt/vrt/collection/balance/greedylb/greedylb_msgs.h index 401545d3f3..3ab3286200 100644 --- a/src/vt/vrt/collection/balance/greedylb/greedylb_msgs.h +++ b/src/vt/vrt/collection/balance/greedylb/greedylb_msgs.h @@ -120,7 +120,7 @@ struct GreedyCollectMsg : GreedyLBTypes, collective::ReduceTMsg { MessageParentType::serialize(s); } - ObjSampleType const& getLoad() const { + ObjSampleType const& getLoadMetric() const { return collective::ReduceTMsg::getConstVal().getSample(); } diff --git a/src/vt/vrt/collection/balance/greedylb/greedylb_types.h b/src/vt/vrt/collection/balance/greedylb/greedylb_types.h index b6d7c8b4ce..1c3ef27b04 100644 --- a/src/vt/vrt/collection/balance/greedylb/greedylb_types.h +++ b/src/vt/vrt/collection/balance/greedylb/greedylb_types.h @@ -71,7 +71,7 @@ struct GreedyRecord { : obj_(in_obj), load_(in_load) { } - LoadType getLoad() const { return load_; } + LoadType getLoadMetric() const { return load_; } ObjType getObj() const { return obj_; } private: @@ -85,7 +85,7 @@ struct GreedyProc { NodeType const& in_node, GreedyLBTypes::LoadType const& in_load ) : node_(in_node), load_(in_load) {} - GreedyLBTypes::LoadType getLoad() const { return load_; } + GreedyLBTypes::LoadType getLoadMetric() const { return load_; } NodeType node_ = uninitialized_destination; GreedyLBTypes::LoadType load_ = 0.0f; @@ -95,14 +95,14 @@ struct GreedyProc { template struct GreedyCompareLoadMin { bool operator()(T const& p1, T const& p2) const { - return p1.getLoad() > p2.getLoad(); + return p1.getLoadMetric() > p2.getLoadMetric(); } }; template struct GreedyCompareLoadMax { bool operator()(T const& p1, T const& p2) const { - return p1.getLoad() < p2.getLoad(); + return p1.getLoadMetric() < p2.getLoadMetric(); } }; diff --git a/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc b/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc index f638bafd28..18d001a2f9 100644 --- a/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc +++ b/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc @@ -311,7 +311,7 @@ void HierarchicalLB::loadOverBin(ObjBinType bin, ObjBinListType& bin_list) { load_over[bin].push_back(obj_id); bin_list.pop_back(); - auto const& obj_time_milli = loadMilli(load_model_->getLoad(obj_id, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE})); + auto const& obj_time_milli = loadMilli(load_model_->getLoadMetric(obj_id, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE})); this_load -= obj_time_milli; @@ -452,7 +452,7 @@ void HierarchicalLB::downTree( void HierarchicalLB::lbTreeUpHandler(LBTreeUpMsg* msg) { lbTreeUp( - msg->getChildLoad(), msg->getChild(), msg->getLoad(), msg->getChildSize() + msg->getChildLoad(), msg->getChild(), msg->getLoadMetric(), msg->getChildSize() ); } diff --git a/src/vt/vrt/collection/balance/hierarchicallb/hierlb_msgs.h b/src/vt/vrt/collection/balance/hierarchicallb/hierlb_msgs.h index 8beb4c783b..c9bbbcdfeb 100644 --- a/src/vt/vrt/collection/balance/hierarchicallb/hierlb_msgs.h +++ b/src/vt/vrt/collection/balance/hierarchicallb/hierlb_msgs.h @@ -74,7 +74,7 @@ struct LBTreeUpMsg : HierLBTypes, ::vt::Message { LoadType getChildLoad() const { return child_load_; } NodeType getChild() const { return child_; } - ObjSampleType const& getLoad() const { return load_; } + ObjSampleType const& getLoadMetric() const { return load_; } ObjSampleType&& getLoadMove() { return std::move(load_); } NodeType getChildSize() const { return child_size_; } diff --git a/src/vt/vrt/collection/balance/lb_common.cc b/src/vt/vrt/collection/balance/lb_common.cc index dd50575542..bfec704d36 100644 --- a/src/vt/vrt/collection/balance/lb_common.cc +++ b/src/vt/vrt/collection/balance/lb_common.cc @@ -61,11 +61,11 @@ LoadSummary getObjectLoads( LoadModel* model, ElementIDStruct object, PhaseOffset when ) { LoadSummary ret; - ret.whole_phase_load = model->getLoad(object, {when.phases, PhaseOffset::WHOLE_PHASE}); + ret.whole_phase_load = model->getLoadMetric(object, {when.phases, PhaseOffset::WHOLE_PHASE}); unsigned int subphases = model->getNumSubphases(); for (unsigned int i = 0; i < subphases; ++i) - ret.subphase_loads.push_back(model->getLoad(object, {when.phases, i})); + ret.subphase_loads.push_back(model->getLoadMetric(object, {when.phases, i})); return ret; } diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index 56e299b067..46b4bafe89 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -613,7 +613,7 @@ void LBManager::computeStatistics( total_load_from_model = 0.; std::vector obj_load_model; for (auto elm : *model) { - auto work = model->getLoad( + auto work = model->getLoadMetric( elm, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE} ); obj_load_model.emplace_back( diff --git a/src/vt/vrt/collection/balance/model/comm_overhead.cc b/src/vt/vrt/collection/balance/model/comm_overhead.cc index 27050fd086..9c8062e756 100644 --- a/src/vt/vrt/collection/balance/model/comm_overhead.cc +++ b/src/vt/vrt/collection/balance/model/comm_overhead.cc @@ -60,8 +60,8 @@ void CommOverhead::setLoads(std::unordered_map const* pr ComposedModel::setLoads(proc_load, proc_comm); } -TimeType CommOverhead::getLoad(ElementIDStruct object, PhaseOffset offset) { - auto work = ComposedModel::getLoad(object, offset); +TimeType CommOverhead::getLoadMetric(ElementIDStruct object, PhaseOffset offset) { + auto work = ComposedModel::getLoadMetric(object, offset); auto phase = getNumCompletedPhases() + offset.phases; auto& comm = proc_comm_->at(phase); @@ -79,7 +79,7 @@ TimeType CommOverhead::getLoad(ElementIDStruct object, PhaseOffset offset) { return work + overhead; } else { // @todo: we don't record comm costs for each subphase---split it proportionally - auto whole_phase_work = ComposedModel::getLoad(object, PhaseOffset{offset.phases, PhaseOffset::WHOLE_PHASE}); + auto whole_phase_work = ComposedModel::getLoadMetric(object, PhaseOffset{offset.phases, PhaseOffset::WHOLE_PHASE}); return work + overhead * ( static_cast(work)/whole_phase_work ); } } diff --git a/src/vt/vrt/collection/balance/model/comm_overhead.h b/src/vt/vrt/collection/balance/model/comm_overhead.h index 3b23a3ae73..989e7cd329 100644 --- a/src/vt/vrt/collection/balance/model/comm_overhead.h +++ b/src/vt/vrt/collection/balance/model/comm_overhead.h @@ -68,7 +68,7 @@ struct CommOverhead : public ComposedModel { void setLoads(std::unordered_map const* proc_load, std::unordered_map const* proc_comm) override; - TimeType getLoad(ElementIDStruct object, PhaseOffset when) override; + TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) override; private: std::unordered_map const* proc_comm_; /**< Underlying comm data */ diff --git a/src/vt/vrt/collection/balance/model/composed_model.cc b/src/vt/vrt/collection/balance/model/composed_model.cc index 1272564164..66b6939ce0 100644 --- a/src/vt/vrt/collection/balance/model/composed_model.cc +++ b/src/vt/vrt/collection/balance/model/composed_model.cc @@ -54,8 +54,8 @@ void ComposedModel::updateLoads(PhaseType last_completed_phase) { base_->updateLoads(last_completed_phase); } -TimeType ComposedModel::getLoad(ElementIDStruct object, PhaseOffset when) { - return base_->getLoad(object, when); +TimeType ComposedModel::getLoadMetric(ElementIDStruct object, PhaseOffset when) { + return base_->getLoadMetric(object, when); } bool ComposedModel::hasRawLoad() const { diff --git a/src/vt/vrt/collection/balance/model/composed_model.h b/src/vt/vrt/collection/balance/model/composed_model.h index f170075493..a4763122d0 100644 --- a/src/vt/vrt/collection/balance/model/composed_model.h +++ b/src/vt/vrt/collection/balance/model/composed_model.h @@ -69,7 +69,7 @@ class ComposedModel : public LoadModel void updateLoads(PhaseType last_completed_phase) override; - TimeType getLoad(ElementIDStruct object, PhaseOffset when) override; + TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) override; bool hasRawLoad() const override; TimeType getRawLoad(ElementIDStruct object, PhaseOffset when) override; unsigned int getNumPastPhasesNeeded(unsigned int look_back) override; diff --git a/src/vt/vrt/collection/balance/model/linear_model.cc b/src/vt/vrt/collection/balance/model/linear_model.cc index b967484cfa..505b477627 100644 --- a/src/vt/vrt/collection/balance/model/linear_model.cc +++ b/src/vt/vrt/collection/balance/model/linear_model.cc @@ -48,12 +48,12 @@ namespace vt { namespace vrt { namespace collection { namespace balance { -TimeType LinearModel::getLoad(ElementIDStruct object, PhaseOffset when) { +TimeType LinearModel::getLoadMetric(ElementIDStruct object, PhaseOffset when) { using util::stats::LinearRegression; // Retrospective queries don't call for a prediction if (when.phases < 0) - return ComposedModel::getLoad(object, when); + return ComposedModel::getLoadMetric(object, when); std::vector x; std::vector y; @@ -65,7 +65,7 @@ TimeType LinearModel::getLoad(ElementIDStruct object, PhaseOffset when) { for (int i = -1 * static_cast(phases); i < 0; i++) { x.emplace_back(i); past_phase.phases = i; - y.emplace_back(ComposedModel::getLoad(object, past_phase)); + y.emplace_back(ComposedModel::getLoadMetric(object, past_phase)); } // should we re-create this every time? diff --git a/src/vt/vrt/collection/balance/model/linear_model.h b/src/vt/vrt/collection/balance/model/linear_model.h index 5b3a87eaef..3b948cce3c 100644 --- a/src/vt/vrt/collection/balance/model/linear_model.h +++ b/src/vt/vrt/collection/balance/model/linear_model.h @@ -69,7 +69,7 @@ struct LinearModel : ComposedModel { past_len_(in_past_len) { } - TimeType getLoad(ElementIDStruct object, PhaseOffset when) override; + TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) override; unsigned int getNumPastPhasesNeeded(unsigned int look_back) override; private: diff --git a/src/vt/vrt/collection/balance/model/load_model.h b/src/vt/vrt/collection/balance/model/load_model.h index c6b78c2925..7f2f36f162 100644 --- a/src/vt/vrt/collection/balance/model/load_model.h +++ b/src/vt/vrt/collection/balance/model/load_model.h @@ -207,7 +207,7 @@ struct LoadModel * * This would typically be called by LBManager collectively inside * an epoch that can be used for global communication in advance of - * any calls to getLoad() + * any calls to getLoadMetric() * * The `setLoads` method must have been called before any call to * this. @@ -225,7 +225,7 @@ struct LoadModel * The `updateLoads` method must have been called before any call to * this. */ - virtual TimeType getLoad(ElementIDStruct object, PhaseOffset when) = 0; + virtual TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) = 0; /** * \brief Whether or not the model is based on the RawData model diff --git a/src/vt/vrt/collection/balance/model/multiple_phases.cc b/src/vt/vrt/collection/balance/model/multiple_phases.cc index 1ba821b3d3..7bfa8cae26 100644 --- a/src/vt/vrt/collection/balance/model/multiple_phases.cc +++ b/src/vt/vrt/collection/balance/model/multiple_phases.cc @@ -45,16 +45,16 @@ namespace vt { namespace vrt { namespace collection { namespace balance { -TimeType MultiplePhases::getLoad(ElementIDStruct object, PhaseOffset when) { +TimeType MultiplePhases::getLoadMetric(ElementIDStruct object, PhaseOffset when) { // Retrospective queries don't call for a prediction if (when.phases < 0) - return ComposedModel::getLoad(object, when); + return ComposedModel::getLoadMetric(object, when); TimeType sum = 0.0; for (int i = 0; i < future_phase_block_size_; ++i) { PhaseOffset p{future_phase_block_size_*when.phases + i, when.subphase}; - sum += ComposedModel::getLoad(object, p); + sum += ComposedModel::getLoadMetric(object, p); } return sum; diff --git a/src/vt/vrt/collection/balance/model/multiple_phases.h b/src/vt/vrt/collection/balance/model/multiple_phases.h index ac01f997ad..3d899d17f5 100644 --- a/src/vt/vrt/collection/balance/model/multiple_phases.h +++ b/src/vt/vrt/collection/balance/model/multiple_phases.h @@ -79,7 +79,7 @@ struct MultiplePhases : ComposedModel { , future_phase_block_size_(in_future_phase_block_size) { } - TimeType getLoad(ElementIDStruct object, PhaseOffset when) override; + TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) override; private: int future_phase_block_size_ = 0; diff --git a/src/vt/vrt/collection/balance/model/naive_persistence.cc b/src/vt/vrt/collection/balance/model/naive_persistence.cc index 2cd10186f0..723e96ea47 100644 --- a/src/vt/vrt/collection/balance/model/naive_persistence.cc +++ b/src/vt/vrt/collection/balance/model/naive_persistence.cc @@ -50,12 +50,12 @@ NaivePersistence::NaivePersistence(std::shared_ptr base) : ComposedModel(base) { } -TimeType NaivePersistence::getLoad(ElementIDStruct object, PhaseOffset offset) +TimeType NaivePersistence::getLoadMetric(ElementIDStruct object, PhaseOffset offset) { if (offset.phases >= 0) offset.phases = -1; - return ComposedModel::getLoad(object, offset); + return ComposedModel::getLoadMetric(object, offset); } TimeType NaivePersistence::getRawLoad(ElementIDStruct object, PhaseOffset offset) diff --git a/src/vt/vrt/collection/balance/model/naive_persistence.h b/src/vt/vrt/collection/balance/model/naive_persistence.h index 8a83b4e353..6b0629d7cd 100644 --- a/src/vt/vrt/collection/balance/model/naive_persistence.h +++ b/src/vt/vrt/collection/balance/model/naive_persistence.h @@ -60,7 +60,7 @@ struct NaivePersistence : public ComposedModel { * \param[in] base: The source of underlying load numbers to return; must not be null */ explicit NaivePersistence(std::shared_ptr base); - TimeType getLoad(ElementIDStruct object, PhaseOffset when) override; + TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) override; TimeType getRawLoad(ElementIDStruct object, PhaseOffset offset) override; unsigned int getNumPastPhasesNeeded(unsigned int look_back) override; }; // class NaivePersistence diff --git a/src/vt/vrt/collection/balance/model/norm.cc b/src/vt/vrt/collection/balance/model/norm.cc index dca85c6994..584357db9c 100644 --- a/src/vt/vrt/collection/balance/model/norm.cc +++ b/src/vt/vrt/collection/balance/model/norm.cc @@ -55,17 +55,17 @@ Norm::Norm(std::shared_ptr base, double power) vtAssert(power >= 0.0, "Reciprocal loads make no sense"); } -TimeType Norm::getLoad(ElementIDStruct object, PhaseOffset offset) +TimeType Norm::getLoadMetric(ElementIDStruct object, PhaseOffset offset) { if (offset.subphase != PhaseOffset::WHOLE_PHASE) - return ComposedModel::getLoad(object, offset); + return ComposedModel::getLoadMetric(object, offset); if (std::isfinite(power_)) { double sum = 0.0; for (int i = 0; i < getNumSubphases(); ++i) { offset.subphase = i; - auto t = ComposedModel::getLoad(object, offset); + auto t = ComposedModel::getLoadMetric(object, offset); sum += std::pow(t, power_); } @@ -76,7 +76,7 @@ TimeType Norm::getLoad(ElementIDStruct object, PhaseOffset offset) for (int i = 0; i < getNumSubphases(); ++i) { offset.subphase = i; - auto t = ComposedModel::getLoad(object, offset); + auto t = ComposedModel::getLoadMetric(object, offset); max = std::max(max, t); } diff --git a/src/vt/vrt/collection/balance/model/norm.h b/src/vt/vrt/collection/balance/model/norm.h index 8fee467fc9..6dba219eae 100644 --- a/src/vt/vrt/collection/balance/model/norm.h +++ b/src/vt/vrt/collection/balance/model/norm.h @@ -64,7 +64,7 @@ class Norm : public ComposedModel { */ Norm(std::shared_ptr base, double power); - TimeType getLoad(ElementIDStruct object, PhaseOffset when) override; + TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) override; private: const double power_; diff --git a/src/vt/vrt/collection/balance/model/per_collection.cc b/src/vt/vrt/collection/balance/model/per_collection.cc index 54cd37b9f1..770245e515 100644 --- a/src/vt/vrt/collection/balance/model/per_collection.cc +++ b/src/vt/vrt/collection/balance/model/per_collection.cc @@ -68,14 +68,14 @@ void PerCollection::updateLoads(PhaseType last_completed_phase) { ComposedModel::updateLoads(last_completed_phase); } -TimeType PerCollection::getLoad(ElementIDStruct object, PhaseOffset when) { +TimeType PerCollection::getLoadMetric(ElementIDStruct object, PhaseOffset when) { // See if some specific model has been given for the object in question auto mi = models_.find(theNodeLBData()->getCollectionProxyForElement(object)); if (mi != models_.end()) - return mi->second->getLoad(object, when); + return mi->second->getLoadMetric(object, when); // Otherwise, default to the given base model - return ComposedModel::getLoad(object, when); + return ComposedModel::getLoadMetric(object, when); } bool PerCollection::hasRawLoad() const { diff --git a/src/vt/vrt/collection/balance/model/per_collection.h b/src/vt/vrt/collection/balance/model/per_collection.h index bf180f8a44..39a984e3e4 100644 --- a/src/vt/vrt/collection/balance/model/per_collection.h +++ b/src/vt/vrt/collection/balance/model/per_collection.h @@ -78,7 +78,7 @@ struct PerCollection : public ComposedModel void updateLoads(PhaseType last_completed_phase) override; - TimeType getLoad(ElementIDStruct object, PhaseOffset when) override; + TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) override; bool hasRawLoad() const override; TimeType getRawLoad(ElementIDStruct object, PhaseOffset when) override; unsigned int getNumPastPhasesNeeded(unsigned int look_back) override; diff --git a/src/vt/vrt/collection/balance/model/persistence_median_last_n.cc b/src/vt/vrt/collection/balance/model/persistence_median_last_n.cc index 14e8e2a974..da1318204b 100644 --- a/src/vt/vrt/collection/balance/model/persistence_median_last_n.cc +++ b/src/vt/vrt/collection/balance/model/persistence_median_last_n.cc @@ -54,17 +54,17 @@ PersistenceMedianLastN::PersistenceMedianLastN(std::shared_ptr base, vtAssert(n > 0, "Cannot take a median over no phases"); } -TimeType PersistenceMedianLastN::getLoad(ElementIDStruct object, PhaseOffset when) +TimeType PersistenceMedianLastN::getLoadMetric(ElementIDStruct object, PhaseOffset when) { // Retrospective queries don't call for a prospective calculation if (when.phases < 0) - return ComposedModel::getLoad(object, when); + return ComposedModel::getLoadMetric(object, when); unsigned int phases = std::min(n_, getNumCompletedPhases()); std::vector times(phases); for (unsigned int i = 1; i <= phases; ++i) { PhaseOffset p{-1*static_cast(i), when.subphase}; - TimeType t = ComposedModel::getLoad(object, p); + TimeType t = ComposedModel::getLoadMetric(object, p); times[i-1] = t; } diff --git a/src/vt/vrt/collection/balance/model/persistence_median_last_n.h b/src/vt/vrt/collection/balance/model/persistence_median_last_n.h index aa13132bf3..b684f5cf0f 100644 --- a/src/vt/vrt/collection/balance/model/persistence_median_last_n.h +++ b/src/vt/vrt/collection/balance/model/persistence_median_last_n.h @@ -65,7 +65,7 @@ struct PersistenceMedianLastN : public ComposedModel */ PersistenceMedianLastN(std::shared_ptr base, unsigned int n); - TimeType getLoad(ElementIDStruct object, PhaseOffset when) override; + TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) override; unsigned int getNumPastPhasesNeeded(unsigned int look_back) override; private: diff --git a/src/vt/vrt/collection/balance/model/proposed_reassignment.cc b/src/vt/vrt/collection/balance/model/proposed_reassignment.cc index 2ffea77015..763ebb8ba8 100644 --- a/src/vt/vrt/collection/balance/model/proposed_reassignment.cc +++ b/src/vt/vrt/collection/balance/model/proposed_reassignment.cc @@ -93,7 +93,7 @@ int ProposedReassignment::getNumObjects() return base - departing + arriving; } -TimeType ProposedReassignment::getLoad(ElementIDStruct object, PhaseOffset when) +TimeType ProposedReassignment::getLoadMetric(ElementIDStruct object, PhaseOffset when) { auto a = reassignment_->arrive_.find(object); if (a != reassignment_->arrive_.end()) { @@ -104,7 +104,7 @@ TimeType ProposedReassignment::getLoad(ElementIDStruct object, PhaseOffset when) vtAssert(reassignment_->depart_.find(object) == reassignment_->depart_.end(), "Departing object should not appear as a load query subject"); - return ComposedModel::getLoad(object, when); + return ComposedModel::getLoadMetric(object, when); } TimeType ProposedReassignment::getRawLoad(ElementIDStruct object, PhaseOffset when) diff --git a/src/vt/vrt/collection/balance/model/proposed_reassignment.h b/src/vt/vrt/collection/balance/model/proposed_reassignment.h index 9c3c066fc7..83ad0527fc 100644 --- a/src/vt/vrt/collection/balance/model/proposed_reassignment.h +++ b/src/vt/vrt/collection/balance/model/proposed_reassignment.h @@ -57,7 +57,7 @@ struct ProposedReassignment : public ComposedModel { ObjectIterator begin() override; int getNumObjects() override; - TimeType getLoad(ElementIDStruct object, PhaseOffset when) override; + TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) override; TimeType getRawLoad(ElementIDStruct object, PhaseOffset when) override; private: diff --git a/src/vt/vrt/collection/balance/model/raw_data.cc b/src/vt/vrt/collection/balance/model/raw_data.cc index ec2050f2ab..36122a9fb6 100644 --- a/src/vt/vrt/collection/balance/model/raw_data.cc +++ b/src/vt/vrt/collection/balance/model/raw_data.cc @@ -94,7 +94,7 @@ int RawData::getNumSubphases() { return subphases; } -TimeType RawData::getLoad(ElementIDStruct object, PhaseOffset offset) +TimeType RawData::getLoadMetric(ElementIDStruct object, PhaseOffset offset) { return getRawLoad(object, offset); } diff --git a/src/vt/vrt/collection/balance/model/raw_data.h b/src/vt/vrt/collection/balance/model/raw_data.h index 818d323978..5c879fc509 100644 --- a/src/vt/vrt/collection/balance/model/raw_data.h +++ b/src/vt/vrt/collection/balance/model/raw_data.h @@ -59,7 +59,7 @@ namespace vt { namespace vrt { namespace collection { namespace balance { struct RawData : public LoadModel { RawData() = default; void updateLoads(PhaseType last_completed_phase) override; - TimeType getLoad(ElementIDStruct object, PhaseOffset when) override; + TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) override; bool hasRawLoad() const override { return true; } TimeType getRawLoad(ElementIDStruct object, PhaseOffset when) override; TimeType getComm(ElementIDStruct object, PhaseOffset when) override; diff --git a/src/vt/vrt/collection/balance/model/select_subphases.cc b/src/vt/vrt/collection/balance/model/select_subphases.cc index 3d4128f1cc..6fe423ffce 100644 --- a/src/vt/vrt/collection/balance/model/select_subphases.cc +++ b/src/vt/vrt/collection/balance/model/select_subphases.cc @@ -58,18 +58,18 @@ SelectSubphases::SelectSubphases(std::shared_ptr base, std::vector base, std::vector subphases); - TimeType getLoad(ElementIDStruct object, PhaseOffset when) override; + TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) override; int getNumSubphases() override; std::vector subphases_; diff --git a/src/vt/vrt/collection/balance/node_lb_data.cc b/src/vt/vrt/collection/balance/node_lb_data.cc index 010521cae1..3ee8d52001 100644 --- a/src/vt/vrt/collection/balance/node_lb_data.cc +++ b/src/vt/vrt/collection/balance/node_lb_data.cc @@ -258,7 +258,7 @@ void NodeLBData::addNodeLBData( ); auto const phase = in->getPhase(); - auto const& total_load = in->getLoad(phase, focused_subphase); + auto const& total_load = in->getLoadMetric(phase, focused_subphase); auto &phase_data = lb_data_->node_data_[phase]; auto elm_iter = phase_data.find(id); diff --git a/src/vt/vrt/collection/balance/rotatelb/rotatelb.cc b/src/vt/vrt/collection/balance/rotatelb/rotatelb.cc index a3fee46c60..a7e5288c54 100644 --- a/src/vt/vrt/collection/balance/rotatelb/rotatelb.cc +++ b/src/vt/vrt/collection/balance/rotatelb/rotatelb.cc @@ -76,7 +76,7 @@ void RotateLB::runLB(TimeType) { } for (auto obj : *load_model_) { - TimeTypeWrapper const load = load_model_->getLoad(obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE}); + TimeTypeWrapper const load = load_model_->getLoadMetric(obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE}); vt_debug_print( terse, lb, "\t RotateLB::migrating object to: obj={}, load={}, to_node={}\n", diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 1b489fe7fa..a176089a99 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1362,7 +1362,7 @@ void TemperedLB::migrate() { } TimeType TemperedLB::getTotalWork(const elm::ElementIDStruct& obj) { - return load_model_->getLoad( + return load_model_->getLoadMetric( obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE} ); } diff --git a/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc b/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc index f411f61683..e1a1750aab 100644 --- a/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc +++ b/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc @@ -87,7 +87,7 @@ TimeType TemperedWMin::getTotalWork(const elm::ElementIDStruct& obj) { balance::PhaseOffset when = {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE}; - return alpha_ * load_model_->getLoad(obj, when) + return alpha_ * load_model_->getLoadMetric(obj, when) + beta_ * load_model_->getComm(obj, when) + gamma_; } diff --git a/src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc b/src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc index 575be300c7..086addcd59 100644 --- a/src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc +++ b/src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc @@ -483,7 +483,7 @@ std::unique_ptr ZoltanLB::makeGraph() { { int idx = 0; for (auto&& obj : load_objs) { - auto load = load_model_->getLoad( + auto load = load_model_->getLoadMetric( obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE} ); diff --git a/tests/unit/collection/test_model_comm_overhead.nompi.cc b/tests/unit/collection/test_model_comm_overhead.nompi.cc index 598ac4a5ee..0ac766e443 100644 --- a/tests/unit/collection/test_model_comm_overhead.nompi.cc +++ b/tests/unit/collection/test_model_comm_overhead.nompi.cc @@ -86,7 +86,7 @@ struct StubModel : LoadModel { void updateLoads(PhaseType) override {} - TimeType getLoad(ElementIDStruct id, PhaseOffset phase) override { + TimeType getLoadMetric(ElementIDStruct id, PhaseOffset phase) override { const auto work = proc_load_->at(0).at(id).whole_phase_load; if (phase.subphase == PhaseOffset::WHOLE_PHASE) { @@ -167,7 +167,7 @@ TEST_F(TestModelCommOverhead, test_model_comm_overhead_1) { ++objects_seen; const auto subphase = num_phases == 0 ? PhaseOffset::WHOLE_PHASE : 1; - auto work_val = test_model->getLoad(obj, PhaseOffset{0, subphase}); + auto work_val = test_model->getLoadMetric(obj, PhaseOffset{0, subphase}); EXPECT_EQ(work_val, expected_work[num_phases]) << fmt::format("For element={} on phase={}\n", obj, num_phases); } diff --git a/tests/unit/collection/test_model_linear_model.nompi.cc b/tests/unit/collection/test_model_linear_model.nompi.cc index 8aa2223b9e..bc4fb4039b 100644 --- a/tests/unit/collection/test_model_linear_model.nompi.cc +++ b/tests/unit/collection/test_model_linear_model.nompi.cc @@ -79,7 +79,7 @@ struct StubModel : LoadModel { void updateLoads(PhaseType) override {} - TimeType getLoad(ElementIDStruct id, PhaseOffset phase) override { + TimeType getLoadMetric(ElementIDStruct id, PhaseOffset phase) override { // Most recent phase will be at the end of vector return proc_load_->at(num_phases + phase.phases).at(id).whole_phase_load; } @@ -149,7 +149,7 @@ TEST_F(TestLinearModel, test_model_linear_model_1) { ++num_phases; for (auto&& obj : *test_model) { - auto work_val = test_model->getLoad(obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE}); + auto work_val = test_model->getLoadMetric(obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE}); EXPECT_EQ( work_val, obj.id == 1 ? expected_data[iter].first : expected_data[iter].second) diff --git a/tests/unit/collection/test_model_multiple_phases.nompi.cc b/tests/unit/collection/test_model_multiple_phases.nompi.cc index 9fd7c638ab..e60b84e007 100644 --- a/tests/unit/collection/test_model_multiple_phases.nompi.cc +++ b/tests/unit/collection/test_model_multiple_phases.nompi.cc @@ -77,7 +77,7 @@ struct StubModel : LoadModel { void updateLoads(PhaseType) override {} - TimeType getLoad(ElementIDStruct id, PhaseOffset phase) override { + TimeType getLoadMetric(ElementIDStruct id, PhaseOffset phase) override { // Here we return predicted loads for future phases // For the sake of the test we use values from the past phases return proc_load_->at(phase.phases).at(id).whole_phase_load; @@ -119,7 +119,7 @@ TEST_F(TestModelMultiplePhases, test_model_multiple_phases_1) { test_model->updateLoads(3); for (auto&& obj : *test_model) { - auto work_val = test_model->getLoad(obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE}); + auto work_val = test_model->getLoadMetric(obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE}); EXPECT_EQ(work_val, obj.id == 1 ? TimeType{100} : TimeType{85}); } } diff --git a/tests/unit/collection/test_model_naive_persistence.nompi.cc b/tests/unit/collection/test_model_naive_persistence.nompi.cc index 1b3a1f88b0..07cab43c02 100644 --- a/tests/unit/collection/test_model_naive_persistence.nompi.cc +++ b/tests/unit/collection/test_model_naive_persistence.nompi.cc @@ -81,7 +81,7 @@ struct StubModel : LoadModel { void updateLoads(PhaseType) override {} - TimeType getLoad(ElementIDStruct id, PhaseOffset phase) override { + TimeType getLoadMetric(ElementIDStruct id, PhaseOffset phase) override { EXPECT_LE(phase.phases, -1); return proc_load_->at(getIndexFromPhase(phase.phases)).at(id).whole_phase_load; } @@ -124,7 +124,7 @@ TEST_F(TestModelNaivePersistence, test_model_naive_persistence_1) { for (auto it = test_model->begin(); it != test_model->end(); ++it) { auto &&obj = *it; for (auto phase : {0, -1, -2, -3, -4}) { - auto work_val = test_model->getLoad(obj, PhaseOffset{phase, 1}); + auto work_val = test_model->getLoadMetric(obj, PhaseOffset{phase, 1}); EXPECT_EQ(work_val, proc_loads.at(getIndexFromPhase(phase)).at(obj).whole_phase_load); } } diff --git a/tests/unit/collection/test_model_norm.nompi.cc b/tests/unit/collection/test_model_norm.nompi.cc index dfd22eb046..44016b36c6 100644 --- a/tests/unit/collection/test_model_norm.nompi.cc +++ b/tests/unit/collection/test_model_norm.nompi.cc @@ -84,7 +84,7 @@ struct StubModel : LoadModel { void updateLoads(PhaseType) override {} - TimeType getLoad(ElementIDStruct id, PhaseOffset phase) override { + TimeType getLoadMetric(ElementIDStruct id, PhaseOffset phase) override { return proc_load_->at(0).at(id).subphase_loads.at(phase.subphase); } @@ -125,7 +125,7 @@ TEST_F(TestModelNorm, test_model_norm_1) { // offset.subphase != PhaseOffset::WHOLE_PHASE // expect work load value for given subphase - auto work_val = test_model->getLoad(obj, PhaseOffset{0, iter}); + auto work_val = test_model->getLoadMetric(obj, PhaseOffset{0, iter}); EXPECT_EQ(work_val, proc_load[0][obj].subphase_loads[iter]); } @@ -155,7 +155,7 @@ TEST_F(TestModelNorm, test_model_norm_2) { ++objects_seen; auto work_val = - test_model->getLoad(obj, PhaseOffset{0, PhaseOffset::WHOLE_PHASE}); + test_model->getLoadMetric(obj, PhaseOffset{0, PhaseOffset::WHOLE_PHASE}); EXPECT_NEAR(work_val, expected_norms[obj.id - 1], 0.001); } @@ -184,7 +184,7 @@ TEST_F(TestModelNorm, test_model_norm_3) { ++objects_seen; auto work_val = - test_model->getLoad(obj, PhaseOffset{0, PhaseOffset::WHOLE_PHASE}); + test_model->getLoadMetric(obj, PhaseOffset{0, PhaseOffset::WHOLE_PHASE}); EXPECT_EQ(work_val, expected_norms[obj.id - 1]); } diff --git a/tests/unit/collection/test_model_per_collection.extended.cc b/tests/unit/collection/test_model_per_collection.extended.cc index 68dc72d279..0b90772c10 100644 --- a/tests/unit/collection/test_model_per_collection.extended.cc +++ b/tests/unit/collection/test_model_per_collection.extended.cc @@ -74,7 +74,7 @@ struct ConstantTestModel : ComposedModel { proxy_(in_proxy) { } - TimeType getLoad(ElementIDStruct, PhaseOffset) override { + TimeType getLoadMetric(ElementIDStruct, PhaseOffset) override { return static_cast(proxy_); } @@ -153,7 +153,7 @@ TEST_F(TestModelPerCollection, test_model_per_collection_1) { // model to function model->updateLoads(0); for (auto&& obj : *model) { - auto work_val = model->getLoad(obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE}); + auto work_val = model->getLoadMetric(obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE}); if (id_proxy_map.find(obj) != id_proxy_map.end()) { EXPECT_DOUBLE_EQ(work_val, static_cast(id_proxy_map[obj])); } diff --git a/tests/unit/collection/test_model_persistence_median_last_n.nompi.cc b/tests/unit/collection/test_model_persistence_median_last_n.nompi.cc index d9cfdccf0a..95b76ed0b3 100644 --- a/tests/unit/collection/test_model_persistence_median_last_n.nompi.cc +++ b/tests/unit/collection/test_model_persistence_median_last_n.nompi.cc @@ -79,7 +79,7 @@ struct StubModel : LoadModel { void updateLoads(PhaseType) override {} - TimeType getLoad(ElementIDStruct id, PhaseOffset phase) override { + TimeType getLoadMetric(ElementIDStruct id, PhaseOffset phase) override { // Most recent phase will be at the end of vector return proc_load_->at(num_phases + phase.phases).at(id).whole_phase_load; } @@ -148,7 +148,7 @@ TEST_F(TestModelPersistenceMedianLastN, test_model_persistence_median_last_n_1) ++num_phases; for (auto&& obj : *test_model) { - auto work_val = test_model->getLoad(obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE}); + auto work_val = test_model->getLoadMetric(obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE}); EXPECT_EQ( work_val, obj.id == 1 ? expected_medians[iter].first : expected_medians[iter].second) diff --git a/tests/unit/collection/test_model_raw_data.nompi.cc b/tests/unit/collection/test_model_raw_data.nompi.cc index 916608d80e..85b1e51b18 100644 --- a/tests/unit/collection/test_model_raw_data.nompi.cc +++ b/tests/unit/collection/test_model_raw_data.nompi.cc @@ -101,10 +101,10 @@ TEST_F(TestRawData, test_model_raw_data_scalar) { EXPECT_TRUE(obj.id == 1 || obj.id == 2); objects_seen++; - auto work_val = test_model->getLoad(obj, PhaseOffset{-1, PhaseOffset::WHOLE_PHASE}); + auto work_val = test_model->getLoadMetric(obj, PhaseOffset{-1, PhaseOffset::WHOLE_PHASE}); EXPECT_EQ(work_val, load_holder[iter][obj].whole_phase_load); - auto sub_work_val = test_model->getLoad(obj, PhaseOffset{-1, 0}); + auto sub_work_val = test_model->getLoadMetric(obj, PhaseOffset{-1, 0}); EXPECT_EQ(sub_work_val, load_holder[iter][obj].subphase_loads[0]); auto raw_load_val = test_model->getRawLoad(obj, PhaseOffset{-1, PhaseOffset::WHOLE_PHASE}); diff --git a/tests/unit/collection/test_model_select_subphases.nompi.cc b/tests/unit/collection/test_model_select_subphases.nompi.cc index fbd271f9f0..62aea3edf9 100644 --- a/tests/unit/collection/test_model_select_subphases.nompi.cc +++ b/tests/unit/collection/test_model_select_subphases.nompi.cc @@ -83,7 +83,7 @@ struct StubModel : LoadModel { void updateLoads(PhaseType) override {} - TimeType getLoad(ElementIDStruct id, PhaseOffset phase) override { + TimeType getLoadMetric(ElementIDStruct id, PhaseOffset phase) override { return proc_load_->at(0).at(id).subphase_loads.at(phase.subphase); } @@ -146,7 +146,7 @@ TEST_F(TestModelSelectSubphases, test_model_select_subphases_1) { // offset.subphase != PhaseOffset::WHOLE_PHASE // expect work load value for given subphase - auto work_val = test_model->getLoad(obj, PhaseOffset{0, iter}); + auto work_val = test_model->getLoadMetric(obj, PhaseOffset{0, iter}); EXPECT_EQ(work_val, expected_values[obj][iter]); } @@ -187,7 +187,7 @@ TEST_F(TestModelSelectSubphases, test_model_select_subphases_2) { ++objects_seen; auto work_val = - test_model->getLoad(obj, PhaseOffset{0, PhaseOffset::WHOLE_PHASE}); + test_model->getLoadMetric(obj, PhaseOffset{0, PhaseOffset::WHOLE_PHASE}); EXPECT_EQ(work_val, expected_values[obj]); } From 01916fda71766794c49b26081ae6396b5c6b733a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Wed, 6 Apr 2022 17:04:44 +0200 Subject: [PATCH 009/106] #1672: lb: prepare exchange in `runLB` --- src/vt/vrt/collection/balance/baselb/baselb.h | 2 ++ src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/baselb/baselb.h b/src/vt/vrt/collection/balance/baselb/baselb.h index c5ba7087ce..a883899951 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.h +++ b/src/vt/vrt/collection/balance/baselb/baselb.h @@ -143,6 +143,8 @@ struct BaseLB { TransferVecType& getTransfers() { return transfers_; } + bool isCommAware() const { return comm_aware_; } + protected: void getArgs(PhaseType phase); diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index 46b4bafe89..f2bf947866 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -219,6 +219,12 @@ LBManager::runLB( model_->updateLoads(phase); }); + lb::BaseLB* strat = base_proxy.get(); + if (strat->isCommAware()) { + // do exchange + // runInEpochCollective(...) + } + runInEpochCollective("LBManager::runLB -> computeStats", [=] { auto stats_cb = vt::theCB()->makeBcast< LBManager, StatsMsgType, &LBManager::statsHandler @@ -239,7 +245,6 @@ LBManager::runLB( vt_debug_print(terse, lb, "LBManager: running strategy\n"); - lb::BaseLB* strat = base_proxy.get(); auto reassignment = strat->startLB( phase, base_proxy, model_.get(), stats, *comm, total_load_from_model ); From b95310838496bee901bb9302fe783710cc61c205 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Tue, 5 Apr 2022 20:19:51 +0200 Subject: [PATCH 010/106] #1672: lb: add initialization for TemperedWMin - use single proxy underneath - use proxy bits to create proxy --- src/vt/elm/elm_lb_data.cc | 3 ++- .../vrt/collection/balance/hierarchicallb/hierlb.cc | 6 ++++-- src/vt/vrt/collection/balance/lb_common.cc | 7 +++++-- src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc | 2 +- src/vt/vrt/collection/balance/lb_type.cc | 1 + src/vt/vrt/collection/balance/lb_type.h | 2 +- src/vt/vrt/collection/balance/model/comm_overhead.cc | 8 +++++--- src/vt/vrt/collection/balance/model/composed_model.cc | 3 ++- .../vrt/collection/balance/model/multiple_phases.cc | 3 ++- .../vrt/collection/balance/model/naive_persistence.cc | 4 ++-- src/vt/vrt/collection/balance/model/norm.cc | 4 +--- src/vt/vrt/collection/balance/model/per_collection.cc | 3 ++- .../balance/model/persistence_median_last_n.cc | 5 +++-- .../collection/balance/model/proposed_reassignment.cc | 4 ++-- .../collection/balance/model/proposed_reassignment.h | 2 +- .../vrt/collection/balance/model/select_subphases.cc | 3 ++- src/vt/vrt/collection/balance/temperedlb/temperedlb.h | 5 +++-- .../collection/balance/temperedwmin/temperedwmin.cc | 11 +++++++++-- .../collection/balance/temperedwmin/temperedwmin.h | 7 ++++--- .../unit/collection/test_model_linear_model.nompi.cc | 4 +++- .../collection/test_model_multiple_phases.nompi.cc | 4 +++- .../collection/test_model_per_collection.extended.cc | 4 +++- .../test_model_persistence_median_last_n.nompi.cc | 4 +++- tests/unit/collection/test_model_raw_data.nompi.cc | 4 +++- 24 files changed, 67 insertions(+), 36 deletions(-) diff --git a/src/vt/elm/elm_lb_data.cc b/src/vt/elm/elm_lb_data.cc index be3468afbf..950e8295d4 100644 --- a/src/vt/elm/elm_lb_data.cc +++ b/src/vt/elm/elm_lb_data.cc @@ -180,7 +180,8 @@ TimeType ElementLBData::getLoadMetric(PhaseType const& phase) const { } } -TimeType ElementLBData::getLoadMetric(PhaseType phase, SubphaseType subphase) const { +TimeType +ElementLBData::getLoadMetric(PhaseType phase, SubphaseType subphase) const { if (subphase == no_subphase) return getLoadMetric(phase); diff --git a/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc b/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc index 18d001a2f9..33ca2e0b96 100644 --- a/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc +++ b/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc @@ -311,7 +311,8 @@ void HierarchicalLB::loadOverBin(ObjBinType bin, ObjBinListType& bin_list) { load_over[bin].push_back(obj_id); bin_list.pop_back(); - auto const& obj_time_milli = loadMilli(load_model_->getLoadMetric(obj_id, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE})); + auto const& obj_time_milli = loadMilli(load_model_->getLoadMetric(obj_id, + {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE})); this_load -= obj_time_milli; @@ -452,7 +453,8 @@ void HierarchicalLB::downTree( void HierarchicalLB::lbTreeUpHandler(LBTreeUpMsg* msg) { lbTreeUp( - msg->getChildLoad(), msg->getChild(), msg->getLoadMetric(), msg->getChildSize() + msg->getChildLoad(), msg->getChild(), msg->getLoadMetric(), + msg->getChildSize() ); } diff --git a/src/vt/vrt/collection/balance/lb_common.cc b/src/vt/vrt/collection/balance/lb_common.cc index bfec704d36..1027aad75f 100644 --- a/src/vt/vrt/collection/balance/lb_common.cc +++ b/src/vt/vrt/collection/balance/lb_common.cc @@ -61,11 +61,14 @@ LoadSummary getObjectLoads( LoadModel* model, ElementIDStruct object, PhaseOffset when ) { LoadSummary ret; - ret.whole_phase_load = model->getLoadMetric(object, {when.phases, PhaseOffset::WHOLE_PHASE}); + ret.whole_phase_load = + model->getLoadMetric(object, {when.phases, PhaseOffset::WHOLE_PHASE}); unsigned int subphases = model->getNumSubphases(); for (unsigned int i = 0; i < subphases; ++i) - ret.subphase_loads.push_back(model->getLoadMetric(object, {when.phases, i})); + ret.subphase_loads.push_back( + model->getLoadMetric(object, {when.phases, i}) + ); return ret; } diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index f2bf947866..55455d8b3f 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -326,7 +326,7 @@ void LBManager::startLB( case LBType::ZoltanLB: lb_instances_["chosen"] = makeLB(); break; # endif case LBType::TestSerializationLB: lb_instances_["chosen"] = makeLB(); break; - // case LBType::TemperedWMin: lb_instances_["chosen"] = makeLB(); break; + case LBType::TemperedWMin: lb_instances_["chosen"] = makeLB(); break; case LBType::NoLB: vtAssert(false, "LBType::NoLB is not a valid LB for collectiveImpl"); break; diff --git a/src/vt/vrt/collection/balance/lb_type.cc b/src/vt/vrt/collection/balance/lb_type.cc index b9cede89d4..3238b7e64f 100644 --- a/src/vt/vrt/collection/balance/lb_type.cc +++ b/src/vt/vrt/collection/balance/lb_type.cc @@ -61,6 +61,7 @@ static std::unordered_map lb_names_ = { {LBType::OfflineLB, std::string{"OfflineLB" }}, {LBType::RandomLB, std::string{"RandomLB" }}, {LBType::TestSerializationLB, std::string{"TestSerializationLB"}}, + {LBType::TemperedWMin, std::string{"TemperedWMin" }}, }; std::unordered_map& get_lb_names() { diff --git a/src/vt/vrt/collection/balance/lb_type.h b/src/vt/vrt/collection/balance/lb_type.h index 50d87aed43..f7d7d148fa 100644 --- a/src/vt/vrt/collection/balance/lb_type.h +++ b/src/vt/vrt/collection/balance/lb_type.h @@ -44,7 +44,7 @@ #if !defined INCLUDED_VT_VRT_COLLECTION_BALANCE_LB_TYPE_H #define INCLUDED_VT_VRT_COLLECTION_BALANCE_LB_TYPE_H -#include "vt/config.h" +#include "vt/configs/features/features_defines.h" #include #include diff --git a/src/vt/vrt/collection/balance/model/comm_overhead.cc b/src/vt/vrt/collection/balance/model/comm_overhead.cc index 9c8062e756..489a5e0b31 100644 --- a/src/vt/vrt/collection/balance/model/comm_overhead.cc +++ b/src/vt/vrt/collection/balance/model/comm_overhead.cc @@ -60,7 +60,8 @@ void CommOverhead::setLoads(std::unordered_map const* pr ComposedModel::setLoads(proc_load, proc_comm); } -TimeType CommOverhead::getLoadMetric(ElementIDStruct object, PhaseOffset offset) { +TimeType +CommOverhead::getLoadMetric(ElementIDStruct object, PhaseOffset offset) { auto work = ComposedModel::getLoadMetric(object, offset); auto phase = getNumCompletedPhases() + offset.phases; @@ -79,10 +80,11 @@ TimeType CommOverhead::getLoadMetric(ElementIDStruct object, PhaseOffset offset) return work + overhead; } else { // @todo: we don't record comm costs for each subphase---split it proportionally - auto whole_phase_work = ComposedModel::getLoadMetric(object, PhaseOffset{offset.phases, PhaseOffset::WHOLE_PHASE}); + auto whole_phase_work = ComposedModel::getLoadMetric( + object, PhaseOffset{offset.phases, PhaseOffset::WHOLE_PHASE} + ); return work + overhead * ( static_cast(work)/whole_phase_work ); } } - }}}} diff --git a/src/vt/vrt/collection/balance/model/composed_model.cc b/src/vt/vrt/collection/balance/model/composed_model.cc index 66b6939ce0..71906601f0 100644 --- a/src/vt/vrt/collection/balance/model/composed_model.cc +++ b/src/vt/vrt/collection/balance/model/composed_model.cc @@ -54,7 +54,8 @@ void ComposedModel::updateLoads(PhaseType last_completed_phase) { base_->updateLoads(last_completed_phase); } -TimeType ComposedModel::getLoadMetric(ElementIDStruct object, PhaseOffset when) { +TimeType +ComposedModel::getLoadMetric(ElementIDStruct object, PhaseOffset when) { return base_->getLoadMetric(object, when); } diff --git a/src/vt/vrt/collection/balance/model/multiple_phases.cc b/src/vt/vrt/collection/balance/model/multiple_phases.cc index 7bfa8cae26..4ea7e01cf6 100644 --- a/src/vt/vrt/collection/balance/model/multiple_phases.cc +++ b/src/vt/vrt/collection/balance/model/multiple_phases.cc @@ -45,7 +45,8 @@ namespace vt { namespace vrt { namespace collection { namespace balance { -TimeType MultiplePhases::getLoadMetric(ElementIDStruct object, PhaseOffset when) { +TimeType +MultiplePhases::getLoadMetric(ElementIDStruct object, PhaseOffset when) { // Retrospective queries don't call for a prediction if (when.phases < 0) return ComposedModel::getLoadMetric(object, when); diff --git a/src/vt/vrt/collection/balance/model/naive_persistence.cc b/src/vt/vrt/collection/balance/model/naive_persistence.cc index 723e96ea47..733893bd45 100644 --- a/src/vt/vrt/collection/balance/model/naive_persistence.cc +++ b/src/vt/vrt/collection/balance/model/naive_persistence.cc @@ -50,8 +50,8 @@ NaivePersistence::NaivePersistence(std::shared_ptr base) : ComposedModel(base) { } -TimeType NaivePersistence::getLoadMetric(ElementIDStruct object, PhaseOffset offset) -{ +TimeType +NaivePersistence::getLoadMetric(ElementIDStruct object, PhaseOffset offset) { if (offset.phases >= 0) offset.phases = -1; diff --git a/src/vt/vrt/collection/balance/model/norm.cc b/src/vt/vrt/collection/balance/model/norm.cc index 584357db9c..9f9ba06bf3 100644 --- a/src/vt/vrt/collection/balance/model/norm.cc +++ b/src/vt/vrt/collection/balance/model/norm.cc @@ -55,8 +55,7 @@ Norm::Norm(std::shared_ptr base, double power) vtAssert(power >= 0.0, "Reciprocal loads make no sense"); } -TimeType Norm::getLoadMetric(ElementIDStruct object, PhaseOffset offset) -{ +TimeType Norm::getLoadMetric(ElementIDStruct object, PhaseOffset offset) { if (offset.subphase != PhaseOffset::WHOLE_PHASE) return ComposedModel::getLoadMetric(object, offset); @@ -84,5 +83,4 @@ TimeType Norm::getLoadMetric(ElementIDStruct object, PhaseOffset offset) } } - }}}} diff --git a/src/vt/vrt/collection/balance/model/per_collection.cc b/src/vt/vrt/collection/balance/model/per_collection.cc index 770245e515..1226baf6fd 100644 --- a/src/vt/vrt/collection/balance/model/per_collection.cc +++ b/src/vt/vrt/collection/balance/model/per_collection.cc @@ -68,7 +68,8 @@ void PerCollection::updateLoads(PhaseType last_completed_phase) { ComposedModel::updateLoads(last_completed_phase); } -TimeType PerCollection::getLoadMetric(ElementIDStruct object, PhaseOffset when) { +TimeType +PerCollection::getLoadMetric(ElementIDStruct object, PhaseOffset when) { // See if some specific model has been given for the object in question auto mi = models_.find(theNodeLBData()->getCollectionProxyForElement(object)); if (mi != models_.end()) diff --git a/src/vt/vrt/collection/balance/model/persistence_median_last_n.cc b/src/vt/vrt/collection/balance/model/persistence_median_last_n.cc index da1318204b..033465f46e 100644 --- a/src/vt/vrt/collection/balance/model/persistence_median_last_n.cc +++ b/src/vt/vrt/collection/balance/model/persistence_median_last_n.cc @@ -54,8 +54,9 @@ PersistenceMedianLastN::PersistenceMedianLastN(std::shared_ptr base, vtAssert(n > 0, "Cannot take a median over no phases"); } -TimeType PersistenceMedianLastN::getLoadMetric(ElementIDStruct object, PhaseOffset when) -{ +TimeType PersistenceMedianLastN::getLoadMetric( + ElementIDStruct object, PhaseOffset when +) { // Retrospective queries don't call for a prospective calculation if (when.phases < 0) return ComposedModel::getLoadMetric(object, when); diff --git a/src/vt/vrt/collection/balance/model/proposed_reassignment.cc b/src/vt/vrt/collection/balance/model/proposed_reassignment.cc index 763ebb8ba8..26ace02ed2 100644 --- a/src/vt/vrt/collection/balance/model/proposed_reassignment.cc +++ b/src/vt/vrt/collection/balance/model/proposed_reassignment.cc @@ -93,8 +93,8 @@ int ProposedReassignment::getNumObjects() return base - departing + arriving; } -TimeType ProposedReassignment::getLoadMetric(ElementIDStruct object, PhaseOffset when) -{ +TimeType +ProposedReassignment::getLoadMetric(ElementIDStruct object, PhaseOffset when) { auto a = reassignment_->arrive_.find(object); if (a != reassignment_->arrive_.end()) { return std::get<0>(a->second).get(when); diff --git a/src/vt/vrt/collection/balance/model/proposed_reassignment.h b/src/vt/vrt/collection/balance/model/proposed_reassignment.h index 83ad0527fc..a5aaad08f0 100644 --- a/src/vt/vrt/collection/balance/model/proposed_reassignment.h +++ b/src/vt/vrt/collection/balance/model/proposed_reassignment.h @@ -60,7 +60,7 @@ struct ProposedReassignment : public ComposedModel { TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) override; TimeType getRawLoad(ElementIDStruct object, PhaseOffset when) override; - private: +private: std::shared_ptr reassignment_; }; diff --git a/src/vt/vrt/collection/balance/model/select_subphases.cc b/src/vt/vrt/collection/balance/model/select_subphases.cc index 6fe423ffce..9c26662c02 100644 --- a/src/vt/vrt/collection/balance/model/select_subphases.cc +++ b/src/vt/vrt/collection/balance/model/select_subphases.cc @@ -58,7 +58,8 @@ SelectSubphases::SelectSubphases(std::shared_ptr base, std::vector propagated_k_; - std::mt19937 gen_propagate_; - std::mt19937 gen_sample_; StatisticMapType stats; LoadType this_load = 0.0f; }; diff --git a/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc b/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc index e1a1750aab..0012b27b83 100644 --- a/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc +++ b/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc @@ -48,6 +48,13 @@ namespace vt { namespace vrt { namespace collection { namespace lb { +void TemperedWMin::init(objgroup::proxy::Proxy in_proxy) { + auto proxy_bits = in_proxy.getProxy(); + auto proxy = objgroup::proxy::Proxy(proxy_bits); + auto strat = proxy.get(); + strat->init(proxy); +} + /*static*/ std::unordered_map TemperedWMin::getInputKeysWithHelp() { auto map = TemperedLB::getInputKeysWithHelp(); @@ -87,8 +94,8 @@ TimeType TemperedWMin::getTotalWork(const elm::ElementIDStruct& obj) { balance::PhaseOffset when = {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE}; - return alpha_ * load_model_->getLoadMetric(obj, when) - + beta_ * load_model_->getComm(obj, when) + gamma_; + return alpha_ * load_model_->getLoadMetric(obj, when) + + beta_ * load_model_->getComm(obj, when) + gamma_; } }}}} // namespace vt::vrt::collection::lb diff --git a/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.h b/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.h index ec661c1b1b..e93ce565a9 100644 --- a/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.h +++ b/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.h @@ -55,6 +55,7 @@ struct TemperedWMin : TemperedLB { virtual ~TemperedWMin() { } public: + void init(objgroup::proxy::Proxy in_proxy); static std::unordered_map getInputKeysWithHelp(); void inputParams(balance::SpecEntry* spec) override; @@ -63,9 +64,9 @@ struct TemperedWMin : TemperedLB { TimeType getTotalWork(const elm::ElementIDStruct& obj) override; private: - double alpha_ = 1.0; - double beta_ = 0.0; - double gamma_ = 0.0; + double alpha_ = 1.0; + double beta_ = 0.0; + double gamma_ = 0.0; }; }}}} /* end namespace vt::vrt::collection::lb */ diff --git a/tests/unit/collection/test_model_linear_model.nompi.cc b/tests/unit/collection/test_model_linear_model.nompi.cc index bc4fb4039b..0a5b85817f 100644 --- a/tests/unit/collection/test_model_linear_model.nompi.cc +++ b/tests/unit/collection/test_model_linear_model.nompi.cc @@ -149,7 +149,9 @@ TEST_F(TestLinearModel, test_model_linear_model_1) { ++num_phases; for (auto&& obj : *test_model) { - auto work_val = test_model->getLoadMetric(obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE}); + auto work_val = test_model->getLoadMetric( + obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE} + ); EXPECT_EQ( work_val, obj.id == 1 ? expected_data[iter].first : expected_data[iter].second) diff --git a/tests/unit/collection/test_model_multiple_phases.nompi.cc b/tests/unit/collection/test_model_multiple_phases.nompi.cc index e60b84e007..bf227a8e06 100644 --- a/tests/unit/collection/test_model_multiple_phases.nompi.cc +++ b/tests/unit/collection/test_model_multiple_phases.nompi.cc @@ -119,7 +119,9 @@ TEST_F(TestModelMultiplePhases, test_model_multiple_phases_1) { test_model->updateLoads(3); for (auto&& obj : *test_model) { - auto work_val = test_model->getLoadMetric(obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE}); + auto work_val = test_model->getLoadMetric( + obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE} + ); EXPECT_EQ(work_val, obj.id == 1 ? TimeType{100} : TimeType{85}); } } diff --git a/tests/unit/collection/test_model_per_collection.extended.cc b/tests/unit/collection/test_model_per_collection.extended.cc index 0b90772c10..2d03ec6aba 100644 --- a/tests/unit/collection/test_model_per_collection.extended.cc +++ b/tests/unit/collection/test_model_per_collection.extended.cc @@ -153,7 +153,9 @@ TEST_F(TestModelPerCollection, test_model_per_collection_1) { // model to function model->updateLoads(0); for (auto&& obj : *model) { - auto work_val = model->getLoadMetric(obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE}); + auto work_val = model->getLoadMetric( + obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE} + ); if (id_proxy_map.find(obj) != id_proxy_map.end()) { EXPECT_DOUBLE_EQ(work_val, static_cast(id_proxy_map[obj])); } diff --git a/tests/unit/collection/test_model_persistence_median_last_n.nompi.cc b/tests/unit/collection/test_model_persistence_median_last_n.nompi.cc index 95b76ed0b3..9395e3ac4e 100644 --- a/tests/unit/collection/test_model_persistence_median_last_n.nompi.cc +++ b/tests/unit/collection/test_model_persistence_median_last_n.nompi.cc @@ -148,7 +148,9 @@ TEST_F(TestModelPersistenceMedianLastN, test_model_persistence_median_last_n_1) ++num_phases; for (auto&& obj : *test_model) { - auto work_val = test_model->getLoadMetric(obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE}); + auto work_val = test_model->getLoadMetric( + obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE} + ); EXPECT_EQ( work_val, obj.id == 1 ? expected_medians[iter].first : expected_medians[iter].second) diff --git a/tests/unit/collection/test_model_raw_data.nompi.cc b/tests/unit/collection/test_model_raw_data.nompi.cc index 85b1e51b18..ed3032865b 100644 --- a/tests/unit/collection/test_model_raw_data.nompi.cc +++ b/tests/unit/collection/test_model_raw_data.nompi.cc @@ -101,7 +101,9 @@ TEST_F(TestRawData, test_model_raw_data_scalar) { EXPECT_TRUE(obj.id == 1 || obj.id == 2); objects_seen++; - auto work_val = test_model->getLoadMetric(obj, PhaseOffset{-1, PhaseOffset::WHOLE_PHASE}); + auto work_val = test_model->getLoadMetric( + obj, PhaseOffset{-1, PhaseOffset::WHOLE_PHASE} + ); EXPECT_EQ(work_val, load_holder[iter][obj].whole_phase_load); auto sub_work_val = test_model->getLoadMetric(obj, PhaseOffset{-1, 0}); From e0ec22c7c81e333c2284d3a30d1ac4d94e08d948 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Tue, 12 Apr 2022 18:28:43 +0200 Subject: [PATCH 011/106] #1672: tests: verify TemperedWMin --- tests/unit/collection/test_lb.extended.cc | 16 +++++++++------ tests/unit/lb/test_temperedlb.nompi.cc | 25 +++++++++++++++++------ 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/tests/unit/collection/test_lb.extended.cc b/tests/unit/collection/test_lb.extended.cc index ab43e1e041..934fa47399 100644 --- a/tests/unit/collection/test_lb.extended.cc +++ b/tests/unit/collection/test_lb.extended.cc @@ -94,7 +94,10 @@ void runTest(std::string lb_name) { if (vt::theContext()->getNode() == 0) { fmt::print("Testing lb {}\n", lb_name); } - if (lb_name.compare("TemperedLB") == 0) { + if ( + lb_name.compare("TemperedLB") == 0 || + lb_name.compare("TemperedWMin") == 0 + ) { std::string lb_args("ordering=Arbitrary rollback=false"); vt::theConfig()->vt_lb_args = lb_args; if (vt::theContext()->getNode() == 0) { @@ -174,12 +177,13 @@ TEST_F(TestLoadBalancerNoWork, test_load_balancer_no_work) { } auto balancers_other = ::testing::Values( - "RandomLB", - "RotateLB", - "HierarchicalLB", - "TemperedLB" + "RandomLB", + "RotateLB", + "HierarchicalLB", + "TemperedLB", + "TemperedWMin" # if vt_check_enabled(zoltan) - , "ZoltanLB" + , "ZoltanLB" # endif ); diff --git a/tests/unit/lb/test_temperedlb.nompi.cc b/tests/unit/lb/test_temperedlb.nompi.cc index bd0c15e811..f74f9697d9 100644 --- a/tests/unit/lb/test_temperedlb.nompi.cc +++ b/tests/unit/lb/test_temperedlb.nompi.cc @@ -44,6 +44,7 @@ #include #include #include +#include #include "test_harness.h" @@ -75,14 +76,15 @@ TimeType setupProblem( void orderAndVerify( ObjectOrdering order, - const std::unordered_map &cur_objs, + const std::unordered_map& cur_objs, TimeType my_load, TimeType target_load, - const std::vector &soln -) { + const std::vector& soln, bool use_tempered_wmin = false) { // have TemperedLB order the objects - auto ordered_objs = vt::vrt::collection::lb::TemperedLB::orderObjects( - order, cur_objs, my_load, target_load - ); + auto ordered_objs = use_tempered_wmin ? + vt::vrt::collection::lb::TemperedWMin::orderObjects( + order, cur_objs, my_load, target_load) : + vt::vrt::collection::lb::TemperedLB::orderObjects( + order, cur_objs, my_load, target_load); // verify correctness of the returned ordering int i = 0; @@ -199,4 +201,15 @@ TEST_F(TestTemperedLB, test_temperedlb_ordering_largestobjects) { orderUsingOverloadAndVerify(order, over_avg, soln); } +/////////////////////////////////////////////////////////////////////////// + +TEST_F(TestTemperedLB, test_temperedwmin) { + ObjectOrdering order = ObjectOrdering::ElmID; + TimeType over_avg = 4.5; + // result will be independent of over_avg + std::vector soln = {0, 1, 2, 3, 4, 5}; + + orderUsingOverloadAndVerify(order, over_avg, soln); +} + }}} // end namespace vt::tests::unit From 1619b66048d375580d15163b49f9a48047f98ae0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Wed, 13 Apr 2022 15:22:08 +0200 Subject: [PATCH 012/106] #1672: lb: store communication on both sides --- .../vrt/collection/balance/baselb/baselb.cc | 10 +++ src/vt/vrt/collection/balance/baselb/baselb.h | 21 ++++++- .../balance/lb_invoke/lb_manager.cc | 61 ++++++++++++++++++- .../collection/balance/lb_invoke/lb_manager.h | 4 ++ 4 files changed, 93 insertions(+), 3 deletions(-) diff --git a/src/vt/vrt/collection/balance/baselb/baselb.cc b/src/vt/vrt/collection/balance/baselb/baselb.cc index 2c8a5004bb..7c09006800 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.cc +++ b/src/vt/vrt/collection/balance/baselb/baselb.cc @@ -260,6 +260,16 @@ void BaseLB::finalize(CountMsg* msg) { } } +void BaseLB::recvSharedEdges(CommMsg* msg) { + auto& comm = msg->comm_; + for (auto&& elm : comm) { + vt_debug_print( + verbose, lb, "recv shared edge: from={}, to={}\n", elm.first.fromObj(), + elm.first.toObj() + ); + } +} + }}}} /* end namespace vt::vrt::collection::lb */ #endif /*INCLUDED_VT_VRT_COLLECTION_BALANCE_BASELB_BASELB_CC*/ diff --git a/src/vt/vrt/collection/balance/baselb/baselb.h b/src/vt/vrt/collection/balance/baselb/baselb.h index a883899951..da7d60424c 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.h +++ b/src/vt/vrt/collection/balance/baselb/baselb.h @@ -64,6 +64,8 @@ struct LoadModel; namespace lb { +struct CommMsg; + struct BaseLB { using ObjIDType = balance::ElementIDStruct; using ElementLoadType = std::unordered_map; @@ -144,6 +146,7 @@ struct BaseLB { TransferVecType& getTransfers() { return transfers_; } bool isCommAware() const { return comm_aware_; } + void recvSharedEdges(CommMsg* msg); protected: void getArgs(PhaseType phase); @@ -176,6 +179,22 @@ struct BaseLB { std::shared_ptr pending_reassignment_ = nullptr; }; -}}}} /* end namespace vt::vrt::collection::lb */ +struct CommMsg : vt::Message { + using MessageParentType = vt::Message; + vt_msg_serialize_required(); + + CommMsg() = default; + explicit CommMsg(lb::BaseLB::ElementCommType in_comm) : comm_(in_comm) { } + + lb::BaseLB::ElementCommType comm_; + + template + void serialize(SerializerT& s) { + MessageParentType::serialize(s); + s | comm_; + } +}; + +}}}} // namespace vt::vrt::collection::lb #endif /*INCLUDED_VT_VRT_COLLECTION_BALANCE_BASELB_BASELB_H*/ diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index 55455d8b3f..a59cbe8adb 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -45,6 +45,7 @@ #include "vt/configs/arguments/app_config.h" #include "vt/context/context.h" #include "vt/phase/phase_hook_enum.h" +#include "vt/vrt/collection/balance/baselb/baselb.h" #include "vt/vrt/collection/balance/lb_invoke/lb_manager.h" #include "vt/vrt/collection/balance/stats_msg.h" #include "vt/vrt/collection/balance/read_lb.h" @@ -220,9 +221,12 @@ LBManager::runLB( }); lb::BaseLB* strat = base_proxy.get(); + auto proxy = lb_instances_["chosen"]; if (strat->isCommAware()) { - // do exchange - // runInEpochCollective(...) + runInEpochCollective( + "LBManager::runLB -> makeGraphSymmetric", + [phase, proxy] { makeGraphSymmetric(phase, proxy); } + ); } runInEpochCollective("LBManager::runLB -> computeStats", [=] { @@ -724,4 +728,57 @@ void LBManager::closeStatisticsFile() { statistics_writer_ = nullptr; } +void makeGraphSymmetric( + PhaseType phase, objgroup::proxy::Proxy proxy +) { + auto const this_node = theContext()->getNode(); + + // TODO: extract to helper method + elm::CommMapType empty_comm; + elm::CommMapType const* comm_data = &empty_comm; + auto iter = theNodeStats()->getNodeComm()->find(phase); + // TODO: is it an error when we don't find the phase? + if (iter != theNodeStats()->getNodeComm()->end()) { + comm_data = &iter->second; + } + + // Go through the comm graph and extract out paired SendRecv edges that are + // not self-send and have a non-local edge + std::unordered_map shared_edges; + + for (auto&& elm : *comm_data) { + if ( + elm.first.commCategory() == elm::CommCategory::SendRecv and + not elm.first.selfEdge() + ) { + auto from = elm.first.fromObj(); + auto to = elm.first.toObj(); + + auto from_node = from.curr_node; + auto to_node = to.curr_node; + + vtAssert( + from_node == this_node or to_node == this_node, + "One node must involve this node" + ); + + vt_debug_print( + verbose, lb, "makeGraphSymmetric: from={}, to={}\n", from, to + ); + + if (from_node != this_node) { + shared_edges[from_node][elm.first] = elm.second; + } else if (to_node != this_node) { + shared_edges[to_node][elm.first] = elm.second; + } + } + } + + for (auto&& elm : shared_edges) { + proxy[elm.first].send( + elm.second + ); + } +} + }}}} /* end namespace vt::vrt::collection::balance */ diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h index 05af41acb0..c9249480a2 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h @@ -292,6 +292,10 @@ struct LBManager : runtime::component::Component { std::unique_ptr statistics_writer_ = nullptr; }; +void makeGraphSymmetric( + PhaseType phase, objgroup::proxy::Proxy proxy +); + }}}} /* end namespace vt::vrt::collection::balance */ namespace vt { From c0ae121413c3905aeec1865f2d47ecd5fcf3cb7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Fri, 15 Apr 2022 15:03:57 +0200 Subject: [PATCH 013/106] #1672: make TemperedWMin comm aware --- src/vt/vrt/collection/balance/temperedwmin/temperedwmin.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.h b/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.h index e93ce565a9..6f22d3a3a1 100644 --- a/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.h +++ b/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.h @@ -49,7 +49,7 @@ namespace vt { namespace vrt { namespace collection { namespace lb { struct TemperedWMin : TemperedLB { - TemperedWMin() = default; + TemperedWMin() { comm_aware_ = true; } TemperedWMin(TemperedWMin const&) = delete; virtual ~TemperedWMin() { } From 0622aa6c7f8fa2a59aded084670f9d2ca0a7653f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Tue, 19 Apr 2022 14:48:26 +0200 Subject: [PATCH 014/106] #1672: lb: add received comm data to graph --- .../vrt/collection/balance/baselb/baselb.cc | 21 +++++++++++++------ src/vt/vrt/collection/balance/node_lb_data.h | 7 +++++++ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/vt/vrt/collection/balance/baselb/baselb.cc b/src/vt/vrt/collection/balance/baselb/baselb.cc index 7c09006800..e0fa94035e 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.cc +++ b/src/vt/vrt/collection/balance/baselb/baselb.cc @@ -54,6 +54,7 @@ #include "vt/collective/collective_alg.h" #include "vt/vrt/collection/balance/lb_common.h" #include "vt/vrt/collection/balance/model/load_model.h" +#include "vt/phase/phase_manager.h" #include @@ -261,12 +262,20 @@ void BaseLB::finalize(CountMsg* msg) { } void BaseLB::recvSharedEdges(CommMsg* msg) { - auto& comm = msg->comm_; - for (auto&& elm : comm) { - vt_debug_print( - verbose, lb, "recv shared edge: from={}, to={}\n", elm.first.fromObj(), - elm.first.toObj() - ); + auto phase = thePhase()->getCurrentPhase(); + auto iter = theNodeStats()->getNodeComm()->find(phase); + + if (iter != theNodeStats()->getNodeComm()->end()) { + auto comm_map = &iter->second; + + auto& comm = msg->comm_; + for (auto&& elm : comm) { + comm_map->insert(elm); + vt_debug_print( + verbose, lb, "recv shared edge: from={}, to={}\n", elm.first.fromObj(), + elm.first.toObj() + ); + } } } diff --git a/src/vt/vrt/collection/balance/node_lb_data.h b/src/vt/vrt/collection/balance/node_lb_data.h index fdd83ebdbb..9e14cf6fde 100644 --- a/src/vt/vrt/collection/balance/node_lb_data.h +++ b/src/vt/vrt/collection/balance/node_lb_data.h @@ -175,6 +175,13 @@ struct NodeLBData : runtime::component::Component { */ std::unordered_map const* getNodeComm() const; + /** + * \internal \brief Get stored object comm graph + * + * \return a pointer to the comm graph + */ + std::unordered_map* getNodeComm(); + /** * \internal \brief Get stored object comm subphase graph * From 543f320a266f325de04de98f5295c9c6e8463424 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Thu, 21 Apr 2022 14:20:41 +0200 Subject: [PATCH 015/106] #1672: lb: rename `getLoadMetric` to `getModeledLoad` --- src/vt/elm/elm_lb_data.cc | 6 +++--- src/vt/elm/elm_lb_data.h | 4 ++-- src/vt/vrt/collection/balance/baselb/load_sampler.cc | 2 +- src/vt/vrt/collection/balance/greedylb/greedylb.cc | 8 +++++--- src/vt/vrt/collection/balance/greedylb/greedylb_msgs.h | 2 +- src/vt/vrt/collection/balance/greedylb/greedylb_types.h | 8 ++++---- src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc | 4 ++-- .../vrt/collection/balance/hierarchicallb/hierlb_msgs.h | 2 +- src/vt/vrt/collection/balance/lb_common.cc | 4 ++-- src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc | 2 +- src/vt/vrt/collection/balance/model/comm_overhead.cc | 6 +++--- src/vt/vrt/collection/balance/model/comm_overhead.h | 2 +- src/vt/vrt/collection/balance/model/composed_model.cc | 4 ++-- src/vt/vrt/collection/balance/model/composed_model.h | 2 +- src/vt/vrt/collection/balance/model/linear_model.cc | 6 +++--- src/vt/vrt/collection/balance/model/linear_model.h | 2 +- src/vt/vrt/collection/balance/model/load_model.h | 4 ++-- src/vt/vrt/collection/balance/model/multiple_phases.cc | 6 +++--- src/vt/vrt/collection/balance/model/multiple_phases.h | 2 +- src/vt/vrt/collection/balance/model/naive_persistence.cc | 4 ++-- src/vt/vrt/collection/balance/model/naive_persistence.h | 2 +- src/vt/vrt/collection/balance/model/norm.cc | 8 ++++---- src/vt/vrt/collection/balance/model/norm.h | 2 +- src/vt/vrt/collection/balance/model/per_collection.cc | 6 +++--- src/vt/vrt/collection/balance/model/per_collection.h | 2 +- .../collection/balance/model/persistence_median_last_n.cc | 6 +++--- .../collection/balance/model/persistence_median_last_n.h | 2 +- .../vrt/collection/balance/model/proposed_reassignment.cc | 4 ++-- .../vrt/collection/balance/model/proposed_reassignment.h | 2 +- src/vt/vrt/collection/balance/model/raw_data.cc | 2 +- src/vt/vrt/collection/balance/model/raw_data.h | 2 +- src/vt/vrt/collection/balance/model/select_subphases.cc | 6 +++--- src/vt/vrt/collection/balance/model/select_subphases.h | 2 +- src/vt/vrt/collection/balance/node_lb_data.cc | 2 +- src/vt/vrt/collection/balance/rotatelb/rotatelb.cc | 4 +++- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 2 +- .../vrt/collection/balance/temperedwmin/temperedwmin.cc | 2 +- src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc | 2 +- tests/unit/collection/test_model_comm_overhead.nompi.cc | 4 ++-- tests/unit/collection/test_model_linear_model.nompi.cc | 4 ++-- tests/unit/collection/test_model_multiple_phases.nompi.cc | 4 ++-- .../unit/collection/test_model_naive_persistence.nompi.cc | 4 ++-- tests/unit/collection/test_model_norm.nompi.cc | 8 ++++---- .../unit/collection/test_model_per_collection.extended.cc | 4 ++-- .../test_model_persistence_median_last_n.nompi.cc | 4 ++-- tests/unit/collection/test_model_raw_data.nompi.cc | 4 ++-- .../unit/collection/test_model_select_subphases.nompi.cc | 6 +++--- 47 files changed, 92 insertions(+), 88 deletions(-) diff --git a/src/vt/elm/elm_lb_data.cc b/src/vt/elm/elm_lb_data.cc index 950e8295d4..266dc54e61 100644 --- a/src/vt/elm/elm_lb_data.cc +++ b/src/vt/elm/elm_lb_data.cc @@ -163,7 +163,7 @@ PhaseType ElementLBData::getPhase() const { return cur_phase_; } -TimeType ElementLBData::getLoadMetric(PhaseType const& phase) const { +TimeType ElementLBData::getModeledLoad(PhaseType const& phase) const { auto iter = phase_timings_.find(phase); if (iter != phase_timings_.end()) { TimeTypeWrapper const total_load = phase_timings_.at(phase); @@ -181,9 +181,9 @@ TimeType ElementLBData::getLoadMetric(PhaseType const& phase) const { } TimeType -ElementLBData::getLoadMetric(PhaseType phase, SubphaseType subphase) const { +ElementLBData::getModeledLoad(PhaseType phase, SubphaseType subphase) const { if (subphase == no_subphase) - return getLoadMetric(phase); + return getModeledLoad(phase); auto const& subphase_loads = subphase_timings_.at(phase); diff --git a/src/vt/elm/elm_lb_data.h b/src/vt/elm/elm_lb_data.h index f5554d4eb3..6d1cd8d78e 100644 --- a/src/vt/elm/elm_lb_data.h +++ b/src/vt/elm/elm_lb_data.h @@ -84,8 +84,8 @@ struct ElementLBData { void updatePhase(PhaseType const& inc = 1); void resetPhase(); PhaseType getPhase() const; - TimeType getLoadMetric(PhaseType const& phase) const; - TimeType getLoadMetric(PhaseType phase, SubphaseType subphase) const; + TimeType getModeledLoad(PhaseType const& phase) const; + TimeType getModeledLoad(PhaseType phase, SubphaseType subphase) const; CommMapType const& getComm(PhaseType const& phase); std::vector const& getSubphaseComm(PhaseType phase); diff --git a/src/vt/vrt/collection/balance/baselb/load_sampler.cc b/src/vt/vrt/collection/balance/baselb/load_sampler.cc index 97a642bc0d..1ac32631d4 100644 --- a/src/vt/vrt/collection/balance/baselb/load_sampler.cc +++ b/src/vt/vrt/collection/balance/baselb/load_sampler.cc @@ -49,7 +49,7 @@ namespace vt { namespace vrt { namespace collection { namespace lb { void LoadSamplerBaseLB::buildHistogram() { for (auto obj : *load_model_) { - TimeTypeWrapper load = load_model_->getLoadMetric( + TimeTypeWrapper load = load_model_->getModeledLoad( obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE} ); auto const& load_milli = loadMilli(load.seconds()); diff --git a/src/vt/vrt/collection/balance/greedylb/greedylb.cc b/src/vt/vrt/collection/balance/greedylb/greedylb.cc index 8e675fea0c..423af76d85 100644 --- a/src/vt/vrt/collection/balance/greedylb/greedylb.cc +++ b/src/vt/vrt/collection/balance/greedylb/greedylb.cc @@ -273,10 +273,10 @@ void GreedyLB::runBalancer( "recs_={}, max_rec: obj={}, time={}\n", min_node.node_, TimeTypeWrapper(min_node.load_ / 1000), min_node.recs_.size(), max_rec.getObj(), - TimeTypeWrapper(max_rec.getLoadMetric() / 1000) + TimeTypeWrapper(max_rec.getModeledLoad() / 1000) ); min_node.recs_.push_back(max_rec.getObj()); - min_node.load_ += max_rec.getLoadMetric(); + min_node.load_ += max_rec.getModeledLoad(); nodes.push_back(min_node); std::push_heap(nodes.begin(), nodes.end(), CompProcType()); } @@ -413,7 +413,9 @@ void GreedyLB::loadOverBin(ObjBinType bin, ObjBinListType& bin_list) { load_over[bin].push_back(obj_id); bin_list.pop_back(); - auto const& obj_time_milli = loadMilli(load_model_->getLoadMetric(obj_id, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE})); + auto const& obj_time_milli = loadMilli(load_model_->getModeledLoad( + obj_id, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE} + )); this_load -= obj_time_milli; diff --git a/src/vt/vrt/collection/balance/greedylb/greedylb_msgs.h b/src/vt/vrt/collection/balance/greedylb/greedylb_msgs.h index 3ab3286200..144ede8a55 100644 --- a/src/vt/vrt/collection/balance/greedylb/greedylb_msgs.h +++ b/src/vt/vrt/collection/balance/greedylb/greedylb_msgs.h @@ -120,7 +120,7 @@ struct GreedyCollectMsg : GreedyLBTypes, collective::ReduceTMsg { MessageParentType::serialize(s); } - ObjSampleType const& getLoadMetric() const { + ObjSampleType const& getModeledLoad() const { return collective::ReduceTMsg::getConstVal().getSample(); } diff --git a/src/vt/vrt/collection/balance/greedylb/greedylb_types.h b/src/vt/vrt/collection/balance/greedylb/greedylb_types.h index 1c3ef27b04..3e3f034faf 100644 --- a/src/vt/vrt/collection/balance/greedylb/greedylb_types.h +++ b/src/vt/vrt/collection/balance/greedylb/greedylb_types.h @@ -71,7 +71,7 @@ struct GreedyRecord { : obj_(in_obj), load_(in_load) { } - LoadType getLoadMetric() const { return load_; } + LoadType getModeledLoad() const { return load_; } ObjType getObj() const { return obj_; } private: @@ -85,7 +85,7 @@ struct GreedyProc { NodeType const& in_node, GreedyLBTypes::LoadType const& in_load ) : node_(in_node), load_(in_load) {} - GreedyLBTypes::LoadType getLoadMetric() const { return load_; } + GreedyLBTypes::LoadType getModeledLoad() const { return load_; } NodeType node_ = uninitialized_destination; GreedyLBTypes::LoadType load_ = 0.0f; @@ -95,14 +95,14 @@ struct GreedyProc { template struct GreedyCompareLoadMin { bool operator()(T const& p1, T const& p2) const { - return p1.getLoadMetric() > p2.getLoadMetric(); + return p1.getModeledLoad() > p2.getModeledLoad(); } }; template struct GreedyCompareLoadMax { bool operator()(T const& p1, T const& p2) const { - return p1.getLoadMetric() < p2.getLoadMetric(); + return p1.getModeledLoad() < p2.getModeledLoad(); } }; diff --git a/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc b/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc index 33ca2e0b96..90b70f41ba 100644 --- a/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc +++ b/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc @@ -311,7 +311,7 @@ void HierarchicalLB::loadOverBin(ObjBinType bin, ObjBinListType& bin_list) { load_over[bin].push_back(obj_id); bin_list.pop_back(); - auto const& obj_time_milli = loadMilli(load_model_->getLoadMetric(obj_id, + auto const& obj_time_milli = loadMilli(load_model_->getModeledLoad(obj_id, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE})); this_load -= obj_time_milli; @@ -453,7 +453,7 @@ void HierarchicalLB::downTree( void HierarchicalLB::lbTreeUpHandler(LBTreeUpMsg* msg) { lbTreeUp( - msg->getChildLoad(), msg->getChild(), msg->getLoadMetric(), + msg->getChildLoad(), msg->getChild(), msg->getModeledLoad(), msg->getChildSize() ); } diff --git a/src/vt/vrt/collection/balance/hierarchicallb/hierlb_msgs.h b/src/vt/vrt/collection/balance/hierarchicallb/hierlb_msgs.h index c9bbbcdfeb..44ea281813 100644 --- a/src/vt/vrt/collection/balance/hierarchicallb/hierlb_msgs.h +++ b/src/vt/vrt/collection/balance/hierarchicallb/hierlb_msgs.h @@ -74,7 +74,7 @@ struct LBTreeUpMsg : HierLBTypes, ::vt::Message { LoadType getChildLoad() const { return child_load_; } NodeType getChild() const { return child_; } - ObjSampleType const& getLoadMetric() const { return load_; } + ObjSampleType const& getModeledLoad() const { return load_; } ObjSampleType&& getLoadMove() { return std::move(load_); } NodeType getChildSize() const { return child_size_; } diff --git a/src/vt/vrt/collection/balance/lb_common.cc b/src/vt/vrt/collection/balance/lb_common.cc index 1027aad75f..0ad9824346 100644 --- a/src/vt/vrt/collection/balance/lb_common.cc +++ b/src/vt/vrt/collection/balance/lb_common.cc @@ -62,12 +62,12 @@ LoadSummary getObjectLoads( ) { LoadSummary ret; ret.whole_phase_load = - model->getLoadMetric(object, {when.phases, PhaseOffset::WHOLE_PHASE}); + model->getModeledLoad(object, {when.phases, PhaseOffset::WHOLE_PHASE}); unsigned int subphases = model->getNumSubphases(); for (unsigned int i = 0; i < subphases; ++i) ret.subphase_loads.push_back( - model->getLoadMetric(object, {when.phases, i}) + model->getModeledLoad(object, {when.phases, i}) ); return ret; diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index a59cbe8adb..7d9fd7cc13 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -622,7 +622,7 @@ void LBManager::computeStatistics( total_load_from_model = 0.; std::vector obj_load_model; for (auto elm : *model) { - auto work = model->getLoadMetric( + auto work = model->getModeledLoad( elm, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE} ); obj_load_model.emplace_back( diff --git a/src/vt/vrt/collection/balance/model/comm_overhead.cc b/src/vt/vrt/collection/balance/model/comm_overhead.cc index 489a5e0b31..879c38ccad 100644 --- a/src/vt/vrt/collection/balance/model/comm_overhead.cc +++ b/src/vt/vrt/collection/balance/model/comm_overhead.cc @@ -61,8 +61,8 @@ void CommOverhead::setLoads(std::unordered_map const* pr } TimeType -CommOverhead::getLoadMetric(ElementIDStruct object, PhaseOffset offset) { - auto work = ComposedModel::getLoadMetric(object, offset); +CommOverhead::getModeledLoad(ElementIDStruct object, PhaseOffset offset) { + auto work = ComposedModel::getModeledLoad(object, offset); auto phase = getNumCompletedPhases() + offset.phases; auto& comm = proc_comm_->at(phase); @@ -80,7 +80,7 @@ CommOverhead::getLoadMetric(ElementIDStruct object, PhaseOffset offset) { return work + overhead; } else { // @todo: we don't record comm costs for each subphase---split it proportionally - auto whole_phase_work = ComposedModel::getLoadMetric( + auto whole_phase_work = ComposedModel::getModeledLoad( object, PhaseOffset{offset.phases, PhaseOffset::WHOLE_PHASE} ); return work + overhead * ( static_cast(work)/whole_phase_work ); diff --git a/src/vt/vrt/collection/balance/model/comm_overhead.h b/src/vt/vrt/collection/balance/model/comm_overhead.h index 989e7cd329..857907e56c 100644 --- a/src/vt/vrt/collection/balance/model/comm_overhead.h +++ b/src/vt/vrt/collection/balance/model/comm_overhead.h @@ -68,7 +68,7 @@ struct CommOverhead : public ComposedModel { void setLoads(std::unordered_map const* proc_load, std::unordered_map const* proc_comm) override; - TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) override; + TimeType getModeledLoad(ElementIDStruct object, PhaseOffset when) override; private: std::unordered_map const* proc_comm_; /**< Underlying comm data */ diff --git a/src/vt/vrt/collection/balance/model/composed_model.cc b/src/vt/vrt/collection/balance/model/composed_model.cc index 71906601f0..6f3f5b9211 100644 --- a/src/vt/vrt/collection/balance/model/composed_model.cc +++ b/src/vt/vrt/collection/balance/model/composed_model.cc @@ -55,8 +55,8 @@ void ComposedModel::updateLoads(PhaseType last_completed_phase) { } TimeType -ComposedModel::getLoadMetric(ElementIDStruct object, PhaseOffset when) { - return base_->getLoadMetric(object, when); +ComposedModel::getModeledLoad(ElementIDStruct object, PhaseOffset when) { + return base_->getModeledLoad(object, when); } bool ComposedModel::hasRawLoad() const { diff --git a/src/vt/vrt/collection/balance/model/composed_model.h b/src/vt/vrt/collection/balance/model/composed_model.h index a4763122d0..459fd5f174 100644 --- a/src/vt/vrt/collection/balance/model/composed_model.h +++ b/src/vt/vrt/collection/balance/model/composed_model.h @@ -69,7 +69,7 @@ class ComposedModel : public LoadModel void updateLoads(PhaseType last_completed_phase) override; - TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) override; + TimeType getModeledLoad(ElementIDStruct object, PhaseOffset when) override; bool hasRawLoad() const override; TimeType getRawLoad(ElementIDStruct object, PhaseOffset when) override; unsigned int getNumPastPhasesNeeded(unsigned int look_back) override; diff --git a/src/vt/vrt/collection/balance/model/linear_model.cc b/src/vt/vrt/collection/balance/model/linear_model.cc index 505b477627..771fd0a497 100644 --- a/src/vt/vrt/collection/balance/model/linear_model.cc +++ b/src/vt/vrt/collection/balance/model/linear_model.cc @@ -48,12 +48,12 @@ namespace vt { namespace vrt { namespace collection { namespace balance { -TimeType LinearModel::getLoadMetric(ElementIDStruct object, PhaseOffset when) { +TimeType LinearModel::getModeledLoad(ElementIDStruct object, PhaseOffset when) { using util::stats::LinearRegression; // Retrospective queries don't call for a prediction if (when.phases < 0) - return ComposedModel::getLoadMetric(object, when); + return ComposedModel::getModeledLoad(object, when); std::vector x; std::vector y; @@ -65,7 +65,7 @@ TimeType LinearModel::getLoadMetric(ElementIDStruct object, PhaseOffset when) { for (int i = -1 * static_cast(phases); i < 0; i++) { x.emplace_back(i); past_phase.phases = i; - y.emplace_back(ComposedModel::getLoadMetric(object, past_phase)); + y.emplace_back(ComposedModel::getModeledLoad(object, past_phase)); } // should we re-create this every time? diff --git a/src/vt/vrt/collection/balance/model/linear_model.h b/src/vt/vrt/collection/balance/model/linear_model.h index 3b948cce3c..d0dd569bd6 100644 --- a/src/vt/vrt/collection/balance/model/linear_model.h +++ b/src/vt/vrt/collection/balance/model/linear_model.h @@ -69,7 +69,7 @@ struct LinearModel : ComposedModel { past_len_(in_past_len) { } - TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) override; + TimeType getModeledLoad(ElementIDStruct object, PhaseOffset when) override; unsigned int getNumPastPhasesNeeded(unsigned int look_back) override; private: diff --git a/src/vt/vrt/collection/balance/model/load_model.h b/src/vt/vrt/collection/balance/model/load_model.h index 7f2f36f162..3ea49b5f48 100644 --- a/src/vt/vrt/collection/balance/model/load_model.h +++ b/src/vt/vrt/collection/balance/model/load_model.h @@ -207,7 +207,7 @@ struct LoadModel * * This would typically be called by LBManager collectively inside * an epoch that can be used for global communication in advance of - * any calls to getLoadMetric() + * any calls to getModeledLoad() * * The `setLoads` method must have been called before any call to * this. @@ -225,7 +225,7 @@ struct LoadModel * The `updateLoads` method must have been called before any call to * this. */ - virtual TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) = 0; + virtual TimeType getModeledLoad(ElementIDStruct object, PhaseOffset when) = 0; /** * \brief Whether or not the model is based on the RawData model diff --git a/src/vt/vrt/collection/balance/model/multiple_phases.cc b/src/vt/vrt/collection/balance/model/multiple_phases.cc index 4ea7e01cf6..c52c8bd528 100644 --- a/src/vt/vrt/collection/balance/model/multiple_phases.cc +++ b/src/vt/vrt/collection/balance/model/multiple_phases.cc @@ -46,16 +46,16 @@ namespace vt { namespace vrt { namespace collection { namespace balance { TimeType -MultiplePhases::getLoadMetric(ElementIDStruct object, PhaseOffset when) { +MultiplePhases::getModeledLoad(ElementIDStruct object, PhaseOffset when) { // Retrospective queries don't call for a prediction if (when.phases < 0) - return ComposedModel::getLoadMetric(object, when); + return ComposedModel::getModeledLoad(object, when); TimeType sum = 0.0; for (int i = 0; i < future_phase_block_size_; ++i) { PhaseOffset p{future_phase_block_size_*when.phases + i, when.subphase}; - sum += ComposedModel::getLoadMetric(object, p); + sum += ComposedModel::getModeledLoad(object, p); } return sum; diff --git a/src/vt/vrt/collection/balance/model/multiple_phases.h b/src/vt/vrt/collection/balance/model/multiple_phases.h index 3d899d17f5..899155c549 100644 --- a/src/vt/vrt/collection/balance/model/multiple_phases.h +++ b/src/vt/vrt/collection/balance/model/multiple_phases.h @@ -79,7 +79,7 @@ struct MultiplePhases : ComposedModel { , future_phase_block_size_(in_future_phase_block_size) { } - TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) override; + TimeType getModeledLoad(ElementIDStruct object, PhaseOffset when) override; private: int future_phase_block_size_ = 0; diff --git a/src/vt/vrt/collection/balance/model/naive_persistence.cc b/src/vt/vrt/collection/balance/model/naive_persistence.cc index 733893bd45..44815bf3a3 100644 --- a/src/vt/vrt/collection/balance/model/naive_persistence.cc +++ b/src/vt/vrt/collection/balance/model/naive_persistence.cc @@ -51,11 +51,11 @@ NaivePersistence::NaivePersistence(std::shared_ptr base) { } TimeType -NaivePersistence::getLoadMetric(ElementIDStruct object, PhaseOffset offset) { +NaivePersistence::getModeledLoad(ElementIDStruct object, PhaseOffset offset) { if (offset.phases >= 0) offset.phases = -1; - return ComposedModel::getLoadMetric(object, offset); + return ComposedModel::getModeledLoad(object, offset); } TimeType NaivePersistence::getRawLoad(ElementIDStruct object, PhaseOffset offset) diff --git a/src/vt/vrt/collection/balance/model/naive_persistence.h b/src/vt/vrt/collection/balance/model/naive_persistence.h index 6b0629d7cd..168761cb1d 100644 --- a/src/vt/vrt/collection/balance/model/naive_persistence.h +++ b/src/vt/vrt/collection/balance/model/naive_persistence.h @@ -60,7 +60,7 @@ struct NaivePersistence : public ComposedModel { * \param[in] base: The source of underlying load numbers to return; must not be null */ explicit NaivePersistence(std::shared_ptr base); - TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) override; + TimeType getModeledLoad(ElementIDStruct object, PhaseOffset when) override; TimeType getRawLoad(ElementIDStruct object, PhaseOffset offset) override; unsigned int getNumPastPhasesNeeded(unsigned int look_back) override; }; // class NaivePersistence diff --git a/src/vt/vrt/collection/balance/model/norm.cc b/src/vt/vrt/collection/balance/model/norm.cc index 9f9ba06bf3..06d5272877 100644 --- a/src/vt/vrt/collection/balance/model/norm.cc +++ b/src/vt/vrt/collection/balance/model/norm.cc @@ -55,16 +55,16 @@ Norm::Norm(std::shared_ptr base, double power) vtAssert(power >= 0.0, "Reciprocal loads make no sense"); } -TimeType Norm::getLoadMetric(ElementIDStruct object, PhaseOffset offset) { +TimeType Norm::getModeledLoad(ElementIDStruct object, PhaseOffset offset) { if (offset.subphase != PhaseOffset::WHOLE_PHASE) - return ComposedModel::getLoadMetric(object, offset); + return ComposedModel::getModeledLoad(object, offset); if (std::isfinite(power_)) { double sum = 0.0; for (int i = 0; i < getNumSubphases(); ++i) { offset.subphase = i; - auto t = ComposedModel::getLoadMetric(object, offset); + auto t = ComposedModel::getModeledLoad(object, offset); sum += std::pow(t, power_); } @@ -75,7 +75,7 @@ TimeType Norm::getLoadMetric(ElementIDStruct object, PhaseOffset offset) { for (int i = 0; i < getNumSubphases(); ++i) { offset.subphase = i; - auto t = ComposedModel::getLoadMetric(object, offset); + auto t = ComposedModel::getModeledLoad(object, offset); max = std::max(max, t); } diff --git a/src/vt/vrt/collection/balance/model/norm.h b/src/vt/vrt/collection/balance/model/norm.h index 6dba219eae..cb096f4e7c 100644 --- a/src/vt/vrt/collection/balance/model/norm.h +++ b/src/vt/vrt/collection/balance/model/norm.h @@ -64,7 +64,7 @@ class Norm : public ComposedModel { */ Norm(std::shared_ptr base, double power); - TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) override; + TimeType getModeledLoad(ElementIDStruct object, PhaseOffset when) override; private: const double power_; diff --git a/src/vt/vrt/collection/balance/model/per_collection.cc b/src/vt/vrt/collection/balance/model/per_collection.cc index 1226baf6fd..f495689c18 100644 --- a/src/vt/vrt/collection/balance/model/per_collection.cc +++ b/src/vt/vrt/collection/balance/model/per_collection.cc @@ -69,14 +69,14 @@ void PerCollection::updateLoads(PhaseType last_completed_phase) { } TimeType -PerCollection::getLoadMetric(ElementIDStruct object, PhaseOffset when) { +PerCollection::getModeledLoad(ElementIDStruct object, PhaseOffset when) { // See if some specific model has been given for the object in question auto mi = models_.find(theNodeLBData()->getCollectionProxyForElement(object)); if (mi != models_.end()) - return mi->second->getLoadMetric(object, when); + return mi->second->getModeledLoad(object, when); // Otherwise, default to the given base model - return ComposedModel::getLoadMetric(object, when); + return ComposedModel::getModeledLoad(object, when); } bool PerCollection::hasRawLoad() const { diff --git a/src/vt/vrt/collection/balance/model/per_collection.h b/src/vt/vrt/collection/balance/model/per_collection.h index 39a984e3e4..e3971554d3 100644 --- a/src/vt/vrt/collection/balance/model/per_collection.h +++ b/src/vt/vrt/collection/balance/model/per_collection.h @@ -78,7 +78,7 @@ struct PerCollection : public ComposedModel void updateLoads(PhaseType last_completed_phase) override; - TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) override; + TimeType getModeledLoad(ElementIDStruct object, PhaseOffset when) override; bool hasRawLoad() const override; TimeType getRawLoad(ElementIDStruct object, PhaseOffset when) override; unsigned int getNumPastPhasesNeeded(unsigned int look_back) override; diff --git a/src/vt/vrt/collection/balance/model/persistence_median_last_n.cc b/src/vt/vrt/collection/balance/model/persistence_median_last_n.cc index 033465f46e..afafcb61d7 100644 --- a/src/vt/vrt/collection/balance/model/persistence_median_last_n.cc +++ b/src/vt/vrt/collection/balance/model/persistence_median_last_n.cc @@ -54,18 +54,18 @@ PersistenceMedianLastN::PersistenceMedianLastN(std::shared_ptr base, vtAssert(n > 0, "Cannot take a median over no phases"); } -TimeType PersistenceMedianLastN::getLoadMetric( +TimeType PersistenceMedianLastN::getModeledLoad( ElementIDStruct object, PhaseOffset when ) { // Retrospective queries don't call for a prospective calculation if (when.phases < 0) - return ComposedModel::getLoadMetric(object, when); + return ComposedModel::getModeledLoad(object, when); unsigned int phases = std::min(n_, getNumCompletedPhases()); std::vector times(phases); for (unsigned int i = 1; i <= phases; ++i) { PhaseOffset p{-1*static_cast(i), when.subphase}; - TimeType t = ComposedModel::getLoadMetric(object, p); + TimeType t = ComposedModel::getModeledLoad(object, p); times[i-1] = t; } diff --git a/src/vt/vrt/collection/balance/model/persistence_median_last_n.h b/src/vt/vrt/collection/balance/model/persistence_median_last_n.h index b684f5cf0f..55259d59d1 100644 --- a/src/vt/vrt/collection/balance/model/persistence_median_last_n.h +++ b/src/vt/vrt/collection/balance/model/persistence_median_last_n.h @@ -65,7 +65,7 @@ struct PersistenceMedianLastN : public ComposedModel */ PersistenceMedianLastN(std::shared_ptr base, unsigned int n); - TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) override; + TimeType getModeledLoad(ElementIDStruct object, PhaseOffset when) override; unsigned int getNumPastPhasesNeeded(unsigned int look_back) override; private: diff --git a/src/vt/vrt/collection/balance/model/proposed_reassignment.cc b/src/vt/vrt/collection/balance/model/proposed_reassignment.cc index 26ace02ed2..06e5820ca1 100644 --- a/src/vt/vrt/collection/balance/model/proposed_reassignment.cc +++ b/src/vt/vrt/collection/balance/model/proposed_reassignment.cc @@ -94,7 +94,7 @@ int ProposedReassignment::getNumObjects() } TimeType -ProposedReassignment::getLoadMetric(ElementIDStruct object, PhaseOffset when) { +ProposedReassignment::getModeledLoad(ElementIDStruct object, PhaseOffset when) { auto a = reassignment_->arrive_.find(object); if (a != reassignment_->arrive_.end()) { return std::get<0>(a->second).get(when); @@ -104,7 +104,7 @@ ProposedReassignment::getLoadMetric(ElementIDStruct object, PhaseOffset when) { vtAssert(reassignment_->depart_.find(object) == reassignment_->depart_.end(), "Departing object should not appear as a load query subject"); - return ComposedModel::getLoadMetric(object, when); + return ComposedModel::getModeledLoad(object, when); } TimeType ProposedReassignment::getRawLoad(ElementIDStruct object, PhaseOffset when) diff --git a/src/vt/vrt/collection/balance/model/proposed_reassignment.h b/src/vt/vrt/collection/balance/model/proposed_reassignment.h index a5aaad08f0..bdb7e01ce0 100644 --- a/src/vt/vrt/collection/balance/model/proposed_reassignment.h +++ b/src/vt/vrt/collection/balance/model/proposed_reassignment.h @@ -57,7 +57,7 @@ struct ProposedReassignment : public ComposedModel { ObjectIterator begin() override; int getNumObjects() override; - TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) override; + TimeType getModeledLoad(ElementIDStruct object, PhaseOffset when) override; TimeType getRawLoad(ElementIDStruct object, PhaseOffset when) override; private: diff --git a/src/vt/vrt/collection/balance/model/raw_data.cc b/src/vt/vrt/collection/balance/model/raw_data.cc index 36122a9fb6..9d26ce6a13 100644 --- a/src/vt/vrt/collection/balance/model/raw_data.cc +++ b/src/vt/vrt/collection/balance/model/raw_data.cc @@ -94,7 +94,7 @@ int RawData::getNumSubphases() { return subphases; } -TimeType RawData::getLoadMetric(ElementIDStruct object, PhaseOffset offset) +TimeType RawData::getModeledLoad(ElementIDStruct object, PhaseOffset offset) { { return getRawLoad(object, offset); } diff --git a/src/vt/vrt/collection/balance/model/raw_data.h b/src/vt/vrt/collection/balance/model/raw_data.h index 5c879fc509..64fec7d482 100644 --- a/src/vt/vrt/collection/balance/model/raw_data.h +++ b/src/vt/vrt/collection/balance/model/raw_data.h @@ -59,7 +59,7 @@ namespace vt { namespace vrt { namespace collection { namespace balance { struct RawData : public LoadModel { RawData() = default; void updateLoads(PhaseType last_completed_phase) override; - TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) override; + TimeType getModeledLoad(ElementIDStruct object, PhaseOffset when) override; bool hasRawLoad() const override { return true; } TimeType getRawLoad(ElementIDStruct object, PhaseOffset when) override; TimeType getComm(ElementIDStruct object, PhaseOffset when) override; diff --git a/src/vt/vrt/collection/balance/model/select_subphases.cc b/src/vt/vrt/collection/balance/model/select_subphases.cc index 9c26662c02..1777db5b14 100644 --- a/src/vt/vrt/collection/balance/model/select_subphases.cc +++ b/src/vt/vrt/collection/balance/model/select_subphases.cc @@ -59,18 +59,18 @@ SelectSubphases::SelectSubphases(std::shared_ptr base, std::vector base, std::vector subphases); - TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) override; + TimeType getModeledLoad(ElementIDStruct object, PhaseOffset when) override; int getNumSubphases() override; std::vector subphases_; diff --git a/src/vt/vrt/collection/balance/node_lb_data.cc b/src/vt/vrt/collection/balance/node_lb_data.cc index 3ee8d52001..0c16945288 100644 --- a/src/vt/vrt/collection/balance/node_lb_data.cc +++ b/src/vt/vrt/collection/balance/node_lb_data.cc @@ -258,7 +258,7 @@ void NodeLBData::addNodeLBData( ); auto const phase = in->getPhase(); - auto const& total_load = in->getLoadMetric(phase, focused_subphase); + auto const& total_load = in->getModeledLoad(phase, focused_subphase); auto &phase_data = lb_data_->node_data_[phase]; auto elm_iter = phase_data.find(id); diff --git a/src/vt/vrt/collection/balance/rotatelb/rotatelb.cc b/src/vt/vrt/collection/balance/rotatelb/rotatelb.cc index a7e5288c54..1dea2f0994 100644 --- a/src/vt/vrt/collection/balance/rotatelb/rotatelb.cc +++ b/src/vt/vrt/collection/balance/rotatelb/rotatelb.cc @@ -76,7 +76,9 @@ void RotateLB::runLB(TimeType) { } for (auto obj : *load_model_) { - TimeTypeWrapper const load = load_model_->getLoadMetric(obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE}); + TimeTypeWrapper const load = load_model_->getModeledLoad( + obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE} + ); vt_debug_print( terse, lb, "\t RotateLB::migrating object to: obj={}, load={}, to_node={}\n", diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index a176089a99..650014e079 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1362,7 +1362,7 @@ void TemperedLB::migrate() { } TimeType TemperedLB::getTotalWork(const elm::ElementIDStruct& obj) { - return load_model_->getLoadMetric( + return load_model_->getModeledLoad( obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE} ); } diff --git a/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc b/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc index 0012b27b83..3ef77a0ea4 100644 --- a/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc +++ b/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc @@ -94,7 +94,7 @@ TimeType TemperedWMin::getTotalWork(const elm::ElementIDStruct& obj) { balance::PhaseOffset when = {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE}; - return alpha_ * load_model_->getLoadMetric(obj, when) + + return alpha_ * load_model_->getModeledLoad(obj, when) + beta_ * load_model_->getComm(obj, when) + gamma_; } diff --git a/src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc b/src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc index 086addcd59..9b10cc6fed 100644 --- a/src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc +++ b/src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc @@ -483,7 +483,7 @@ std::unique_ptr ZoltanLB::makeGraph() { { int idx = 0; for (auto&& obj : load_objs) { - auto load = load_model_->getLoadMetric( + auto load = load_model_->getModeledLoad( obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE} ); diff --git a/tests/unit/collection/test_model_comm_overhead.nompi.cc b/tests/unit/collection/test_model_comm_overhead.nompi.cc index 0ac766e443..ba73ed37e8 100644 --- a/tests/unit/collection/test_model_comm_overhead.nompi.cc +++ b/tests/unit/collection/test_model_comm_overhead.nompi.cc @@ -86,7 +86,7 @@ struct StubModel : LoadModel { void updateLoads(PhaseType) override {} - TimeType getLoadMetric(ElementIDStruct id, PhaseOffset phase) override { + TimeType getModeledLoad(ElementIDStruct id, PhaseOffset phase) override { const auto work = proc_load_->at(0).at(id).whole_phase_load; if (phase.subphase == PhaseOffset::WHOLE_PHASE) { @@ -167,7 +167,7 @@ TEST_F(TestModelCommOverhead, test_model_comm_overhead_1) { ++objects_seen; const auto subphase = num_phases == 0 ? PhaseOffset::WHOLE_PHASE : 1; - auto work_val = test_model->getLoadMetric(obj, PhaseOffset{0, subphase}); + auto work_val = test_model->getModeledLoad(obj, PhaseOffset{0, subphase}); EXPECT_EQ(work_val, expected_work[num_phases]) << fmt::format("For element={} on phase={}\n", obj, num_phases); } diff --git a/tests/unit/collection/test_model_linear_model.nompi.cc b/tests/unit/collection/test_model_linear_model.nompi.cc index 0a5b85817f..a432b3b958 100644 --- a/tests/unit/collection/test_model_linear_model.nompi.cc +++ b/tests/unit/collection/test_model_linear_model.nompi.cc @@ -79,7 +79,7 @@ struct StubModel : LoadModel { void updateLoads(PhaseType) override {} - TimeType getLoadMetric(ElementIDStruct id, PhaseOffset phase) override { + TimeType getModeledLoad(ElementIDStruct id, PhaseOffset phase) override { // Most recent phase will be at the end of vector return proc_load_->at(num_phases + phase.phases).at(id).whole_phase_load; } @@ -149,7 +149,7 @@ TEST_F(TestLinearModel, test_model_linear_model_1) { ++num_phases; for (auto&& obj : *test_model) { - auto work_val = test_model->getLoadMetric( + auto work_val = test_model->getModeledLoad( obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE} ); EXPECT_EQ( diff --git a/tests/unit/collection/test_model_multiple_phases.nompi.cc b/tests/unit/collection/test_model_multiple_phases.nompi.cc index bf227a8e06..ce43affa52 100644 --- a/tests/unit/collection/test_model_multiple_phases.nompi.cc +++ b/tests/unit/collection/test_model_multiple_phases.nompi.cc @@ -77,7 +77,7 @@ struct StubModel : LoadModel { void updateLoads(PhaseType) override {} - TimeType getLoadMetric(ElementIDStruct id, PhaseOffset phase) override { + TimeType getModeledLoad(ElementIDStruct id, PhaseOffset phase) override { // Here we return predicted loads for future phases // For the sake of the test we use values from the past phases return proc_load_->at(phase.phases).at(id).whole_phase_load; @@ -119,7 +119,7 @@ TEST_F(TestModelMultiplePhases, test_model_multiple_phases_1) { test_model->updateLoads(3); for (auto&& obj : *test_model) { - auto work_val = test_model->getLoadMetric( + auto work_val = test_model->getModeledLoad( obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE} ); EXPECT_EQ(work_val, obj.id == 1 ? TimeType{100} : TimeType{85}); diff --git a/tests/unit/collection/test_model_naive_persistence.nompi.cc b/tests/unit/collection/test_model_naive_persistence.nompi.cc index 07cab43c02..f9b7636940 100644 --- a/tests/unit/collection/test_model_naive_persistence.nompi.cc +++ b/tests/unit/collection/test_model_naive_persistence.nompi.cc @@ -81,7 +81,7 @@ struct StubModel : LoadModel { void updateLoads(PhaseType) override {} - TimeType getLoadMetric(ElementIDStruct id, PhaseOffset phase) override { + TimeType getModeledLoad(ElementIDStruct id, PhaseOffset phase) override { EXPECT_LE(phase.phases, -1); return proc_load_->at(getIndexFromPhase(phase.phases)).at(id).whole_phase_load; } @@ -124,7 +124,7 @@ TEST_F(TestModelNaivePersistence, test_model_naive_persistence_1) { for (auto it = test_model->begin(); it != test_model->end(); ++it) { auto &&obj = *it; for (auto phase : {0, -1, -2, -3, -4}) { - auto work_val = test_model->getLoadMetric(obj, PhaseOffset{phase, 1}); + auto work_val = test_model->getModeledLoad(obj, PhaseOffset{phase, 1}); EXPECT_EQ(work_val, proc_loads.at(getIndexFromPhase(phase)).at(obj).whole_phase_load); } } diff --git a/tests/unit/collection/test_model_norm.nompi.cc b/tests/unit/collection/test_model_norm.nompi.cc index 44016b36c6..9cd6d3a5f6 100644 --- a/tests/unit/collection/test_model_norm.nompi.cc +++ b/tests/unit/collection/test_model_norm.nompi.cc @@ -84,7 +84,7 @@ struct StubModel : LoadModel { void updateLoads(PhaseType) override {} - TimeType getLoadMetric(ElementIDStruct id, PhaseOffset phase) override { + TimeType getModeledLoad(ElementIDStruct id, PhaseOffset phase) override { return proc_load_->at(0).at(id).subphase_loads.at(phase.subphase); } @@ -125,7 +125,7 @@ TEST_F(TestModelNorm, test_model_norm_1) { // offset.subphase != PhaseOffset::WHOLE_PHASE // expect work load value for given subphase - auto work_val = test_model->getLoadMetric(obj, PhaseOffset{0, iter}); + auto work_val = test_model->getModeledLoad(obj, PhaseOffset{0, iter}); EXPECT_EQ(work_val, proc_load[0][obj].subphase_loads[iter]); } @@ -155,7 +155,7 @@ TEST_F(TestModelNorm, test_model_norm_2) { ++objects_seen; auto work_val = - test_model->getLoadMetric(obj, PhaseOffset{0, PhaseOffset::WHOLE_PHASE}); + test_model->getModeledLoad(obj, PhaseOffset{0, PhaseOffset::WHOLE_PHASE}); EXPECT_NEAR(work_val, expected_norms[obj.id - 1], 0.001); } @@ -184,7 +184,7 @@ TEST_F(TestModelNorm, test_model_norm_3) { ++objects_seen; auto work_val = - test_model->getLoadMetric(obj, PhaseOffset{0, PhaseOffset::WHOLE_PHASE}); + test_model->getModeledLoad(obj, PhaseOffset{0, PhaseOffset::WHOLE_PHASE}); EXPECT_EQ(work_val, expected_norms[obj.id - 1]); } diff --git a/tests/unit/collection/test_model_per_collection.extended.cc b/tests/unit/collection/test_model_per_collection.extended.cc index 2d03ec6aba..1b33a20394 100644 --- a/tests/unit/collection/test_model_per_collection.extended.cc +++ b/tests/unit/collection/test_model_per_collection.extended.cc @@ -74,7 +74,7 @@ struct ConstantTestModel : ComposedModel { proxy_(in_proxy) { } - TimeType getLoadMetric(ElementIDStruct, PhaseOffset) override { + TimeType getModeledLoad(ElementIDStruct, PhaseOffset) override { return static_cast(proxy_); } @@ -153,7 +153,7 @@ TEST_F(TestModelPerCollection, test_model_per_collection_1) { // model to function model->updateLoads(0); for (auto&& obj : *model) { - auto work_val = model->getLoadMetric( + auto work_val = model->getModeledLoad( obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE} ); if (id_proxy_map.find(obj) != id_proxy_map.end()) { diff --git a/tests/unit/collection/test_model_persistence_median_last_n.nompi.cc b/tests/unit/collection/test_model_persistence_median_last_n.nompi.cc index 9395e3ac4e..108153070b 100644 --- a/tests/unit/collection/test_model_persistence_median_last_n.nompi.cc +++ b/tests/unit/collection/test_model_persistence_median_last_n.nompi.cc @@ -79,7 +79,7 @@ struct StubModel : LoadModel { void updateLoads(PhaseType) override {} - TimeType getLoadMetric(ElementIDStruct id, PhaseOffset phase) override { + TimeType getModeledLoad(ElementIDStruct id, PhaseOffset phase) override { // Most recent phase will be at the end of vector return proc_load_->at(num_phases + phase.phases).at(id).whole_phase_load; } @@ -148,7 +148,7 @@ TEST_F(TestModelPersistenceMedianLastN, test_model_persistence_median_last_n_1) ++num_phases; for (auto&& obj : *test_model) { - auto work_val = test_model->getLoadMetric( + auto work_val = test_model->getModeledLoad( obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE} ); EXPECT_EQ( diff --git a/tests/unit/collection/test_model_raw_data.nompi.cc b/tests/unit/collection/test_model_raw_data.nompi.cc index ed3032865b..614e54e8b1 100644 --- a/tests/unit/collection/test_model_raw_data.nompi.cc +++ b/tests/unit/collection/test_model_raw_data.nompi.cc @@ -101,12 +101,12 @@ TEST_F(TestRawData, test_model_raw_data_scalar) { EXPECT_TRUE(obj.id == 1 || obj.id == 2); objects_seen++; - auto work_val = test_model->getLoadMetric( + auto work_val = test_model->getModeledLoad( obj, PhaseOffset{-1, PhaseOffset::WHOLE_PHASE} ); EXPECT_EQ(work_val, load_holder[iter][obj].whole_phase_load); - auto sub_work_val = test_model->getLoadMetric(obj, PhaseOffset{-1, 0}); + auto sub_work_val = test_model->getModeledLoad(obj, PhaseOffset{-1, 0}); EXPECT_EQ(sub_work_val, load_holder[iter][obj].subphase_loads[0]); auto raw_load_val = test_model->getRawLoad(obj, PhaseOffset{-1, PhaseOffset::WHOLE_PHASE}); diff --git a/tests/unit/collection/test_model_select_subphases.nompi.cc b/tests/unit/collection/test_model_select_subphases.nompi.cc index 62aea3edf9..6880f0e95c 100644 --- a/tests/unit/collection/test_model_select_subphases.nompi.cc +++ b/tests/unit/collection/test_model_select_subphases.nompi.cc @@ -83,7 +83,7 @@ struct StubModel : LoadModel { void updateLoads(PhaseType) override {} - TimeType getLoadMetric(ElementIDStruct id, PhaseOffset phase) override { + TimeType getModeledLoad(ElementIDStruct id, PhaseOffset phase) override { return proc_load_->at(0).at(id).subphase_loads.at(phase.subphase); } @@ -146,7 +146,7 @@ TEST_F(TestModelSelectSubphases, test_model_select_subphases_1) { // offset.subphase != PhaseOffset::WHOLE_PHASE // expect work load value for given subphase - auto work_val = test_model->getLoadMetric(obj, PhaseOffset{0, iter}); + auto work_val = test_model->getModeledLoad(obj, PhaseOffset{0, iter}); EXPECT_EQ(work_val, expected_values[obj][iter]); } @@ -187,7 +187,7 @@ TEST_F(TestModelSelectSubphases, test_model_select_subphases_2) { ++objects_seen; auto work_val = - test_model->getLoadMetric(obj, PhaseOffset{0, PhaseOffset::WHOLE_PHASE}); + test_model->getModeledLoad(obj, PhaseOffset{0, PhaseOffset::WHOLE_PHASE}); EXPECT_EQ(work_val, expected_values[obj]); } From f6ebc31a04255c50140015fad1fa57ffc8fb30c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Thu, 21 Apr 2022 14:28:31 +0200 Subject: [PATCH 016/106] #1672: lb: rename `getComm` to `getModeledComm` --- src/vt/vrt/collection/balance/model/load_model.h | 2 +- src/vt/vrt/collection/balance/model/raw_data.cc | 2 +- src/vt/vrt/collection/balance/model/raw_data.h | 2 +- src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/vt/vrt/collection/balance/model/load_model.h b/src/vt/vrt/collection/balance/model/load_model.h index 3ea49b5f48..d290beb15a 100644 --- a/src/vt/vrt/collection/balance/model/load_model.h +++ b/src/vt/vrt/collection/balance/model/load_model.h @@ -259,7 +259,7 @@ struct LoadModel * The `updateLoads` method must have been called before any call to * this. */ - virtual TimeType getComm(ElementIDStruct object, PhaseOffset when) { + virtual TimeType getModeledComm(ElementIDStruct object, PhaseOffset when) { return {}; } diff --git a/src/vt/vrt/collection/balance/model/raw_data.cc b/src/vt/vrt/collection/balance/model/raw_data.cc index 9d26ce6a13..1cf29e8dc3 100644 --- a/src/vt/vrt/collection/balance/model/raw_data.cc +++ b/src/vt/vrt/collection/balance/model/raw_data.cc @@ -107,7 +107,7 @@ TimeType RawData::getRawLoad(ElementIDStruct object, PhaseOffset offset) { return proc_load_->at(phase).at(object).get(offset); } -TimeType RawData::getComm(ElementIDStruct object, PhaseOffset when) { +TimeType RawData::getModeledComm(ElementIDStruct object, PhaseOffset when) { auto phase = getNumCompletedPhases() + when.phases; auto& comm = proc_comm_->at(phase); diff --git a/src/vt/vrt/collection/balance/model/raw_data.h b/src/vt/vrt/collection/balance/model/raw_data.h index 64fec7d482..81b45cc5cc 100644 --- a/src/vt/vrt/collection/balance/model/raw_data.h +++ b/src/vt/vrt/collection/balance/model/raw_data.h @@ -62,7 +62,7 @@ struct RawData : public LoadModel { TimeType getModeledLoad(ElementIDStruct object, PhaseOffset when) override; bool hasRawLoad() const override { return true; } TimeType getRawLoad(ElementIDStruct object, PhaseOffset when) override; - TimeType getComm(ElementIDStruct object, PhaseOffset when) override; + TimeType getModeledComm(ElementIDStruct object, PhaseOffset when) override; void setLoads(std::unordered_map const* proc_load, std::unordered_map const* proc_comm) override; diff --git a/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc b/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc index 3ef77a0ea4..5eacb1b37a 100644 --- a/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc +++ b/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc @@ -95,7 +95,7 @@ TimeType TemperedWMin::getTotalWork(const elm::ElementIDStruct& obj) { {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE}; return alpha_ * load_model_->getModeledLoad(obj, when) + - beta_ * load_model_->getComm(obj, when) + gamma_; + beta_ * load_model_->getModeledComm(obj, when) + gamma_; } }}}} // namespace vt::vrt::collection::lb From a9a0abd48f86cd1710f1718c0a8ccdc6ea5a5d0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Thu, 21 Apr 2022 19:39:43 +0200 Subject: [PATCH 017/106] #1672: lb: add helper method for retrieving comm data --- src/vt/vrt/collection/balance/baselb/baselb.cc | 6 ++---- .../vrt/collection/balance/lb_invoke/lb_manager.cc | 12 ++++-------- src/vt/vrt/collection/balance/node_lb_data.cc | 5 +++++ src/vt/vrt/collection/balance/node_lb_data.h | 8 +++++--- 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/src/vt/vrt/collection/balance/baselb/baselb.cc b/src/vt/vrt/collection/balance/baselb/baselb.cc index e0fa94035e..40f004554b 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.cc +++ b/src/vt/vrt/collection/balance/baselb/baselb.cc @@ -263,11 +263,9 @@ void BaseLB::finalize(CountMsg* msg) { void BaseLB::recvSharedEdges(CommMsg* msg) { auto phase = thePhase()->getCurrentPhase(); - auto iter = theNodeStats()->getNodeComm()->find(phase); - - if (iter != theNodeStats()->getNodeComm()->end()) { - auto comm_map = &iter->second; + auto comm_map = theNodeStats()->getNodeComm(phase); + if (comm_map != nullptr) { auto& comm = msg->comm_; for (auto&& elm : comm) { comm_map->insert(elm); diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index 7d9fd7cc13..41a17f9c13 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -732,21 +732,17 @@ void makeGraphSymmetric( PhaseType phase, objgroup::proxy::Proxy proxy ) { auto const this_node = theContext()->getNode(); - - // TODO: extract to helper method - elm::CommMapType empty_comm; - elm::CommMapType const* comm_data = &empty_comm; auto iter = theNodeStats()->getNodeComm()->find(phase); - // TODO: is it an error when we don't find the phase? - if (iter != theNodeStats()->getNodeComm()->end()) { - comm_data = &iter->second; + if (iter == theNodeStats()->getNodeComm()->end()) { + return; } // Go through the comm graph and extract out paired SendRecv edges that are // not self-send and have a non-local edge + elm::CommMapType const& comm_data = iter->second; std::unordered_map shared_edges; - for (auto&& elm : *comm_data) { + for (auto&& elm : comm_data) { if ( elm.first.commCategory() == elm::CommCategory::SendRecv and not elm.first.selfEdge() diff --git a/src/vt/vrt/collection/balance/node_lb_data.cc b/src/vt/vrt/collection/balance/node_lb_data.cc index 0c16945288..4ebcf907e3 100644 --- a/src/vt/vrt/collection/balance/node_lb_data.cc +++ b/src/vt/vrt/collection/balance/node_lb_data.cc @@ -103,6 +103,11 @@ std::unordered_map> con return &lb_data_->node_subphase_comm_; } +CommMapType* NodeLBData::getNodeComm(PhaseType phase) { + auto iter = lb_data_->node_comm_.find(phase); + return (iter != lb_data_->node_comm_.end()) ? &iter->second : nullptr; +} + void NodeLBData::clearLBData() { lb_data_->clear(); node_migrate_.clear(); diff --git a/src/vt/vrt/collection/balance/node_lb_data.h b/src/vt/vrt/collection/balance/node_lb_data.h index 9e14cf6fde..fa5277ba10 100644 --- a/src/vt/vrt/collection/balance/node_lb_data.h +++ b/src/vt/vrt/collection/balance/node_lb_data.h @@ -176,11 +176,13 @@ struct NodeLBData : runtime::component::Component { std::unordered_map const* getNodeComm() const; /** - * \internal \brief Get stored object comm graph + * \internal \brief Get stored object comm data for a specific phase + * + * \param[in] phase phase * - * \return a pointer to the comm graph + * \return a pointer to the comm data or a nullptr when no data is found */ - std::unordered_map* getNodeComm(); + CommMapType* getNodeComm(PhaseType phase); /** * \internal \brief Get stored object comm subphase graph From a2daa34e39a71cabcdbfb77fba79248309170330 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Thu, 21 Apr 2022 20:10:08 +0200 Subject: [PATCH 018/106] #1672: lb: make implementation private --- src/vt/vrt/collection/balance/temperedlb/temperedlb.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index 03508d2948..f41d9bc92a 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -121,9 +121,6 @@ struct TemperedLB : BaseLB { void setupDone(ReduceMsgType* msg); - std::mt19937 gen_propagate_; - std::mt19937 gen_sample_; - private: uint16_t f_ = 0; uint8_t k_max_ = 0; @@ -182,6 +179,8 @@ struct TemperedLB : BaseLB { bool setup_done_ = false; bool propagate_next_round_ = false; std::vector propagated_k_; + std::mt19937 gen_propagate_; + std::mt19937 gen_sample_; StatisticMapType stats; LoadType this_load = 0.0f; }; From 8124440d7293c9ef2af3fa4d6727c837973ba31d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Thu, 21 Apr 2022 20:27:06 +0200 Subject: [PATCH 019/106] #1672: lb: revert to `getLoad` in ElementStats --- src/vt/elm/elm_lb_data.cc | 6 +++--- src/vt/elm/elm_lb_data.h | 4 ++-- src/vt/vrt/collection/balance/node_lb_data.cc | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/vt/elm/elm_lb_data.cc b/src/vt/elm/elm_lb_data.cc index 266dc54e61..d5dc5d5584 100644 --- a/src/vt/elm/elm_lb_data.cc +++ b/src/vt/elm/elm_lb_data.cc @@ -163,7 +163,7 @@ PhaseType ElementLBData::getPhase() const { return cur_phase_; } -TimeType ElementLBData::getModeledLoad(PhaseType const& phase) const { +TimeType ElementLBData::getLoad(PhaseType const& phase) const { auto iter = phase_timings_.find(phase); if (iter != phase_timings_.end()) { TimeTypeWrapper const total_load = phase_timings_.at(phase); @@ -181,9 +181,9 @@ TimeType ElementLBData::getModeledLoad(PhaseType const& phase) const { } TimeType -ElementLBData::getModeledLoad(PhaseType phase, SubphaseType subphase) const { +ElementLBData::getLoad(PhaseType phase, SubphaseType subphase) const { if (subphase == no_subphase) - return getModeledLoad(phase); + return getLoad(phase); auto const& subphase_loads = subphase_timings_.at(phase); diff --git a/src/vt/elm/elm_lb_data.h b/src/vt/elm/elm_lb_data.h index 6d1cd8d78e..554856f8d1 100644 --- a/src/vt/elm/elm_lb_data.h +++ b/src/vt/elm/elm_lb_data.h @@ -84,8 +84,8 @@ struct ElementLBData { void updatePhase(PhaseType const& inc = 1); void resetPhase(); PhaseType getPhase() const; - TimeType getModeledLoad(PhaseType const& phase) const; - TimeType getModeledLoad(PhaseType phase, SubphaseType subphase) const; + TimeType getLoad(PhaseType const& phase) const; + TimeType getLoad(PhaseType phase, SubphaseType subphase) const; CommMapType const& getComm(PhaseType const& phase); std::vector const& getSubphaseComm(PhaseType phase); diff --git a/src/vt/vrt/collection/balance/node_lb_data.cc b/src/vt/vrt/collection/balance/node_lb_data.cc index 4ebcf907e3..c74b155ff0 100644 --- a/src/vt/vrt/collection/balance/node_lb_data.cc +++ b/src/vt/vrt/collection/balance/node_lb_data.cc @@ -263,7 +263,7 @@ void NodeLBData::addNodeLBData( ); auto const phase = in->getPhase(); - auto const& total_load = in->getModeledLoad(phase, focused_subphase); + auto const& total_load = in->getLoad(phase, focused_subphase); auto &phase_data = lb_data_->node_data_[phase]; auto elm_iter = phase_data.find(id); From 02cae11a25bd779dd573c965995e1d873c3ad610 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Wed, 27 Apr 2022 18:14:53 +0200 Subject: [PATCH 020/106] #1672: tests: add test for `makeGraphSymmetric` --- .../collection/balance/lb_invoke/lb_manager.h | 2 ++ tests/unit/collection/test_lb.extended.cc | 32 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h index c9249480a2..413630ece2 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h @@ -97,6 +97,8 @@ struct LBManager : runtime::component::Component { static std::unique_ptr construct(); public: + LBProxyType getLB() { return lb_instances_["chosen"]; } + /** * \internal * \brief Decide which LB to invoke given a certain phase diff --git a/tests/unit/collection/test_lb.extended.cc b/tests/unit/collection/test_lb.extended.cc index 934fa47399..7682ca7721 100644 --- a/tests/unit/collection/test_lb.extended.cc +++ b/tests/unit/collection/test_lb.extended.cc @@ -49,6 +49,7 @@ #include "vt/vrt/collection/manager.h" #include "vt/vrt/collection/balance/lb_data_holder.h" +#include "vt/vrt/collection/balance/lb_invoke/lb_manager.h" #include "vt/utils/json/json_reader.h" #include "vt/utils/json/json_appender.h" @@ -152,6 +153,37 @@ TEST_P(TestLoadBalancerGreedy, test_load_balancer_greedy_keep_last_elm) { runTest(GetParam()); } +TEST_F(TestLoadBalancerOther, test_make_graph_symmetric) { + runTest("TemperedWMin"); + + // auto proxy = theLBManager()->getLB(); + // runInEpochCollective( + // "test_make_graph_symmetric -> makeGraphSymmetric", + // [phase, proxy] { vrt::collection::balance::makeGraphSymmetric(phase, proxy); } + // ); + + auto phase = num_phases - 1; + auto iter = theNodeStats()->getNodeComm()->find(phase); + ASSERT_NE(iter, theNodeStats()->getNodeComm()->end()); + + elm::CommMapType const& comm_data = iter->second; + + fmt::print( + "\ntest_make_graph_symmetric: comm_map.size={}\n", comm_data.size() + ); + for (auto&& elm : comm_data) { + if ( + elm.first.commCategory() == elm::CommCategory::SendRecv and + not elm.first.selfEdge() + ) { + fmt::print( + "test_make_graph_symmetric: from={}, to={}\n", elm.first.fromObj(), + elm.first.toObj() + ); + } + } +} + struct MyCol2 : vt::Collection {}; using TestLoadBalancerNoWork = TestParallelHarness; From 730d0501b3072fbc80912de10962ea69b3e8d825 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Fri, 29 Apr 2022 19:29:10 +0200 Subject: [PATCH 021/106] #1672: lb: rename `getTotalWork` to `getModeledWork` --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 4 ++-- src/vt/vrt/collection/balance/temperedlb/temperedlb.h | 2 +- src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc | 2 +- src/vt/vrt/collection/balance/temperedwmin/temperedwmin.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 650014e079..b575afbe1b 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -499,7 +499,7 @@ void TemperedLB::doLBStages(TimeType start_imb) { cur_objs_.clear(); for (auto obj : *load_model_) { if (obj.isMigratable()) { - cur_objs_[obj] = getTotalWork(obj); + cur_objs_[obj] = getModeledWork(obj); } } this_new_load_ = this_load; @@ -1361,7 +1361,7 @@ void TemperedLB::migrate() { vtAssertExpr(false); } -TimeType TemperedLB::getTotalWork(const elm::ElementIDStruct& obj) { +TimeType TemperedLB::getModeledWork(const elm::ElementIDStruct& obj) { return load_model_->getModeledLoad( obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE} ); diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index f41d9bc92a..8e6f68b5b6 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -111,7 +111,7 @@ struct TemperedLB : BaseLB { ElementLoadType::iterator selectObject( LoadType size, ElementLoadType& load, std::set const& available ); - virtual TimeType getTotalWork(const elm::ElementIDStruct& obj); + virtual TimeType getModeledWork(const elm::ElementIDStruct& obj); void lazyMigrateObjsTo(EpochType epoch, NodeType node, ObjsType const& objs); void inLazyMigrations(balance::LazyMigrationMsg* msg); diff --git a/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc b/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc index 5eacb1b37a..0f6fb82e9e 100644 --- a/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc +++ b/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc @@ -90,7 +90,7 @@ void TemperedWMin::inputParams(balance::SpecEntry* spec) { gamma_ = spec->getOrDefault("gamma", gamma_); } -TimeType TemperedWMin::getTotalWork(const elm::ElementIDStruct& obj) { +TimeType TemperedWMin::getModeledWork(const elm::ElementIDStruct& obj) { balance::PhaseOffset when = {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE}; diff --git a/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.h b/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.h index 6f22d3a3a1..c008741b4a 100644 --- a/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.h +++ b/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.h @@ -61,7 +61,7 @@ struct TemperedWMin : TemperedLB { void inputParams(balance::SpecEntry* spec) override; protected: - TimeType getTotalWork(const elm::ElementIDStruct& obj) override; + TimeType getModeledWork(const elm::ElementIDStruct& obj) override; private: double alpha_ = 1.0; From 8be2e96c6234d2d6b0c20cb46b16e321771eaa2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Fri, 6 May 2022 16:36:35 +0200 Subject: [PATCH 022/106] #1672: use `theNodeLBData` where necessary --- src/vt/vrt/collection/balance/baselb/baselb.cc | 2 +- src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc | 4 ++-- tests/unit/collection/test_lb.extended.cc | 5 +++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/vt/vrt/collection/balance/baselb/baselb.cc b/src/vt/vrt/collection/balance/baselb/baselb.cc index 40f004554b..36eae88ec7 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.cc +++ b/src/vt/vrt/collection/balance/baselb/baselb.cc @@ -263,7 +263,7 @@ void BaseLB::finalize(CountMsg* msg) { void BaseLB::recvSharedEdges(CommMsg* msg) { auto phase = thePhase()->getCurrentPhase(); - auto comm_map = theNodeStats()->getNodeComm(phase); + auto comm_map = theNodeLBData()->getNodeComm(phase); if (comm_map != nullptr) { auto& comm = msg->comm_; diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index 41a17f9c13..e9a0eacbc9 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -732,8 +732,8 @@ void makeGraphSymmetric( PhaseType phase, objgroup::proxy::Proxy proxy ) { auto const this_node = theContext()->getNode(); - auto iter = theNodeStats()->getNodeComm()->find(phase); - if (iter == theNodeStats()->getNodeComm()->end()) { + auto iter = theNodeLBData()->getNodeComm()->find(phase); + if (iter == theNodeLBData()->getNodeComm()->end()) { return; } diff --git a/tests/unit/collection/test_lb.extended.cc b/tests/unit/collection/test_lb.extended.cc index 7682ca7721..74ea5e7740 100644 --- a/tests/unit/collection/test_lb.extended.cc +++ b/tests/unit/collection/test_lb.extended.cc @@ -49,6 +49,7 @@ #include "vt/vrt/collection/manager.h" #include "vt/vrt/collection/balance/lb_data_holder.h" +#include "vt/vrt/collection/balance/node_lb_data.h" #include "vt/vrt/collection/balance/lb_invoke/lb_manager.h" #include "vt/utils/json/json_reader.h" #include "vt/utils/json/json_appender.h" @@ -163,8 +164,8 @@ TEST_F(TestLoadBalancerOther, test_make_graph_symmetric) { // ); auto phase = num_phases - 1; - auto iter = theNodeStats()->getNodeComm()->find(phase); - ASSERT_NE(iter, theNodeStats()->getNodeComm()->end()); + auto iter = theNodeLBData()->getNodeComm()->find(phase); + ASSERT_NE(iter, theNodeLBData()->getNodeComm()->end()); elm::CommMapType const& comm_data = iter->second; From 726e1210eef1f1e57b89838ecf3db564aa803ceb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Tue, 10 May 2022 18:23:55 +0200 Subject: [PATCH 023/106] #1672: add debug category for TemperedWMin --- src/vt/configs/arguments/app_config.h | 2 ++ src/vt/configs/arguments/args.cc | 3 +++ src/vt/configs/debug/debug_config.h | 4 +++- src/vt/runtime/runtime_banner.cc | 1 + 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/vt/configs/arguments/app_config.h b/src/vt/configs/arguments/app_config.h index e7550e1ba9..7aef235612 100644 --- a/src/vt/configs/arguments/app_config.h +++ b/src/vt/configs/arguments/app_config.h @@ -205,6 +205,7 @@ struct AppConfig { bool vt_debug_handler = false; bool vt_debug_hierlb = false; bool vt_debug_temperedlb = false; + bool vt_debug_temperedwmin = false; bool vt_debug_scatter = false; bool vt_debug_sequence = false; bool vt_debug_sequence_vrt = false; @@ -365,6 +366,7 @@ struct AppConfig { | vt_debug_handler | vt_debug_hierlb | vt_debug_temperedlb + | vt_debug_temperedwmin | vt_debug_scatter | vt_debug_sequence | vt_debug_sequence_vrt diff --git a/src/vt/configs/arguments/args.cc b/src/vt/configs/arguments/args.cc index 3901fcc033..d3d5551f13 100644 --- a/src/vt/configs/arguments/args.cc +++ b/src/vt/configs/arguments/args.cc @@ -360,6 +360,7 @@ void addDebugPrintArgs(CLI::App& app, AppConfig& appConfig) { auto pap = "Enable debug_handler = \"" debug_pp(handler) "\""; auto qap = "Enable debug_hierlb = \"" debug_pp(hierlb) "\""; auto qbp = "Enable debug_temperedlb = \"" debug_pp(temperedlb) "\""; + auto qcp = "Enable debug_temperedwmin = \"" debug_pp(temperedwmin) "\""; auto rap = "Enable debug_scatter = \"" debug_pp(scatter) "\""; auto sap = "Enable debug_sequence = \"" debug_pp(sequence) "\""; auto tap = "Enable debug_sequence_vrt = \"" debug_pp(sequence_vrt) "\""; @@ -398,6 +399,7 @@ void addDebugPrintArgs(CLI::App& app, AppConfig& appConfig) { auto pa = app.add_flag("--vt_debug_handler", appConfig.vt_debug_handler, pap); auto qa = app.add_flag("--vt_debug_hierlb", appConfig.vt_debug_hierlb, qap); auto qb = app.add_flag("--vt_debug_temperedlb", appConfig.vt_debug_temperedlb, qbp); + auto qc = app.add_flag("--vt_debug_temperedwmin", appConfig.vt_debug_temperedwmin, qcp); auto ra = app.add_flag("--vt_debug_scatter", appConfig.vt_debug_scatter, rap); auto sa = app.add_flag("--vt_debug_sequence", appConfig.vt_debug_sequence, sap); auto ta = app.add_flag("--vt_debug_sequence_vrt", appConfig.vt_debug_sequence_vrt, tap); @@ -436,6 +438,7 @@ void addDebugPrintArgs(CLI::App& app, AppConfig& appConfig) { pa->group(debugGroup); qa->group(debugGroup); qb->group(debugGroup); + qc->group(debugGroup); ra->group(debugGroup); sa->group(debugGroup); ta->group(debugGroup); diff --git a/src/vt/configs/debug/debug_config.h b/src/vt/configs/debug/debug_config.h index c034e5beb4..e0a0370f27 100644 --- a/src/vt/configs/debug/debug_config.h +++ b/src/vt/configs/debug/debug_config.h @@ -82,7 +82,8 @@ enum CatEnum : uint64_t { temperedlb = 1ull<<30, phase = 1ull<<31, context = 1ull<<32, - epoch = 1ull<<33 + epoch = 1ull<<33, + temperedwmin = 1ull<<34 }; enum CtxEnum : uint64_t { @@ -129,6 +130,7 @@ vt_option_category_pretty_print(group, "group") vt_option_category_pretty_print(handler, "handler") vt_option_category_pretty_print(hierlb, "HierarchicalLB") vt_option_category_pretty_print(temperedlb, "TemperedLB") +vt_option_category_pretty_print(temperedwmin, "TemperedWMin") vt_option_category_pretty_print(lb, "lb") vt_option_category_pretty_print(location, "location") vt_option_category_pretty_print(objgroup, "objgroup") diff --git a/src/vt/runtime/runtime_banner.cc b/src/vt/runtime/runtime_banner.cc index 2034a83679..ce74b66ac4 100644 --- a/src/vt/runtime/runtime_banner.cc +++ b/src/vt/runtime/runtime_banner.cc @@ -908,6 +908,7 @@ void Runtime::printStartupBanner() { vt_runtime_debug_warn_compile(handler) vt_runtime_debug_warn_compile(hierlb) vt_runtime_debug_warn_compile(temperedlb) + vt_runtime_debug_warn_compile(temperedwmin) vt_runtime_debug_warn_compile(scatter) vt_runtime_debug_warn_compile(sequence) vt_runtime_debug_warn_compile(sequence_vrt) From d9951d4d528a0e85f803c4216c0c44e3ef06e065 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Tue, 10 May 2022 18:02:26 +0200 Subject: [PATCH 024/106] #1672: tests: lb: improve logging --- .../vrt/collection/balance/baselb/baselb.cc | 4 ++-- .../balance/lb_invoke/lb_manager.cc | 8 ++++++- .../balance/temperedwmin/temperedwmin.cc | 6 +++++ tests/unit/collection/test_lb.extended.cc | 23 +++++++++++-------- 4 files changed, 28 insertions(+), 13 deletions(-) diff --git a/src/vt/vrt/collection/balance/baselb/baselb.cc b/src/vt/vrt/collection/balance/baselb/baselb.cc index 36eae88ec7..0b415be21e 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.cc +++ b/src/vt/vrt/collection/balance/baselb/baselb.cc @@ -270,8 +270,8 @@ void BaseLB::recvSharedEdges(CommMsg* msg) { for (auto&& elm : comm) { comm_map->insert(elm); vt_debug_print( - verbose, lb, "recv shared edge: from={}, to={}\n", elm.first.fromObj(), - elm.first.toObj() + verbose, temperedwmin, "recvSharedEdges: from={}, to={}\n", + elm.first.fromObj(), elm.first.toObj() ); } } diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index e9a0eacbc9..2d77803fdf 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -742,6 +742,11 @@ void makeGraphSymmetric( elm::CommMapType const& comm_data = iter->second; std::unordered_map shared_edges; + vt_debug_print( + verbose, temperedwmin, "makeGraphSymmetric: comm size={}\n", + comm_data.size() + ); + for (auto&& elm : comm_data) { if ( elm.first.commCategory() == elm::CommCategory::SendRecv and @@ -759,7 +764,8 @@ void makeGraphSymmetric( ); vt_debug_print( - verbose, lb, "makeGraphSymmetric: from={}, to={}\n", from, to + verbose, temperedwmin, "makeGraphSymmetric: elm: from={}, to={}\n", + from, to ); if (from_node != this_node) { diff --git a/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc b/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc index 0f6fb82e9e..fc97ebcb91 100644 --- a/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc +++ b/src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc @@ -88,6 +88,12 @@ void TemperedWMin::inputParams(balance::SpecEntry* spec) { alpha_ = spec->getOrDefault("alpha", alpha_); beta_ = spec->getOrDefault("beta", beta_); gamma_ = spec->getOrDefault("gamma", gamma_); + + vt_debug_print( + normal, temperedwmin, + "TemperedWMin::inputParams: alpha={}, beta={}, gamma={}\n", + alpha_, beta_, gamma_ + ); } TimeType TemperedWMin::getModeledWork(const elm::ElementIDStruct& obj) { diff --git a/tests/unit/collection/test_lb.extended.cc b/tests/unit/collection/test_lb.extended.cc index 74ea5e7740..a2b660894b 100644 --- a/tests/unit/collection/test_lb.extended.cc +++ b/tests/unit/collection/test_lb.extended.cc @@ -47,6 +47,7 @@ #include "test_collection_common.h" #include "data_message.h" +#include "vt/phase/phase_manager.h" #include "vt/vrt/collection/manager.h" #include "vt/vrt/collection/balance/lb_data_holder.h" #include "vt/vrt/collection/balance/node_lb_data.h" @@ -90,7 +91,7 @@ void colHandler(MyMsg*, MyCol* col) { struct TestLoadBalancerOther : TestParallelHarnessParam { }; struct TestLoadBalancerGreedy : TestParallelHarnessParam { }; -void runTest(std::string lb_name) { +void runTest(std::string const& lb_name, const int phases = num_phases) { vt::theConfig()->vt_lb = true; vt::theConfig()->vt_lb_name = lb_name; if (vt::theContext()->getNode() == 0) { @@ -124,7 +125,7 @@ void runTest(std::string lb_name) { proxy = vt::theCollection()->constructCollective(range); }); - for (int phase = 0; phase < num_phases; phase++) { + for (int phase = 0; phase < phases; phase++) { // Do some work. runInEpochCollective([&]{ proxy.broadcastCollective(); @@ -155,7 +156,7 @@ TEST_P(TestLoadBalancerGreedy, test_load_balancer_greedy_keep_last_elm) { } TEST_F(TestLoadBalancerOther, test_make_graph_symmetric) { - runTest("TemperedWMin"); + runTest("TemperedWMin", 2); // auto proxy = theLBManager()->getLB(); // runInEpochCollective( @@ -163,23 +164,25 @@ TEST_F(TestLoadBalancerOther, test_make_graph_symmetric) { // [phase, proxy] { vrt::collection::balance::makeGraphSymmetric(phase, proxy); } // ); - auto phase = num_phases - 1; - auto iter = theNodeLBData()->getNodeComm()->find(phase); + auto phase = thePhase()->getCurrentPhase(); + auto iter = theNodeLBData()->getNodeComm()->find(phase - 1); ASSERT_NE(iter, theNodeLBData()->getNodeComm()->end()); elm::CommMapType const& comm_data = iter->second; - fmt::print( - "\ntest_make_graph_symmetric: comm_map.size={}\n", comm_data.size() + vt_debug_print( + verbose, temperedwmin, "test_make_graph_symmetric: comm size={}\n", + comm_data.size() ); for (auto&& elm : comm_data) { if ( elm.first.commCategory() == elm::CommCategory::SendRecv and not elm.first.selfEdge() ) { - fmt::print( - "test_make_graph_symmetric: from={}, to={}\n", elm.first.fromObj(), - elm.first.toObj() + vt_debug_print( + verbose, temperedwmin, + "test_make_graph_symmetric: elm: from={}, to={}\n", + elm.first.fromObj(), elm.first.toObj() ); } } From a0394e7110d9f15b4bd6749dddfd27e7089e64cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Wed, 11 May 2022 16:38:28 +0200 Subject: [PATCH 025/106] #1672: extract `getSharedEdges` method --- .../balance/lb_invoke/lb_manager.cc | 32 +++++++++++-------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index 2d77803fdf..44f7f7a58c 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -728,22 +728,15 @@ void LBManager::closeStatisticsFile() { statistics_writer_ = nullptr; } -void makeGraphSymmetric( - PhaseType phase, objgroup::proxy::Proxy proxy -) { +// Go through the comm graph and extract out paired SendRecv edges that are +// not self-send and have a non-local edge +std::unordered_map +getSharedEdges(elm::CommMapType const& comm_data) { auto const this_node = theContext()->getNode(); - auto iter = theNodeLBData()->getNodeComm()->find(phase); - if (iter == theNodeLBData()->getNodeComm()->end()) { - return; - } - - // Go through the comm graph and extract out paired SendRecv edges that are - // not self-send and have a non-local edge - elm::CommMapType const& comm_data = iter->second; std::unordered_map shared_edges; vt_debug_print( - verbose, temperedwmin, "makeGraphSymmetric: comm size={}\n", + verbose, temperedwmin, "getSharedEdges: comm size={}\n", comm_data.size() ); @@ -764,7 +757,7 @@ void makeGraphSymmetric( ); vt_debug_print( - verbose, temperedwmin, "makeGraphSymmetric: elm: from={}, to={}\n", + verbose, temperedwmin, "getSharedEdges: elm: from={}, to={}\n", from, to ); @@ -776,6 +769,19 @@ void makeGraphSymmetric( } } + return shared_edges; +} + +void makeGraphSymmetric( + PhaseType phase, objgroup::proxy::Proxy proxy +) { + auto iter = theNodeLBData()->getNodeComm()->find(phase); + if (iter == theNodeLBData()->getNodeComm()->end()) { + return; + } + + auto shared_edges = getSharedEdges(iter->second); + for (auto&& elm : shared_edges) { proxy[elm.first].send( elm.second From 8dc21e78ab6d32cae136753b18cd2c49a39bd0a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Wed, 18 May 2022 19:21:12 +0200 Subject: [PATCH 026/106] #1672: tests: setup data for tests manually --- .../balance/lb_invoke/lb_manager.cc | 10 ++--- tests/unit/collection/test_lb.extended.cc | 44 +++++++++++++------ 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index 44f7f7a58c..ebe26f1a75 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -751,16 +751,16 @@ getSharedEdges(elm::CommMapType const& comm_data) { auto from_node = from.curr_node; auto to_node = to.curr_node; - vtAssert( - from_node == this_node or to_node == this_node, - "One node must involve this node" - ); - vt_debug_print( verbose, temperedwmin, "getSharedEdges: elm: from={}, to={}\n", from, to ); + vtAssert( + from_node == this_node or to_node == this_node, + "One node must involve this node" + ); + if (from_node != this_node) { shared_edges[from_node][elm.first] = elm.second; } else if (to_node != this_node) { diff --git a/tests/unit/collection/test_lb.extended.cc b/tests/unit/collection/test_lb.extended.cc index a2b660894b..c0cc1927a4 100644 --- a/tests/unit/collection/test_lb.extended.cc +++ b/tests/unit/collection/test_lb.extended.cc @@ -156,25 +156,41 @@ TEST_P(TestLoadBalancerGreedy, test_load_balancer_greedy_keep_last_elm) { } TEST_F(TestLoadBalancerOther, test_make_graph_symmetric) { - runTest("TemperedWMin", 2); - - // auto proxy = theLBManager()->getLB(); - // runInEpochCollective( - // "test_make_graph_symmetric -> makeGraphSymmetric", - // [phase, proxy] { vrt::collection::balance::makeGraphSymmetric(phase, proxy); } - // ); - - auto phase = thePhase()->getCurrentPhase(); - auto iter = theNodeLBData()->getNodeComm()->find(phase - 1); - ASSERT_NE(iter, theNodeLBData()->getNodeComm()->end()); + // setup + auto const this_node = theContext()->getNode(); + auto const num_nodes = theContext()->getNumNodes(); + auto const next_node = (this_node + 1) % num_nodes; + auto const phase = thePhase()->getCurrentPhase(); + + elm::ElementIDStruct id_from, id_to; + id_from.id = this_node * 4 + 1; + id_from.curr_node = this_node; + id_to.id = next_node * 4 + 1; + id_to.curr_node = next_node; + + elm::ElementLBData elm_data; + double const bytes = 10.0; + elm_data.sendToEntity(id_to, id_from, bytes); + theNodeLBData()->addNodeLBData(id_from, &elm_data, nullptr); + + // test + auto proxy = theLBManager()->getLB(); + runInEpochCollective( + "test_make_graph_symmetric -> makeGraphSymmetric", + [phase, proxy] { vrt::collection::balance::makeGraphSymmetric(phase, proxy); } + ); - elm::CommMapType const& comm_data = iter->second; + // assert + auto comm_data = theNodeLBData()->getNodeComm(phase); + ASSERT_NE(comm_data, nullptr); + // TODO: assert that each node received appropriate element + // instead of just printing vt_debug_print( verbose, temperedwmin, "test_make_graph_symmetric: comm size={}\n", - comm_data.size() + comm_data->size() ); - for (auto&& elm : comm_data) { + for (auto&& elm : *comm_data) { if ( elm.first.commCategory() == elm::CommCategory::SendRecv and not elm.first.selfEdge() From 1a92b28f98a1de8aa7260bd4ba5fbf033fde4ee4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Wed, 25 May 2022 22:40:12 +0200 Subject: [PATCH 027/106] #1672: tests: verify that comm data is stored on both sides --- .../collection/balance/lb_invoke/lb_manager.h | 4 +- tests/unit/collection/test_lb.extended.cc | 53 ++++++++++--------- 2 files changed, 28 insertions(+), 29 deletions(-) diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h index 413630ece2..d64bf6b7b6 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h @@ -97,8 +97,6 @@ struct LBManager : runtime::component::Component { static std::unique_ptr construct(); public: - LBProxyType getLB() { return lb_instances_["chosen"]; } - /** * \internal * \brief Decide which LB to invoke given a certain phase @@ -223,7 +221,6 @@ struct LBManager : runtime::component::Component { ); void commitPhaseStatistics(PhaseType phase); -protected: /** * \internal \brief Collectively construct a new load balancer * @@ -234,6 +231,7 @@ struct LBManager : runtime::component::Component { template LBProxyType makeLB(); +protected: /** * \internal * \brief Run the load balancer diff --git a/tests/unit/collection/test_lb.extended.cc b/tests/unit/collection/test_lb.extended.cc index c0cc1927a4..504818670d 100644 --- a/tests/unit/collection/test_lb.extended.cc +++ b/tests/unit/collection/test_lb.extended.cc @@ -52,6 +52,7 @@ #include "vt/vrt/collection/balance/lb_data_holder.h" #include "vt/vrt/collection/balance/node_lb_data.h" #include "vt/vrt/collection/balance/lb_invoke/lb_manager.h" +#include "vt/vrt/collection/balance/temperedwmin/temperedwmin.h" #include "vt/utils/json/json_reader.h" #include "vt/utils/json/json_appender.h" @@ -160,47 +161,47 @@ TEST_F(TestLoadBalancerOther, test_make_graph_symmetric) { auto const this_node = theContext()->getNode(); auto const num_nodes = theContext()->getNumNodes(); auto const next_node = (this_node + 1) % num_nodes; - auto const phase = thePhase()->getCurrentPhase(); - elm::ElementIDStruct id_from, id_to; - id_from.id = this_node * 4 + 1; - id_from.curr_node = this_node; - id_to.id = next_node * 4 + 1; - id_to.curr_node = next_node; + auto id_from = + elm::ElmIDBits::createCollectionImpl(true, 1, this_node, this_node); + auto id_to = + elm::ElmIDBits::createCollectionImpl(true, 2, next_node, next_node); elm::ElementLBData elm_data; double const bytes = 10.0; elm_data.sendToEntity(id_to, id_from, bytes); theNodeLBData()->addNodeLBData(id_from, &elm_data, nullptr); + auto const phase = thePhase()->getCurrentPhase(); + auto const comm_data = theNodeLBData()->getNodeComm(phase); + ASSERT_NE(comm_data, nullptr); + ASSERT_EQ(comm_data->size(), 1); + // test - auto proxy = theLBManager()->getLB(); + auto proxy = theLBManager()->makeLB(); runInEpochCollective( "test_make_graph_symmetric -> makeGraphSymmetric", [phase, proxy] { vrt::collection::balance::makeGraphSymmetric(phase, proxy); } ); + vt::theLBManager()->destroyLB(); // assert - auto comm_data = theNodeLBData()->getNodeComm(phase); - ASSERT_NE(comm_data, nullptr); - - // TODO: assert that each node received appropriate element - // instead of just printing - vt_debug_print( - verbose, temperedwmin, "test_make_graph_symmetric: comm size={}\n", - comm_data->size() - ); + ASSERT_EQ(comm_data->size(), 2); + auto const prev_node = (this_node + num_nodes - 1) % num_nodes; for (auto&& elm : *comm_data) { - if ( - elm.first.commCategory() == elm::CommCategory::SendRecv and - not elm.first.selfEdge() - ) { - vt_debug_print( - verbose, temperedwmin, - "test_make_graph_symmetric: elm: from={}, to={}\n", - elm.first.fromObj(), elm.first.toObj() - ); - } + auto const& comm_key = elm.first; + auto const from_home_node = comm_key.fromObj().getHomeNode(); + auto const to_home_node = comm_key.toObj().getHomeNode(); + + ASSERT_TRUE( + from_home_node == this_node and to_home_node == next_node or + from_home_node == prev_node and to_home_node == this_node + ); + + vt_debug_print( + verbose, temperedwmin, "test_make_graph_symmetric: elm: from={}, to={}\n", + comm_key.fromObj(), comm_key.toObj() + ); } } From e751d8030039adab7531bbf5993874c036f19240 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Thu, 26 May 2022 17:42:48 +0200 Subject: [PATCH 028/106] #1672: tests: handle single node scenario --- tests/unit/collection/test_lb.extended.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/unit/collection/test_lb.extended.cc b/tests/unit/collection/test_lb.extended.cc index 504818670d..dfbe17774e 100644 --- a/tests/unit/collection/test_lb.extended.cc +++ b/tests/unit/collection/test_lb.extended.cc @@ -186,6 +186,11 @@ TEST_F(TestLoadBalancerOther, test_make_graph_symmetric) { vt::theLBManager()->destroyLB(); // assert + if (num_nodes == 1) { + ASSERT_EQ(comm_data->size(), 1); + return; + } + ASSERT_EQ(comm_data->size(), 2); auto const prev_node = (this_node + num_nodes - 1) % num_nodes; for (auto&& elm : *comm_data) { From d14d9063cd7edcb2c8a828a2eef5ad296a9fb47b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Thu, 26 May 2022 17:44:55 +0200 Subject: [PATCH 029/106] #1672: lb: use correct feature for debug prints --- src/vt/vrt/collection/balance/baselb/baselb.cc | 2 +- src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/vt/vrt/collection/balance/baselb/baselb.cc b/src/vt/vrt/collection/balance/baselb/baselb.cc index 0b415be21e..bc0aebb334 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.cc +++ b/src/vt/vrt/collection/balance/baselb/baselb.cc @@ -270,7 +270,7 @@ void BaseLB::recvSharedEdges(CommMsg* msg) { for (auto&& elm : comm) { comm_map->insert(elm); vt_debug_print( - verbose, temperedwmin, "recvSharedEdges: from={}, to={}\n", + verbose, lb, "recvSharedEdges: from={}, to={}\n", elm.first.fromObj(), elm.first.toObj() ); } diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index ebe26f1a75..a3478de33b 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -736,8 +736,7 @@ getSharedEdges(elm::CommMapType const& comm_data) { std::unordered_map shared_edges; vt_debug_print( - verbose, temperedwmin, "getSharedEdges: comm size={}\n", - comm_data.size() + verbose, lb, "getSharedEdges: comm size={}\n", comm_data.size() ); for (auto&& elm : comm_data) { From 48f23d8bdd2ed0923b86848e26555305863d9516 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Thu, 26 May 2022 18:11:43 +0200 Subject: [PATCH 030/106] #1672: tests: add missing parentheses --- tests/unit/collection/test_lb.extended.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/collection/test_lb.extended.cc b/tests/unit/collection/test_lb.extended.cc index dfbe17774e..654663395f 100644 --- a/tests/unit/collection/test_lb.extended.cc +++ b/tests/unit/collection/test_lb.extended.cc @@ -199,8 +199,8 @@ TEST_F(TestLoadBalancerOther, test_make_graph_symmetric) { auto const to_home_node = comm_key.toObj().getHomeNode(); ASSERT_TRUE( - from_home_node == this_node and to_home_node == next_node or - from_home_node == prev_node and to_home_node == this_node + (from_home_node == this_node and to_home_node == next_node) or + (from_home_node == prev_node and to_home_node == this_node) ); vt_debug_print( From a4a6b238abc55b5f7b4fabb5e0d58a5a8426afff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Thu, 26 May 2022 21:56:26 +0200 Subject: [PATCH 031/106] #1672: tests: use `getModeledLoad` method --- .../balance/testserializationlb/testserializationlb.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/testserializationlb/testserializationlb.cc b/src/vt/vrt/collection/balance/testserializationlb/testserializationlb.cc index 1a7ee6a53a..1b694987ea 100644 --- a/src/vt/vrt/collection/balance/testserializationlb/testserializationlb.cc +++ b/src/vt/vrt/collection/balance/testserializationlb/testserializationlb.cc @@ -63,7 +63,9 @@ void TestSerializationLB::inputParams(balance::SpecEntry*) { } void TestSerializationLB::runLB(TimeType) { auto const this_node = theContext()->getNode(); for (auto obj : *load_model_) { - TimeTypeWrapper const load = load_model_->getWork(obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE}); + auto const load = load_model_->getModeledLoad( + obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE} + ); vt_debug_print( terse, lb, "\t TestSerializationLB::migrating object to: obj={}, load={}, from_node={} to_node={}\n", From 49a0012030275ef53145c2c979a2e28c91322136 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Fri, 27 May 2022 10:16:27 +0200 Subject: [PATCH 032/106] #1672: fix merge conflicts --- src/vt/vrt/collection/balance/model/raw_data.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/model/raw_data.cc b/src/vt/vrt/collection/balance/model/raw_data.cc index 1cf29e8dc3..58c7e8c2d8 100644 --- a/src/vt/vrt/collection/balance/model/raw_data.cc +++ b/src/vt/vrt/collection/balance/model/raw_data.cc @@ -95,7 +95,6 @@ int RawData::getNumSubphases() { } TimeType RawData::getModeledLoad(ElementIDStruct object, PhaseOffset offset) { -{ return getRawLoad(object, offset); } From 12f5c6ba774981e7198067348a1b554c2feb69e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Mon, 30 May 2022 10:32:48 +0200 Subject: [PATCH 033/106] #1672: tests: remove unused argument --- tests/unit/collection/test_lb.extended.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/collection/test_lb.extended.cc b/tests/unit/collection/test_lb.extended.cc index 654663395f..f3707c5b9e 100644 --- a/tests/unit/collection/test_lb.extended.cc +++ b/tests/unit/collection/test_lb.extended.cc @@ -92,7 +92,7 @@ void colHandler(MyMsg*, MyCol* col) { struct TestLoadBalancerOther : TestParallelHarnessParam { }; struct TestLoadBalancerGreedy : TestParallelHarnessParam { }; -void runTest(std::string const& lb_name, const int phases = num_phases) { +void runTest(std::string const& lb_name) { vt::theConfig()->vt_lb = true; vt::theConfig()->vt_lb_name = lb_name; if (vt::theContext()->getNode() == 0) { @@ -126,7 +126,7 @@ void runTest(std::string const& lb_name, const int phases = num_phases) { proxy = vt::theCollection()->constructCollective(range); }); - for (int phase = 0; phase < phases; phase++) { + for (int phase = 0; phase < num_phases; phase++) { // Do some work. runInEpochCollective([&]{ proxy.broadcastCollective(); From bcb1e846f151f167960cc16111ef6ec30171c16a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Mon, 30 May 2022 13:34:48 +0200 Subject: [PATCH 034/106] #1672: tests: make sure that TemperedWMin is chosen --- tests/unit/lb/test_temperedlb.nompi.cc | 28 +++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/tests/unit/lb/test_temperedlb.nompi.cc b/tests/unit/lb/test_temperedlb.nompi.cc index f74f9697d9..e54fe99618 100644 --- a/tests/unit/lb/test_temperedlb.nompi.cc +++ b/tests/unit/lb/test_temperedlb.nompi.cc @@ -78,7 +78,7 @@ void orderAndVerify( ObjectOrdering order, const std::unordered_map& cur_objs, TimeType my_load, TimeType target_load, - const std::vector& soln, bool use_tempered_wmin = false) { + const std::vector& soln, bool use_tempered_wmin) { // have TemperedLB order the objects auto ordered_objs = use_tempered_wmin ? vt::vrt::collection::lb::TemperedWMin::orderObjects( @@ -95,24 +95,28 @@ void orderAndVerify( void orderUsingOverloadAndVerify( ObjectOrdering order, TimeType over_avg_sec /* my_load-target_load */, - const std::vector &soln + const std::vector &soln, bool use_temperd_wmin = false ) { std::unordered_map cur_objs; TimeType my_load = setupProblem(cur_objs); TimeType target_load = my_load - over_avg_sec; - orderAndVerify(order, cur_objs, my_load, target_load, soln); + orderAndVerify( + order, cur_objs, my_load, target_load, soln, use_temperd_wmin + ); } void orderUsingTargetLoadAndVerify( ObjectOrdering order, TimeType target_load_sec, - const std::vector &soln + const std::vector &soln, bool use_temperd_wmin = false ) { std::unordered_map cur_objs; TimeType my_load = setupProblem(cur_objs); TimeType target_load = target_load_sec; - orderAndVerify(order, cur_objs, my_load, target_load, soln); + orderAndVerify( + order, cur_objs, my_load, target_load, soln, use_temperd_wmin + ); } /////////////////////////////////////////////////////////////////////////// @@ -203,13 +207,23 @@ TEST_F(TestTemperedLB, test_temperedlb_ordering_largestobjects) { /////////////////////////////////////////////////////////////////////////// -TEST_F(TestTemperedLB, test_temperedwmin) { +TEST_F(TestTemperedLB, test_temperedwmin_ordering_elmid) { ObjectOrdering order = ObjectOrdering::ElmID; TimeType over_avg = 4.5; // result will be independent of over_avg std::vector soln = {0, 1, 2, 3, 4, 5}; - orderUsingOverloadAndVerify(order, over_avg, soln); + orderUsingOverloadAndVerify(order, over_avg, soln, true); +} + +TEST_F(TestTemperedLB, test_temperedwmin_ordering_smallobjects_largest) { + ObjectOrdering order = ObjectOrdering::SmallObjects; + TimeType target_load = 0.5; + // marginal_obj_load will be 9.0 + // load order will be 9.0, 6.0, 5.0, 4.0, 3.0, 2.0 + std::vector soln = {2, 1, 5, 3, 4, 0}; + + orderUsingTargetLoadAndVerify(order, target_load, soln, true); } }}} // end namespace vt::tests::unit From e481a3dac145a73a5229b4fdd4dcbb8c352aee2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Tue, 31 May 2022 14:19:03 +0200 Subject: [PATCH 035/106] #1672: tests: narrow down the assertions for test results --- tests/unit/collection/test_lb.extended.cc | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tests/unit/collection/test_lb.extended.cc b/tests/unit/collection/test_lb.extended.cc index f3707c5b9e..c1eaf3705c 100644 --- a/tests/unit/collection/test_lb.extended.cc +++ b/tests/unit/collection/test_lb.extended.cc @@ -193,21 +193,32 @@ TEST_F(TestLoadBalancerOther, test_make_graph_symmetric) { ASSERT_EQ(comm_data->size(), 2); auto const prev_node = (this_node + num_nodes - 1) % num_nodes; + bool this_to_next = false, prev_to_this = false; + for (auto&& elm : *comm_data) { auto const& comm_key = elm.first; + auto const& comm_vol = elm.second; auto const from_home_node = comm_key.fromObj().getHomeNode(); auto const to_home_node = comm_key.toObj().getHomeNode(); - ASSERT_TRUE( - (from_home_node == this_node and to_home_node == next_node) or - (from_home_node == prev_node and to_home_node == this_node) - ); + if (from_home_node == this_node) { + ASSERT_EQ(to_home_node, next_node); + this_to_next = true; + } else if (from_home_node == prev_node) { + ASSERT_EQ(to_home_node, this_node); + prev_to_this = true; + } + ASSERT_EQ(comm_vol.bytes, bytes); vt_debug_print( verbose, temperedwmin, "test_make_graph_symmetric: elm: from={}, to={}\n", comm_key.fromObj(), comm_key.toObj() ); } + + // make sure that both (distinct) comms are present + ASSERT_TRUE(this_to_next); + ASSERT_TRUE(prev_to_this); } struct MyCol2 : vt::Collection {}; From e68ed98f6dde6394d6632dc1c6f1e7f9462b1946 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Tue, 31 May 2022 15:15:07 +0200 Subject: [PATCH 036/106] #1672: fix formatting --- src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc b/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc index 90b70f41ba..80d811375e 100644 --- a/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc +++ b/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc @@ -312,7 +312,8 @@ void HierarchicalLB::loadOverBin(ObjBinType bin, ObjBinListType& bin_list) { bin_list.pop_back(); auto const& obj_time_milli = loadMilli(load_model_->getModeledLoad(obj_id, - {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE})); + {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE} + )); this_load -= obj_time_milli; From f7878298288de1aa431d40f163dfb31885b6069a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Tue, 31 May 2022 15:18:25 +0200 Subject: [PATCH 037/106] #1672: lb: move CommMsg to baselb messages --- src/vt/vrt/collection/balance/baselb/baselb.h | 16 ---------------- .../vrt/collection/balance/baselb/baselb_msgs.h | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/vt/vrt/collection/balance/baselb/baselb.h b/src/vt/vrt/collection/balance/baselb/baselb.h index da7d60424c..277a8cd007 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.h +++ b/src/vt/vrt/collection/balance/baselb/baselb.h @@ -179,22 +179,6 @@ struct BaseLB { std::shared_ptr pending_reassignment_ = nullptr; }; -struct CommMsg : vt::Message { - using MessageParentType = vt::Message; - vt_msg_serialize_required(); - - CommMsg() = default; - explicit CommMsg(lb::BaseLB::ElementCommType in_comm) : comm_(in_comm) { } - - lb::BaseLB::ElementCommType comm_; - - template - void serialize(SerializerT& s) { - MessageParentType::serialize(s); - s | comm_; - } -}; - }}}} // namespace vt::vrt::collection::lb #endif /*INCLUDED_VT_VRT_COLLECTION_BALANCE_BASELB_BASELB_H*/ diff --git a/src/vt/vrt/collection/balance/baselb/baselb_msgs.h b/src/vt/vrt/collection/balance/baselb/baselb_msgs.h index 99b71c0db6..60002bba9b 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb_msgs.h +++ b/src/vt/vrt/collection/balance/baselb/baselb_msgs.h @@ -82,6 +82,22 @@ struct CountMsg : vt::collective::ReduceTMsg { {} }; +struct CommMsg : vt::Message { + using MessageParentType = vt::Message; + vt_msg_serialize_required(); + + CommMsg() = default; + explicit CommMsg(elm::CommMapType const& in_comm) : comm_(in_comm) { } + + elm::CommMapType comm_; + + template + void serialize(SerializerT& s) { + MessageParentType::serialize(s); + s | comm_; + } +}; + }}}} /* end namespace vt::vrt::collection::lb */ #endif /*INCLUDED_VT_VRT_COLLECTION_BALANCE_BASELB_BASELB_MSGS_H*/ From cba6a3f5556df87935baa2d60056e9f22e920813 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Wed, 1 Jun 2022 20:54:23 +0200 Subject: [PATCH 038/106] #1672: Revert "improve post-commit hooks" This reverts commit 16a58cbd856ad38933b412981e250a14d4ed7f1e. --- scripts/post-commit | 6 ------ 1 file changed, 6 deletions(-) diff --git a/scripts/post-commit b/scripts/post-commit index 8c1a0aa4a3..ea8e3dd43e 100755 --- a/scripts/post-commit +++ b/scripts/post-commit @@ -5,12 +5,6 @@ # every commit. # Prints the resulting changes if there are any. -# don't run the action during rebase -if ! [[ $(git branch --show-current) ]] -then - exit -fi - output=$(git clang-format HEAD~1) if [ "$output" != "" ] From 89da42a34061587883fcee2658357e5096b09859 Mon Sep 17 00:00:00 2001 From: Jakub Strzebonski Date: Mon, 25 Apr 2022 17:01:04 +0200 Subject: [PATCH 039/106] #1715 allow labels on collections --- docs/md/collection.md | 6 +-- examples/callback/callback.cc | 2 +- examples/collection/insertable_collection.cc | 2 +- examples/collection/jacobi1d_vt.cc | 2 +- examples/collection/jacobi2d_vt.cc | 2 +- examples/collection/lb_iter.cc | 2 +- examples/collection/migrate_collection.cc | 2 +- examples/collection/reduce_integral.cc | 2 +- examples/collection/transpose.cc | 2 +- .../hello_world/hello_world_collection.cc | 2 +- .../hello_world_collection_collective.cc | 2 +- .../hello_world_collection_reduce.cc | 2 +- .../hello_world_collection_staged_insert.cc | 2 +- .../vrt/collection/collection_builder.impl.h | 3 +- src/vt/vrt/collection/holders/col_holder.h | 4 +- .../vrt/collection/holders/col_holder.impl.h | 5 +- src/vt/vrt/collection/manager.h | 39 ++++++++++---- src/vt/vrt/collection/manager.impl.h | 52 ++++++++++++------- .../vrt/collection/param/construct_params.h | 40 +++++++++----- tests/unit/collection/test_broadcast.cc | 2 +- .../collection/test_broadcast.extended.cc | 2 +- tests/unit/collection/test_broadcast.h | 4 +- .../collection/test_checkpoint.extended.cc | 18 +++++-- .../test_collection_construct_common.h | 19 ++++--- .../test_collection_group.extended.cc | 22 +++++--- .../test_collection_group_recreate.cc | 2 +- tests/unit/collection/test_construct.cc | 2 +- .../test_construct_no_idx.extended.cc | 2 +- tests/unit/collection/test_destroy.cc | 4 +- .../collection/test_index_types.extended.cc | 4 +- tests/unit/collection/test_insert.extended.cc | 12 ++--- tests/unit/collection/test_invoke.cc | 4 +- tests/unit/collection/test_lb.extended.cc | 23 ++++---- .../unit/collection/test_lb_data_retention.cc | 12 +++-- .../unit/collection/test_lb_lite.extended.cc | 4 +- tests/unit/collection/test_list_insert.cc | 8 +-- tests/unit/collection/test_mapping.cc | 4 +- .../test_model_per_collection.extended.cc | 8 ++- tests/unit/collection/test_query_context.cc | 8 ++- .../unit/collection/test_reduce_collection.cc | 2 +- .../collection/test_reduce_collection_race.cc | 4 +- tests/unit/collection/test_send.cc | 4 +- tests/unit/collection/test_send.extended.cc | 4 +- tests/unit/collection/test_send.h | 10 ++-- tests/unit/collection/test_storage.cc | 4 +- .../collectives/test_collectives_reduce.cc | 2 +- tests/unit/lb/test_lb_data_comm.cc | 10 ++-- tests/unit/location/test_hops.extended.cc | 4 +- tests/unit/phase/test_phase_insertions.cc | 2 +- ...test_callback_bcast_collection.extended.cc | 12 +++-- .../test_callback_send_collection.extended.cc | 12 +++-- .../test_rdma_collection_handle.extended.cc | 4 +- .../test_collection_chainset_tracking.cc | 4 +- .../termination/test_term_dep_send_chain.cc | 14 ++--- tutorial/tutorial_2a.h | 2 +- tutorial/tutorial_2b.h | 2 +- 56 files changed, 274 insertions(+), 159 deletions(-) diff --git a/docs/md/collection.md b/docs/md/collection.md index 4f8fe6fd21..3c4cff989e 100644 --- a/docs/md/collection.md +++ b/docs/md/collection.md @@ -74,7 +74,7 @@ definition (shown for a 1-dimensional collection): \code{.cpp} vt::NodeType my_map(vt::Index1D* idx, vt::Index1D* bounds, vt::NodeType num_nodes) { - return idx->x() % num_nodes; + return idx->x() % num_nodes; } \endcode @@ -149,7 +149,7 @@ membership. This is performed in the following way (note that this is a collective interface): \code{.cpp} - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("collection_label") .dynamicMembership(true) .collective(true) .wait(); @@ -158,7 +158,7 @@ collective interface): auto token = proxy.beginModification(); for (int i = 0; i < range.x() / 2; i++) { if (i % num_nodes == this_node) { - proxy[i].insertAt(token, i % 2); + proxy[i].insertAt(token, i % 2); } } proxy.finishModification(std::move(token)); diff --git a/examples/callback/callback.cc b/examples/callback/callback.cc index ad6a1277e1..d73f33bbc1 100644 --- a/examples/callback/callback.cc +++ b/examples/callback/callback.cc @@ -130,7 +130,7 @@ int main(int argc, char** argv) { } auto obj = vt::theObjGroup()->makeCollective(); - auto col = vt::makeCollection() + auto col = vt::makeCollection("examples_callback") .bounds(vt::Index1D(8)) .bulkInsert() .wait(); diff --git a/examples/collection/insertable_collection.cc b/examples/collection/insertable_collection.cc index 4c45282770..f1f16c4d34 100644 --- a/examples/collection/insertable_collection.cc +++ b/examples/collection/insertable_collection.cc @@ -69,7 +69,7 @@ int main(int argc, char** argv) { } auto range = vt::Index1D(num_elms); - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("examples_insertable_collection") .dynamicMembership(true) .wait(); diff --git a/examples/collection/jacobi1d_vt.cc b/examples/collection/jacobi1d_vt.cc index 1cecfdf5e5..c752abe8e6 100644 --- a/examples/collection/jacobi1d_vt.cc +++ b/examples/collection/jacobi1d_vt.cc @@ -373,7 +373,7 @@ int main(int argc, char** argv) { using BaseIndexType = typename vt::Index1D::DenseIndexType; auto range = vt::Index1D(static_cast(num_objs)); - auto col_proxy = vt::makeCollection() + auto col_proxy = vt::makeCollection("examples_jacobi1d") .bounds(range) .bulkInsert() .wait(); diff --git a/examples/collection/jacobi2d_vt.cc b/examples/collection/jacobi2d_vt.cc index 7b9f239fb9..1211f315d3 100644 --- a/examples/collection/jacobi2d_vt.cc +++ b/examples/collection/jacobi2d_vt.cc @@ -497,7 +497,7 @@ int main(int argc, char** argv) { static_cast(numY_objs) ); - auto col_proxy = vt::makeCollection() + auto col_proxy = vt::makeCollection("examples_jacobi2d") .bounds(range) .bulkInsert() .wait(); diff --git a/examples/collection/lb_iter.cc b/examples/collection/lb_iter.cc index 980f94bf49..5b4a3ac2ca 100644 --- a/examples/collection/lb_iter.cc +++ b/examples/collection/lb_iter.cc @@ -122,7 +122,7 @@ int main(int argc, char** argv) { } auto range = vt::Index1D(num_elms); - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("examples_lb_iter") .bounds(range) .bulkInsert() .wait(); diff --git a/examples/collection/migrate_collection.cc b/examples/collection/migrate_collection.cc index 18719f64a4..bc77c205f3 100644 --- a/examples/collection/migrate_collection.cc +++ b/examples/collection/migrate_collection.cc @@ -103,7 +103,7 @@ int main(int argc, char** argv) { } auto range = vt::Index1D(num_elms); - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("examples_migrate_collection") .bounds(range) .bulkInsert() .wait(); diff --git a/examples/collection/reduce_integral.cc b/examples/collection/reduce_integral.cc index 65cb2beb86..45a40fbcb5 100644 --- a/examples/collection/reduce_integral.cc +++ b/examples/collection/reduce_integral.cc @@ -227,7 +227,7 @@ int main(int argc, char** argv) { using BaseIndexType = typename vt::Index1D::DenseIndexType; auto range = vt::Index1D(static_cast(num_objs)); - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("examples_reduce_integral") .bounds(range) .bulkInsert() .wait(); diff --git a/examples/collection/transpose.cc b/examples/collection/transpose.cc index 84609db4bd..32b1074880 100644 --- a/examples/collection/transpose.cc +++ b/examples/collection/transpose.cc @@ -333,7 +333,7 @@ int main(int argc, char** argv) { vt::NodeType this_node = vt::theContext()->getNode(); auto range = vt::Index1D(num_pieces); - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("examples_transpose") .bounds(range) .bulkInsert() .mapperFunc() diff --git a/examples/hello_world/hello_world_collection.cc b/examples/hello_world/hello_world_collection.cc index c0966606c0..0bfbd3d826 100644 --- a/examples/hello_world/hello_world_collection.cc +++ b/examples/hello_world/hello_world_collection.cc @@ -75,7 +75,7 @@ int main(int argc, char** argv) { if (this_node == 0) { auto range = vt::Index1D(num_elms); - auto proxy = vt::makeCollectionRooted() + auto proxy = vt::makeCollectionRooted("examples_hello_world_collection") .bounds(range) .bulkInsert() .wait(); diff --git a/examples/hello_world/hello_world_collection_collective.cc b/examples/hello_world/hello_world_collection_collective.cc index 3efa265cd7..091f67101a 100644 --- a/examples/hello_world/hello_world_collection_collective.cc +++ b/examples/hello_world/hello_world_collection_collective.cc @@ -72,7 +72,7 @@ int main(int argc, char** argv) { } auto range = vt::Index1D(num_elms); - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("examples_hello_world_collection_collective") .bounds(range) .bulkInsert() .wait(); diff --git a/examples/hello_world/hello_world_collection_reduce.cc b/examples/hello_world/hello_world_collection_reduce.cc index b4d8b6764e..dcbb5f2930 100644 --- a/examples/hello_world/hello_world_collection_reduce.cc +++ b/examples/hello_world/hello_world_collection_reduce.cc @@ -79,7 +79,7 @@ int main(int argc, char** argv) { } auto range = vt::Index1D(num_elms); - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("hello_world_collection_reduce") .bounds(range) .bulkInsert() .wait(); diff --git a/examples/hello_world/hello_world_collection_staged_insert.cc b/examples/hello_world/hello_world_collection_staged_insert.cc index 904ff643d8..e3c026d5ec 100644 --- a/examples/hello_world/hello_world_collection_staged_insert.cc +++ b/examples/hello_world/hello_world_collection_staged_insert.cc @@ -101,7 +101,7 @@ int main(int argc, char** argv) { } } - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("examples_hello_world_collection_staged_insert") .bounds(range) .listInsertHere(std::move(elms)) .wait(); diff --git a/src/vt/vrt/collection/collection_builder.impl.h b/src/vt/vrt/collection/collection_builder.impl.h index 81137355bf..1207f58f2f 100644 --- a/src/vt/vrt/collection/collection_builder.impl.h +++ b/src/vt/vrt/collection/collection_builder.impl.h @@ -115,6 +115,7 @@ void CollectionManager::makeCollectionImpl(param::ConstructParams& po) { auto const this_node = theContext()->getNode(); auto const has_bounds = po.has_bounds_; auto const bounds = has_bounds ? po.bounds_ : IndexType{}; + auto const label = po.label_; // Setup a proper default map if none is explicitly specified by the user if (po.map_han_ == uninitialized_handler and po.map_object_ == no_obj_group) { @@ -141,7 +142,7 @@ void CollectionManager::makeCollectionImpl(param::ConstructParams& po) { // Insert the typed meta-data for this new collection, along with creating // the meta-data collection holder for elements insertMetaCollection( - proxy, map_han, has_dynamic_membership, map_object, has_bounds, bounds + proxy, map_han, has_dynamic_membership, map_object, has_bounds, bounds, label ); std::size_t global_constructed_elms = 0; diff --git a/src/vt/vrt/collection/holders/col_holder.h b/src/vt/vrt/collection/holders/col_holder.h index 07397f36fd..ff50c6ef1a 100644 --- a/src/vt/vrt/collection/holders/col_holder.h +++ b/src/vt/vrt/collection/holders/col_holder.h @@ -72,11 +72,12 @@ struct CollectionHolder : BaseHolder { * \param[in] in_map_object the map object * \param[in] in_has_bounds whether it has bounds * \param[in] in_bounds the bounds + * \param[in] label collection label */ CollectionHolder( HandlerType const in_map_fn, bool const in_has_dynamic_membership, ObjGroupProxyType in_map_object, bool const in_has_bounds, - IndexT const in_bounds + IndexT const in_bounds, std::string const& label ); virtual ~CollectionHolder() {} @@ -93,6 +94,7 @@ struct CollectionHolder : BaseHolder { bool has_bounds = false; /**< Whether it as bounds */ IndexT bounds = {}; /**< The bounds */ Holder holder_; /**< Inner holder of elements */ + std::string label_; /**< Collection label */ }; }}} /* end namespace vt::vrt::collection */ diff --git a/src/vt/vrt/collection/holders/col_holder.impl.h b/src/vt/vrt/collection/holders/col_holder.impl.h index c7903ca758..fad6ce6c8b 100644 --- a/src/vt/vrt/collection/holders/col_holder.impl.h +++ b/src/vt/vrt/collection/holders/col_holder.impl.h @@ -53,12 +53,13 @@ template CollectionHolder::CollectionHolder( HandlerType const in_map_fn, bool const in_has_dynamic_membership, ObjGroupProxyType in_map_object, bool const in_has_bounds, - IndexT const in_bounds + IndexT const in_bounds, std::string const& label ) : map_fn(in_map_fn), has_dynamic_membership_(in_has_dynamic_membership), map_object(in_map_object), has_bounds(in_has_bounds), - bounds(in_bounds) + bounds(in_bounds), + label_(label) { } template diff --git a/src/vt/vrt/collection/manager.h b/src/vt/vrt/collection/manager.h index d3cf37d257..301377f653 100644 --- a/src/vt/vrt/collection/manager.h +++ b/src/vt/vrt/collection/manager.h @@ -173,6 +173,7 @@ struct CollectionManager * \brief Construct a new virtual context collection with an explicit, * pre-registered map handler * + * \param[in] label collection label * \param[in] range index range for the collection * \param[in] map pre-registered map handler * @@ -180,11 +181,15 @@ struct CollectionManager */ template CollectionProxyWrapType - constructMap(typename ColT::IndexType range, HandlerType const map); + constructMap( + std::string const& label, typename ColT::IndexType range, + HandlerType const map + ); /** * \brief Construct a new virtual context collection with templated map * + * \param[in] label collection label * \param[in] range index range for the collection * * \return proxy to the new collection @@ -193,7 +198,9 @@ struct CollectionManager typename ColT, mapping::ActiveMapTypedFnType fn > CollectionProxyWrapType - construct(typename ColT::IndexType range); + construct( + std::string const& label, typename ColT::IndexType range + ); /** * \brief Construct a new virtual context collection using the default map for @@ -202,13 +209,16 @@ struct CollectionManager * The default map is found by looking up the * \c vrt::collection::DefaultMap<...> specialization on the Index type. * + * \param[in] label collection label * \param[in] range index range for the collection * * \return proxy to the new collection */ template CollectionProxyWrapType - construct(typename ColT::IndexType range); + construct( + std::string const& label, typename ColT::IndexType range + ); /** * \brief Collectively construct a new virtual context collection with @@ -218,6 +228,7 @@ struct CollectionManager * method enables distributed SPMD construction of the virtual context * collection where each index is mapped with the \c MapFnT. * + * \param[in] label collection label * \param[in] range index range for the collection * * \return proxy to the new collection @@ -226,7 +237,7 @@ struct CollectionManager typename ColT, mapping::ActiveMapTypedFnType fn > IsDefaultConstructableType constructCollective( - typename ColT::IndexType range + std::string const& label, typename ColT::IndexType range ); /** @@ -239,6 +250,7 @@ struct CollectionManager * function for every index in the system based on the where each index is * mapped with the \c MapFnT. * + * \param[in] label collection label * \param[in] range index range for the collection * \param[in] cons_fn construct function to create an element on each node * @@ -248,7 +260,8 @@ struct CollectionManager typename ColT, mapping::ActiveMapTypedFnType fn > CollectionProxyWrapType constructCollective( - typename ColT::IndexType range, DistribConstructFn cons_fn + std::string const& label, typename ColT::IndexType range, + DistribConstructFn cons_fn ); /** @@ -259,13 +272,14 @@ struct CollectionManager * method enables distributed SPMD construction of the virtual context * collection where each index is mapped with the default mapping function. * + * \param[in] label collection label * \param[in] range index range for the collection * * \return proxy to the new collection */ template IsDefaultConstructableType constructCollective( - typename ColT::IndexType range + std::string const& label, typename ColT::IndexType range ); /** @@ -278,6 +292,7 @@ struct CollectionManager * function for every index in the system based on the where each index is * mapped with the default mapping function for this index type selected. * + * \param[in] label collection label * \param[in] range index range for the collection * \param[in] cons_fn construct function to create an element on each node * @@ -285,7 +300,8 @@ struct CollectionManager */ template CollectionProxyWrapType constructCollective( - typename ColT::IndexType range, DistribConstructFn cons_fn + std::string const& label, typename ColT::IndexType range, + DistribConstructFn cons_fn ); /** @@ -298,6 +314,7 @@ struct CollectionManager * will invoke that function for every index in the system based on the where * each index is mapped with the registered map function. * + * \param[in] label collection label * \param[in] range index range for the collection * \param[in] cons_fn construct function to create an element on each node * \param[in] map_han the registered map function @@ -306,8 +323,8 @@ struct CollectionManager */ template CollectionProxyWrapType constructCollectiveMap( - typename ColT::IndexType range, DistribConstructFn cons_fn, - HandlerType const map_han + std::string const& label, typename ColT::IndexType range, + DistribConstructFn cons_fn, HandlerType const map_han ); /** @@ -1625,6 +1642,7 @@ struct CollectionManager * * \note Resets the phase to 0 for every element. * + * \param[in] label collection label * \param[in] range the range of the collection to restart * \param[in] file_base the base file name for the files to read * @@ -1632,7 +1650,8 @@ struct CollectionManager */ template CollectionProxyWrapType restoreFromFile( - typename ColT::IndexType range, std::string const& file_base + std::string const& label, typename ColT::IndexType range, + std::string const& file_base ); /** diff --git a/src/vt/vrt/collection/manager.impl.h b/src/vt/vrt/collection/manager.impl.h index 05afadd191..a0cd8974a4 100644 --- a/src/vt/vrt/collection/manager.impl.h +++ b/src/vt/vrt/collection/manager.impl.h @@ -1234,30 +1234,35 @@ bool CollectionManager::insertCollectionElement( template CollectionManager::IsDefaultConstructableType -CollectionManager::constructCollective(typename ColT::IndexType range) { +CollectionManager::constructCollective( + std::string const& label, typename ColT::IndexType range +) { auto const map_han = getDefaultMap(); - auto cons_fn = [](typename ColT::IndexType){return std::make_unique();}; - return constructCollectiveMap(range,cons_fn,map_han); + auto cons_fn = [](typename ColT::IndexType) { return std::make_unique(); }; + return constructCollectiveMap(label, range, cons_fn, map_han); } template CollectionManager::CollectionProxyWrapType CollectionManager::constructCollective( - typename ColT::IndexType range, DistribConstructFn cons_fn + std::string const& label, typename ColT::IndexType range, + DistribConstructFn cons_fn ) { auto const map_han = getDefaultMap(); - return constructCollectiveMap(range,cons_fn,map_han); + return constructCollectiveMap(label, range, cons_fn, map_han); } template < typename ColT, mapping::ActiveMapTypedFnType fn > CollectionManager::IsDefaultConstructableType -CollectionManager::constructCollective(typename ColT::IndexType range) { +CollectionManager::constructCollective( + std::string const& label, typename ColT::IndexType range +) { using IndexT = typename ColT::IndexType; auto cons_fn = [](typename ColT::IndexType){return std::make_unique();}; auto const& map_han = auto_registry::makeAutoHandlerMap(); - return constructCollectiveMap(range,cons_fn,map_han); + return constructCollectiveMap(label, range, cons_fn, map_han); } template < @@ -1265,20 +1270,21 @@ template < > CollectionManager::CollectionProxyWrapType CollectionManager::constructCollective( - typename ColT::IndexType range, DistribConstructFn cons_fn + std::string const& label, typename ColT::IndexType range, + DistribConstructFn cons_fn ) { using IndexT = typename ColT::IndexType; auto const& map_han = auto_registry::makeAutoHandlerMap(); - return constructCollectiveMap(range,cons_fn,map_han); + return constructCollectiveMap(label, range, cons_fn, map_han); } template CollectionManager::CollectionProxyWrapType CollectionManager::constructCollectiveMap( - typename ColT::IndexType range, DistribConstructFn user_construct_fn, - HandlerType const map_han + std::string const& label, typename ColT::IndexType range, + DistribConstructFn user_construct_fn, HandlerType const map_han ) { - return vt::makeCollection() + return vt::makeCollection(label) .bounds(range) .bulkInsert() .mapperHandler(map_han) @@ -1378,27 +1384,32 @@ void CollectionManager::constructGroup(VirtualProxyType const& proxy) { template CollectionManager::CollectionProxyWrapType -CollectionManager::construct(typename ColT::IndexType range) { +CollectionManager::construct( + std::string const& label, typename ColT::IndexType range +) { auto const map_han = getDefaultMap(); - return constructMap(range,map_han); + return constructMap(label, range, map_han); } template < typename ColT, mapping::ActiveMapTypedFnType fn > CollectionManager::CollectionProxyWrapType -CollectionManager::construct(typename ColT::IndexType range) { +CollectionManager::construct( + std::string const& label, typename ColT::IndexType range +) { using IndexT = typename ColT::IndexType; auto const& map_han = auto_registry::makeAutoHandlerMap(); - return constructMap(range, map_han); + return constructMap(label, range, map_han); } template CollectionManager::CollectionProxyWrapType CollectionManager::constructMap( - typename ColT::IndexType range, HandlerType const map_handler + std::string const& label, typename ColT::IndexType range, + HandlerType const map_handler ) { - return vt::makeCollection() + return vt::makeCollection(label) .bounds(range) .bulkInsert() .collective(false) @@ -2269,7 +2280,8 @@ void CollectionManager::restoreFromFileInPlace( template CollectionManager::CollectionProxyWrapType CollectionManager::restoreFromFile( - typename ColT::IndexType range, std::string const& file_base + std::string const& label, typename ColT::IndexType range, + std::string const& file_base ) { using IndexType = typename ColT::IndexType; using DirectoryType = CollectionDirectory; @@ -2309,7 +2321,7 @@ CollectionManager::restoreFromFile( elms.emplace_back(std::make_tuple(idx, std::move(col_ptr))); } - return vt::makeCollection() + return vt::makeCollection(label) .bounds(range) .collective(true) .listInsertHere(std::move(elms)) diff --git a/src/vt/vrt/collection/param/construct_params.h b/src/vt/vrt/collection/param/construct_params.h index d543bf62f7..1fa5b3c77c 100644 --- a/src/vt/vrt/collection/param/construct_params.h +++ b/src/vt/vrt/collection/param/construct_params.h @@ -50,6 +50,7 @@ #include #include +#include #include @@ -67,7 +68,9 @@ struct ConstructParams; /// fwd-declare the builder helper function template -ConstructParams makeCollectionImpl(bool const is_collective); +ConstructParams makeCollectionImpl( + std::string const& label, bool const is_collective +); /** * \struct ConstructParams @@ -89,11 +92,11 @@ struct ConstructParams { private: struct BuilderTag{}; - ConstructParams(BuilderTag, bool const in_is_collective) - : collective_(in_is_collective) + ConstructParams(BuilderTag, std::string const& label, bool const in_is_collective) + : collective_(in_is_collective), label_(label) {} - friend ThisType makeCollectionImpl(bool const); + friend ThisType makeCollectionImpl(std::string const&, bool const); public: ConstructParams() = default; @@ -111,7 +114,8 @@ struct ConstructParams { migratable_(x.migratable_), map_han_(x.map_han_), proxy_bits_(x.proxy_bits_), - map_object_(x.map_object_) + map_object_(x.map_object_), + label_(x.label_) { vtAssert( not collective_, @@ -403,7 +407,8 @@ struct ConstructParams { | migratable_ | map_han_ | proxy_bits_ - | map_object_; + | map_object_ + | label_; s.skip(list_inserts_); s.skip(list_insert_here_); s.skip(cons_fn_); @@ -424,13 +429,16 @@ struct ConstructParams { HandlerType map_han_ = uninitialized_handler; VirtualProxyType proxy_bits_ = no_vrt_proxy; ObjGroupProxyType map_object_ = no_obj_group; + std::string label_ = ""; }; template -ConstructParams makeCollectionImpl(bool const is_collective) { +ConstructParams makeCollectionImpl( + std::string const& label, bool const is_collective +) { using ConsType = ConstructParams; using TagType = typename ConsType::BuilderTag; - return ConsType{TagType{}, is_collective}; + return ConsType{TagType{}, label, is_collective}; } }}}} /* end namespace vt::vrt::collection::param */ @@ -441,28 +449,32 @@ namespace vt { * \brief Collectively construct a new collection with the parameter object * builder * - * \param[in] bounds the bounds for the collection (optional) + * \param[in] label collection label * * \return the parameter configuration object */ template -vrt::collection::param::ConstructParams makeCollection() { +vrt::collection::param::ConstructParams makeCollection( + const std::string& label +) { bool const is_collective = true; - return vrt::collection::param::makeCollectionImpl(is_collective); + return vrt::collection::param::makeCollectionImpl(label, is_collective); } /** * \brief Construct a new collection (from a single node) with the parameter * object builder * - * \param[in] bounds the bounds for the collection (optional) + * \param[in] label collection label * * \return the parameter configuration object */ template -vrt::collection::param::ConstructParams makeCollectionRooted() { +vrt::collection::param::ConstructParams makeCollectionRooted( + const std::string& label +) { bool const is_collective = false; - return vrt::collection::param::makeCollectionImpl(is_collective); + return vrt::collection::param::makeCollectionImpl(label, is_collective); } } /* end namespace vt */ diff --git a/tests/unit/collection/test_broadcast.cc b/tests/unit/collection/test_broadcast.cc index 15c820d22d..3b2212ad14 100644 --- a/tests/unit/collection/test_broadcast.cc +++ b/tests/unit/collection/test_broadcast.cc @@ -52,7 +52,7 @@ namespace vt { namespace tests { namespace unit { namespace bcast { TYPED_TEST_P(TestBroadcast, test_broadcast_basic_1) { - test_broadcast_1(); + test_broadcast_1("test_broadcast_basic_1"); } REGISTER_TYPED_TEST_SUITE_P(TestBroadcast, test_broadcast_basic_1); diff --git a/tests/unit/collection/test_broadcast.extended.cc b/tests/unit/collection/test_broadcast.extended.cc index 7a365b4f61..2ae07e0efc 100644 --- a/tests/unit/collection/test_broadcast.extended.cc +++ b/tests/unit/collection/test_broadcast.extended.cc @@ -53,7 +53,7 @@ namespace vt { namespace tests { namespace unit { namespace bcast { TYPED_TEST_P(TestBroadcast, test_broadcast_extended_1) { - test_broadcast_1(); + test_broadcast_1("test_broadcast_extended_1"); } REGISTER_TYPED_TEST_SUITE_P(TestBroadcast, test_broadcast_extended_1); diff --git a/tests/unit/collection/test_broadcast.h b/tests/unit/collection/test_broadcast.h index 100f9e30ee..d254375527 100644 --- a/tests/unit/collection/test_broadcast.h +++ b/tests/unit/collection/test_broadcast.h @@ -119,7 +119,7 @@ struct TestBroadcast : TestParallelHarness {}; TYPED_TEST_SUITE_P(TestBroadcast); template -void test_broadcast_1(){ +void test_broadcast_1(std::string const& label) { using MsgType = typename ColType::MsgType; using TestParamType = typename ColType::ParamType; @@ -128,7 +128,7 @@ void test_broadcast_1(){ auto const& col_size = 32; auto range = TestIndex(col_size); TestParamType args = ConstructTuple::construct(); - auto proxy = theCollection()->construct(range); + auto proxy = theCollection()->construct(label, range); proxy.template broadcast< MsgType, diff --git a/tests/unit/collection/test_checkpoint.extended.cc b/tests/unit/collection/test_checkpoint.extended.cc index 48e2614b9f..3c2daf5805 100644 --- a/tests/unit/collection/test_checkpoint.extended.cc +++ b/tests/unit/collection/test_checkpoint.extended.cc @@ -175,7 +175,9 @@ TEST_F(TestCheckpoint, test_checkpoint_1) { auto checkpoint_name = "test_checkpoint_dir"; { - auto proxy = vt::theCollection()->constructCollective(range); + auto proxy = vt::theCollection()->constructCollective( + "test_checkpoint_1", range + ); vt::runInEpochCollective([&]{ if (this_node == 0) { @@ -217,7 +219,7 @@ TEST_F(TestCheckpoint, test_checkpoint_1) { { auto proxy = vt::theCollection()->restoreFromFile( - range, checkpoint_name + "test_checkpoint_1", range, checkpoint_name ); // Restoration should be done now @@ -246,7 +248,9 @@ TEST_F(TestCheckpoint, test_checkpoint_in_place_2) { auto range = vt::Index3D(num_nodes, num_elms, 4); auto checkpoint_name = "test_checkpoint_dir"; - auto proxy = vt::theCollection()->constructCollective(range); + auto proxy = vt::theCollection()->constructCollective( + "test_checkpoint_in_place_2", range + ); theConfig()->vt_lb = true; theConfig()->vt_lb_name = "TemperedLB"; @@ -316,7 +320,9 @@ TEST_F(TestCheckpoint, test_checkpoint_in_place_3) { auto range = vt::Index3D(num_nodes, num_elms, 4); auto checkpoint_name = "test_checkpoint_dir_2"; - auto proxy = vt::theCollection()->constructCollective(range); + auto proxy = vt::theCollection()->constructCollective( + "test_checkpoint_in_place_3", range + ); theConfig()->vt_lb = true; theConfig()->vt_lb_name = "TemperedLB"; @@ -354,7 +360,9 @@ TEST_F(TestCheckpoint, test_checkpoint_in_place_3) { } }); - auto proxy_new = vt::theCollection()->constructCollective(range); + auto proxy_new = vt::theCollection()->constructCollective( + "test_checkpoint_in_place_3", range + ); vt::runInEpochCollective([&]{ // Now, restore from the previous distribution diff --git a/tests/unit/collection/test_collection_construct_common.h b/tests/unit/collection/test_collection_construct_common.h index d007d1913f..ed8afaa893 100644 --- a/tests/unit/collection/test_collection_construct_common.h +++ b/tests/unit/collection/test_collection_construct_common.h @@ -90,12 +90,15 @@ struct ConstructParams { using IndexType = typename ColT::IndexType; using ProxyType = CollectionIndexProxy; - static ProxyType construct(IndexType idx) { - return theCollection()->construct(idx); + static ProxyType construct(std::string const& label, IndexType idx) { + return theCollection()->construct(label, idx); } - static ProxyType constructCollective(IndexType idx) { + + static ProxyType constructCollective( + std::string const& label, IndexType idx + ) { return theCollection()->constructCollective( - idx,[=](IndexType my_idx) { + label, idx, [=](IndexType my_idx) { return std::make_unique(); } ); @@ -106,14 +109,14 @@ TYPED_TEST_SUITE_P(TestConstruct); TYPED_TEST_SUITE_P(TestConstructDist); template -void test_construct_1() { +void test_construct_1(std::string const& label) { using MsgType = typename ColType::MsgType; auto const& this_node = theContext()->getNode(); if (this_node == 0) { auto const& col_size = 32; auto rng = TestIndex(col_size); - auto proxy = ConstructParams::construct(rng); + auto proxy = ConstructParams::construct(label, rng); proxy.template broadcast< MsgType, ConstructHandlers::handler @@ -127,7 +130,9 @@ void test_construct_distributed_1() { auto const& col_size = 32; auto rng = TestIndex(col_size); - auto proxy = ConstructParams::constructCollective(rng); + auto proxy = ConstructParams::constructCollective( + "test_construct_distributed_1", rng + ); proxy.template broadcast< MsgType, ConstructHandlers::handler diff --git a/tests/unit/collection/test_collection_group.extended.cc b/tests/unit/collection/test_collection_group.extended.cc index 6e0eb0f579..07e9861c35 100644 --- a/tests/unit/collection/test_collection_group.extended.cc +++ b/tests/unit/collection/test_collection_group.extended.cc @@ -122,7 +122,9 @@ TEST_F(TestCollectionGroup, test_collection_group_1) { auto const num_nodes = theContext()->getNumNodes(); if (my_node == 0) { auto const range = Index1D(std::max(num_nodes / 2, 1)); - auto const proxy = theCollection()->construct(range); + auto const proxy = theCollection()->construct( + "test_collection_group_1", range + ); proxy.broadcast(); } } @@ -131,8 +133,8 @@ TEST_F(TestCollectionGroup, test_collection_group_2) { auto const my_node = theContext()->getNode(); auto const range = Index1D(8); - auto const proxy = - theCollection()->constructCollective(range, [](vt::Index1D idx){ + auto const proxy = theCollection()->constructCollective( + "test_collection_group_2", range, [](vt::Index1D idx) { ++elem_counter; return std::make_unique(); } @@ -172,8 +174,8 @@ TEST_F(TestCollectionGroup, test_collection_group_3) { auto const my_node = theContext()->getNode(); auto const range = Index1D(8); - auto const proxy = - theCollection()->constructCollective(range, [](vt::Index1D idx){ + auto const proxy = theCollection()->constructCollective( + "test_collection_group_3", range, [](vt::Index1D idx) { ++elem_counter; return std::make_unique(); } @@ -277,7 +279,10 @@ struct TestCollection : Collection { TEST_F(TestCollectionGroup, test_collection_group_serialize_when_broadcast) { auto const range = Index1D{static_cast(theContext()->getNumNodes())}; auto const proxy = - makeCollection().bounds(range).bulkInsert().wait(); + makeCollection("test_collection_group_serialize_when_broadcast") + .bounds(range) + .bulkInsert() + .wait(); // Broadcast from each node runInEpochCollective([proxy] { @@ -292,7 +297,10 @@ TEST_F(TestCollectionGroup, test_collection_group_serialize_when_broadcast) { TEST_F(TestCollectionGroup, test_collection_group_dont_serialize_when_invoke) { auto const range = Index1D{static_cast(theContext()->getNumNodes())}; - auto const proxy = makeCollection().bounds(range).bulkInsert().wait(); + auto const proxy = makeCollection("test_collection_group_dont_serialize_when_invoke") + .bounds(range) + .bulkInsert() + .wait(); runInEpochCollective([proxy] { auto const this_node = theContext()->getNode(); diff --git a/tests/unit/collection/test_collection_group_recreate.cc b/tests/unit/collection/test_collection_group_recreate.cc index 89e502f50d..2529141ac9 100644 --- a/tests/unit/collection/test_collection_group_recreate.cc +++ b/tests/unit/collection/test_collection_group_recreate.cc @@ -81,7 +81,7 @@ TEST_F(TestCollectionGroupRecreate, test_collection_group_recreate_1) { // the reduction to finish properly auto const range = Index1D(std::max(num_nodes / 2, 1)); auto const proxy = theCollection()->constructCollective( - range, [](vt::Index1D) { + "test_collection_group_recreate_1", range, [](vt::Index1D) { return std::make_unique(); } ); diff --git a/tests/unit/collection/test_construct.cc b/tests/unit/collection/test_construct.cc index 651fb796a8..f25391b83f 100644 --- a/tests/unit/collection/test_construct.cc +++ b/tests/unit/collection/test_construct.cc @@ -65,7 +65,7 @@ using CollectionTestTypes = testing::Types; using CollectionTestDistTypes = testing::Types; TYPED_TEST_P(TestConstruct, test_construct_basic_1) { - test_construct_1(); + test_construct_1("test_construct_basic_1"); } TYPED_TEST_P(TestConstructDist, test_construct_distributed_basic_1) { diff --git a/tests/unit/collection/test_construct_no_idx.extended.cc b/tests/unit/collection/test_construct_no_idx.extended.cc index be7202487e..6cec8eb1da 100644 --- a/tests/unit/collection/test_construct_no_idx.extended.cc +++ b/tests/unit/collection/test_construct_no_idx.extended.cc @@ -63,7 +63,7 @@ struct ColMsg : CollectionMessage {}; using CollectionTestTypes = testing::Types; TYPED_TEST_P(TestConstruct, test_construct_no_idx_extended_1) { - test_construct_1(); + test_construct_1("test_construct_no_idx_extended_1"); } TYPED_TEST_P(TestConstructDist, test_construct_distributed_no_idx_extended_1) { diff --git a/tests/unit/collection/test_destroy.cc b/tests/unit/collection/test_destroy.cc index 196c36bdfa..35a7cab2bd 100644 --- a/tests/unit/collection/test_destroy.cc +++ b/tests/unit/collection/test_destroy.cc @@ -112,7 +112,9 @@ TEST_F(TestDestroy, test_destroy_1) { vt::runInEpochCollective([&]{ if (this_node == 0) { auto const& range = Index1D(num_nodes * num_elms_per_node); - auto proxy = theCollection()->construct(range); + auto proxy = theCollection()->construct( + "test_destroy_1", range + ); // ::fmt::print("broadcasting proxy={:x}\n", proxy.getProxy()); proxy.broadcast(); diff --git a/tests/unit/collection/test_index_types.extended.cc b/tests/unit/collection/test_index_types.extended.cc index be8fea7c89..7db2af6489 100644 --- a/tests/unit/collection/test_index_types.extended.cc +++ b/tests/unit/collection/test_index_types.extended.cc @@ -104,7 +104,9 @@ TYPED_TEST_P(TestCollectionIndexTypes, test_collection_index_1) { if (this_node == 0) { auto const& col_size = 32; auto range = IndexType(static_cast(col_size)); - auto proxy = theCollection()->construct(range); + auto proxy = theCollection()->construct( + "test_collection_index_1", range + ); for (BaseIndexType i = 0; i < static_cast(col_size); i++) { auto msg = makeMessage(34); if (i % 2 == 0) { diff --git a/tests/unit/collection/test_insert.extended.cc b/tests/unit/collection/test_insert.extended.cc index 035d65738c..7c104b38cb 100644 --- a/tests/unit/collection/test_insert.extended.cc +++ b/tests/unit/collection/test_insert.extended.cc @@ -95,7 +95,7 @@ TEST_F(TestInsert, test_insert_dense_1) { auto const num_nodes = theContext()->getNumNodes(); auto const range = Index1D(num_nodes * num_elms_per_node); - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("test_insert_dense_1") .collective(true) .dynamicMembership(true) .bounds(range) @@ -136,7 +136,7 @@ TEST_F(TestInsert, test_insert_sparse_1) { auto const num_nodes = theContext()->getNumNodes(); auto const range = Index1D(num_nodes * num_elms_per_node * 16); - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("test_insert_sparse_1") .collective(true) .dynamicMembership(true) .bounds(range) @@ -161,7 +161,7 @@ TEST_F(TestInsert, test_insert_dense_node_1) { auto const num_nodes = theContext()->getNumNodes(); auto const range = Index1D(num_nodes * num_elms_per_node); - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("test_insert_dense_node_1") .collective(true) .dynamicMembership(true) .bounds(range) @@ -188,7 +188,7 @@ TEST_F(TestInsert, test_insert_sparse_node_1) { auto const num_nodes = theContext()->getNumNodes(); auto const range = Index1D(num_nodes * num_elms_per_node * 16); - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("test_insert_sparse_node_1") .collective(true) .dynamicMembership(true) .bounds(range) @@ -215,7 +215,7 @@ TEST_F(TestInsert, test_insert_send_dense_node_1) { auto const num_nodes = theContext()->getNumNodes(); auto const range = Index1D(num_nodes * num_elms_per_node); - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("test_insert_send_dense_node_1") .collective(true) .dynamicMembership(true) .bounds(range) @@ -253,7 +253,7 @@ TEST_F(TestInsert, test_insert_send_sparse_node_1) { auto const num_nodes = theContext()->getNumNodes(); auto const range = Index1D(num_nodes * num_elms_per_node * 16); - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("test_insert_send_sparse_node_1") .collective(true) .dynamicMembership(true) .bounds(range) diff --git a/tests/unit/collection/test_invoke.cc b/tests/unit/collection/test_invoke.cc index 46c749db46..94a9d99dff 100644 --- a/tests/unit/collection/test_invoke.cc +++ b/tests/unit/collection/test_invoke.cc @@ -85,7 +85,9 @@ TEST_F(TestCollectionInvoke, test_collection_invoke_1) { auto const& num_nodes = theContext()->getNumNodes(); auto const num_elems = Index1D{static_cast(num_nodes)}; - auto proxy = theCollection()->constructCollective(num_elems); + auto proxy = theCollection()->constructCollective( + "test_collection_invoke_1", num_elems + ); auto const dest_elem = Index1D{static_cast(this_node)}; diff --git a/tests/unit/collection/test_lb.extended.cc b/tests/unit/collection/test_lb.extended.cc index c1eaf3705c..0d59743a53 100644 --- a/tests/unit/collection/test_lb.extended.cc +++ b/tests/unit/collection/test_lb.extended.cc @@ -92,7 +92,7 @@ void colHandler(MyMsg*, MyCol* col) { struct TestLoadBalancerOther : TestParallelHarnessParam { }; struct TestLoadBalancerGreedy : TestParallelHarnessParam { }; -void runTest(std::string const& lb_name) { +void runTest(std::string const& lb_name, std::string const& label) { vt::theConfig()->vt_lb = true; vt::theConfig()->vt_lb_name = lb_name; if (vt::theContext()->getNode() == 0) { @@ -123,7 +123,7 @@ void runTest(std::string const& lb_name) { // Construct a collection runInEpochCollective([&]{ - proxy = vt::theCollection()->constructCollective(range); + proxy = vt::theCollection()->constructCollective(label, range); }); for (int phase = 0; phase < num_phases; phase++) { @@ -139,21 +139,21 @@ void runTest(std::string const& lb_name) { } TEST_P(TestLoadBalancerOther, test_load_balancer_other_1) { - runTest(GetParam()); + runTest(GetParam(), "test_load_balancer_other_1"); } TEST_P(TestLoadBalancerOther, test_load_balancer_other_keep_last_elm) { vt::theConfig()->vt_lb_keep_last_elm = true; - runTest(GetParam()); + runTest(GetParam(), "test_load_balancer_other_keep_last_elm"); } TEST_P(TestLoadBalancerGreedy, test_load_balancer_greedy_2) { - runTest(GetParam()); + runTest(GetParam(), "test_load_balancer_greedy_2"); } TEST_P(TestLoadBalancerGreedy, test_load_balancer_greedy_keep_last_elm) { vt::theConfig()->vt_lb_keep_last_elm = true; - runTest(GetParam()); + runTest(GetParam(), "test_load_balancer_greedy_keep_last_elm"); } TEST_F(TestLoadBalancerOther, test_make_graph_symmetric) { @@ -229,7 +229,8 @@ TEST_F(TestLoadBalancerNoWork, test_load_balancer_no_work) { auto const num_nodes = theContext()->getNumNodes(); auto const range = Index1D(num_nodes * 8); theCollection()->constructCollective( - range, [](vt::Index1D) { return std::make_unique(); } + "test_load_balancer_no_work", range, + [](vt::Index1D) { return std::make_unique(); } ); vt::theConfig()->vt_lb = true; @@ -304,7 +305,9 @@ TEST_P(TestNodeLBDataDumper, test_node_lb_data_dumping_with_interval) { // Construct a collection runInEpochCollective([&] { - proxy = vt::theCollection()->constructCollective(range); + proxy = vt::theCollection()->constructCollective( + "test_node_stats_dumping_with_interval", range + ); }); for (int phase = 0; phase < num_phases; phase++) { @@ -417,7 +420,9 @@ TEST_F(TestRestoreLBData, test_restore_lb_data_data_1) { // Construct a collection runInEpochCollective([&] { - proxy = vt::theCollection()->constructCollective(range); + proxy = vt::theCollection()->constructCollective( + "test_restore_stats_data_1", range + ); }); vt::vrt::collection::balance::LBDataHolder lbdh; diff --git a/tests/unit/collection/test_lb_data_retention.cc b/tests/unit/collection/test_lb_data_retention.cc index f7a00bc9a3..d50d4ea9d4 100644 --- a/tests/unit/collection/test_lb_data_retention.cc +++ b/tests/unit/collection/test_lb_data_retention.cc @@ -125,7 +125,9 @@ TEST_F(TestLBDataRetention, test_lbdata_retention_last1) { // Construct two collections runInEpochCollective([&]{ - proxy = vt::theCollection()->constructCollective(range); + proxy = vt::theCollection()->constructCollective( + "test_lbstats_retention_last1", range + ); }); // Get the base model, assert it's valid @@ -161,7 +163,9 @@ TEST_F(TestLBDataRetention, test_lbdata_retention_last2) { // Construct two collections runInEpochCollective([&]{ - proxy = vt::theCollection()->constructCollective(range); + proxy = vt::theCollection()->constructCollective( + "test_lbstats_retention_last2", range + ); }); // Get the base model, assert it's valid @@ -197,7 +201,9 @@ TEST_F(TestLBDataRetention, test_lbdata_retention_last4) { // Construct two collections runInEpochCollective([&]{ - proxy = vt::theCollection()->constructCollective(range); + proxy = vt::theCollection()->constructCollective( + "test_lbstats_retention_last4", range + ); }); // Get the base model, assert it's valid diff --git a/tests/unit/collection/test_lb_lite.extended.cc b/tests/unit/collection/test_lb_lite.extended.cc index 88dc79d756..de9359b987 100644 --- a/tests/unit/collection/test_lb_lite.extended.cc +++ b/tests/unit/collection/test_lb_lite.extended.cc @@ -130,7 +130,9 @@ struct TestLB : TestParallelHarness { }; TEST_F(TestLB, test_lb_1) { auto const& this_node = theContext()->getNode(); auto const& range = Index1D(32); - auto proxy = theCollection()->constructCollective(range); + auto proxy = theCollection()->constructCollective( + "test_lb_1", range + ); for (int i = 0; i < num_iter; i++) { auto cur_time = vt::timing::getCurrentTime(); diff --git a/tests/unit/collection/test_list_insert.cc b/tests/unit/collection/test_list_insert.cc index 4b74d54b4a..f96981c83e 100644 --- a/tests/unit/collection/test_list_insert.cc +++ b/tests/unit/collection/test_list_insert.cc @@ -99,7 +99,7 @@ TEST_F(TestListInsert, test_bounded_list_insert_1) { list_insert.emplace_back(Index1D{i}); } - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("test_bounded_list_insert_1") .collective(true) .bounds(range) .listInsert(list_insert) @@ -137,7 +137,7 @@ TEST_F(TestListInsert, test_unbounded_list_insert_2) { list_insert.emplace_back(Index1D{i}); } - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("test_unbounded_list_insert_2") .collective(true) .listInsert(list_insert) .template mapperObjGroupConstruct>() @@ -171,7 +171,7 @@ TEST_F(TestListInsert, test_bounded_list_insert_here_3) { } } - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("test_bounded_list_insert_here_3") .collective(true) .bounds(range) .listInsertHere(std::move(elms)) @@ -205,7 +205,7 @@ TEST_F(TestListInsert, test_unbounded_list_insert_here_4) { } } - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("test_unbounded_list_insert_here_4") .collective(true) .listInsertHere(std::move(elms)) .template mapperObjGroupConstruct>() diff --git a/tests/unit/collection/test_mapping.cc b/tests/unit/collection/test_mapping.cc index 0e0adfe25b..e1574ef498 100644 --- a/tests/unit/collection/test_mapping.cc +++ b/tests/unit/collection/test_mapping.cc @@ -184,7 +184,7 @@ TYPED_TEST_P(TestMapping, test_custom_mapping_1) { } }); - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("test_custom_mapping_1") .bulkInsert(range) .mapperObjGroup(my_proxy) .wait(); @@ -196,7 +196,7 @@ TYPED_TEST_P(TestMapping, test_custom_mapping_1) { EXPECT_EQ(counter, num_work); num_work = 0; - auto proxy2 = vt::makeCollection() + auto proxy2 = vt::makeCollection("test_custom_mapping_1") .bulkInsert(range) .template mapperObjGroupConstruct() .wait(); diff --git a/tests/unit/collection/test_model_per_collection.extended.cc b/tests/unit/collection/test_model_per_collection.extended.cc index 1b33a20394..41fa1cdaf6 100644 --- a/tests/unit/collection/test_model_per_collection.extended.cc +++ b/tests/unit/collection/test_model_per_collection.extended.cc @@ -108,8 +108,12 @@ TEST_F(TestModelPerCollection, test_model_per_collection_1) { // Construct two collections runInEpochCollective([&]{ - proxy1 = vt::theCollection()->constructCollective(range); - proxy2 = vt::theCollection()->constructCollective(range); + proxy1 = vt::theCollection()->constructCollective( + "test_model_per_collection_1", range + ); + proxy2 = vt::theCollection()->constructCollective( + "test_model_per_collection_1", range + ); }); // Get the base model, assert it's valid diff --git a/tests/unit/collection/test_query_context.cc b/tests/unit/collection/test_query_context.cc index da24a49d37..8674c4bbbd 100644 --- a/tests/unit/collection/test_query_context.cc +++ b/tests/unit/collection/test_query_context.cc @@ -77,7 +77,9 @@ TEST_F(TestQueryContext, test_query_context_broadcast_1) { auto const& num_nodes = theContext()->getNumNodes(); if (this_node == 0) { auto const& range = Index1D(num_nodes * num_elms_per_node); - auto proxy = theCollection()->construct(range); + auto proxy = theCollection()->construct( + "test_query_context_broadcast_1", range + ); for (int i = 0; i < 10; i++) { proxy.broadcast(); } @@ -89,7 +91,9 @@ TEST_F(TestQueryContext, test_query_context_send_1) { auto const& num_nodes = theContext()->getNumNodes(); if (this_node == 0) { auto const& range = Index1D(num_nodes * num_elms_per_node); - auto proxy = theCollection()->construct(range); + auto proxy = theCollection()->construct( + "test_query_context_send_1", range + ); for (int i = 0; i < num_nodes * num_elms_per_node; i++) { proxy[i].send(); } diff --git a/tests/unit/collection/test_reduce_collection.cc b/tests/unit/collection/test_reduce_collection.cc index c02d8da8a4..a64c7c68dd 100644 --- a/tests/unit/collection/test_reduce_collection.cc +++ b/tests/unit/collection/test_reduce_collection.cc @@ -57,7 +57,7 @@ TEST_P(TestReduceCollection, test_reduce_op) { auto reduce_case = GetParam(); auto size = (reduce_case == 5 ? collect_size * 4 : collect_size); auto const& range = Index1D(size); - auto proxy = theCollection()->construct(range); + auto proxy = theCollection()->construct("test_reduce_op", range); switch (reduce_case) { case 0: proxy.broadcast(my_node); break; diff --git a/tests/unit/collection/test_reduce_collection_race.cc b/tests/unit/collection/test_reduce_collection_race.cc index ea6aa81207..6f9e037b4e 100644 --- a/tests/unit/collection/test_reduce_collection_race.cc +++ b/tests/unit/collection/test_reduce_collection_race.cc @@ -75,7 +75,9 @@ TEST_P(TestReduceCollectionRace, test_reduce_race_1) { multipler = GetParam(); auto const range = Index1D(multipler * num_nodes); - auto proxy = theCollection()->constructCollective(range); + auto proxy = theCollection()->constructCollective( + "test_reduce_race_1", range + ); proxy.broadcastCollective(); proxy.broadcastCollective(); diff --git a/tests/unit/collection/test_send.cc b/tests/unit/collection/test_send.cc index db86a3d058..1c76866b2b 100644 --- a/tests/unit/collection/test_send.cc +++ b/tests/unit/collection/test_send.cc @@ -54,7 +54,7 @@ namespace vt { namespace tests { namespace unit { namespace send { TYPED_TEST_P(TestCollectionSend, test_collection_send_basic_1) { - test_collection_send_1(); + test_collection_send_1("test_collection_send_basic_1"); } TYPED_TEST_P(TestCollectionSendSz, test_collection_send_sz_basic_1) { @@ -62,7 +62,7 @@ TYPED_TEST_P(TestCollectionSendSz, test_collection_send_sz_basic_1) { } TYPED_TEST_P(TestCollectionSendMem, test_collection_send_ptm_basic_1) { - test_collection_send_ptm_1(); + test_collection_send_ptm_1("test_collection_send_ptm_basic_1"); } REGISTER_TYPED_TEST_SUITE_P(TestCollectionSend, test_collection_send_basic_1); diff --git a/tests/unit/collection/test_send.extended.cc b/tests/unit/collection/test_send.extended.cc index 92468623a6..958b9d309c 100644 --- a/tests/unit/collection/test_send.extended.cc +++ b/tests/unit/collection/test_send.extended.cc @@ -54,11 +54,11 @@ namespace vt { namespace tests { namespace unit { namespace send { TYPED_TEST_P(TestCollectionSend, test_collection_send_extended_1) { - test_collection_send_1(); + test_collection_send_1("test_collection_send_extended_1"); } TYPED_TEST_P(TestCollectionSendMem, test_collection_send_ptm_extended_1) { - test_collection_send_ptm_1(); + test_collection_send_ptm_1("test_collection_send_ptm_extended_1"); } REGISTER_TYPED_TEST_SUITE_P(TestCollectionSend, test_collection_send_extended_1); diff --git a/tests/unit/collection/test_send.h b/tests/unit/collection/test_send.h index 826aa03ec8..22760efa75 100644 --- a/tests/unit/collection/test_send.h +++ b/tests/unit/collection/test_send.h @@ -180,8 +180,8 @@ TYPED_TEST_SUITE_P(TestCollectionSend); TYPED_TEST_SUITE_P(TestCollectionSendSz); TYPED_TEST_SUITE_P(TestCollectionSendMem); -template -void test_collection_send_1() { +template +void test_collection_send_1(std::string const& label) { using MsgType = typename ColType::MsgType; using TestParamType = typename ColType::ParamType; @@ -190,7 +190,7 @@ void test_collection_send_1() { auto const& col_size = 32; auto range = TestIndex(col_size); TestParamType args = ConstructTuple::construct(); - auto proxy = theCollection()->construct(range); + auto proxy = theCollection()->construct(label, range); for (int i = 0; i < col_size; i++) { auto msg = makeMessage(args); EXPECT_EQ(msg.size(), sizeof(MsgType)); @@ -228,7 +228,7 @@ void test_collection_send_sz_1() { } template -void test_collection_send_ptm_1() { +void test_collection_send_ptm_1(std::string const& label) { using MsgType = typename ColType::MsgType; using TestParamType = typename ColType::ParamType; @@ -237,7 +237,7 @@ void test_collection_send_ptm_1() { auto const& col_size = 32; auto range = TestIndex(col_size); TestParamType args = ConstructTuple::construct(); - auto proxy = theCollection()->construct(range); + auto proxy = theCollection()->construct(label, range); for (int i = 0; i < col_size; i++) { auto msg = makeMessage(args); //proxy[i].template send::handler>(msg); diff --git a/tests/unit/collection/test_storage.cc b/tests/unit/collection/test_storage.cc index 5b45e8d155..d6a0ce5a26 100644 --- a/tests/unit/collection/test_storage.cc +++ b/tests/unit/collection/test_storage.cc @@ -114,7 +114,9 @@ TEST_F(TestCollectionStorage, test_collection_storage_1) { using MsgType = typename TestCol::TestMsg; - auto proxy = theCollection()->constructCollective(num_elms); + auto proxy = theCollection()->constructCollective( + "test_collection_storage_1", num_elms + ); runInEpochCollective([=]{ proxy.broadcastCollective(); diff --git a/tests/unit/collectives/test_collectives_reduce.cc b/tests/unit/collectives/test_collectives_reduce.cc index 53a6be6879..e39891b1e6 100644 --- a/tests/unit/collectives/test_collectives_reduce.cc +++ b/tests/unit/collectives/test_collectives_reduce.cc @@ -104,7 +104,7 @@ TEST_F(TestReduce, test_reduce_with_no_elements_on_root_rank) { objgroup_proxy = vt::theObjGroup()->makeCollective(); auto range = vt::Index1D(num_elms); - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("test_reduce_with_no_elements_on_root_rank") .bounds(range) .mapperFunc() .bulkInsert() diff --git a/tests/unit/lb/test_lb_data_comm.cc b/tests/unit/lb/test_lb_data_comm.cc index 917e7fc7b2..4b00535596 100644 --- a/tests/unit/lb/test_lb_data_comm.cc +++ b/tests/unit/lb/test_lb_data_comm.cc @@ -274,7 +274,7 @@ void doReduce(MyMsg*, MyCol* col) { // ColT -> ColT, expected communication edge on receive side TEST_F(TestLBDataComm, test_lb_data_comm_col_to_col_send) { auto range = vt::Index1D{dim1}; - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("test_lb_stats_comm_col_to_col_send") .bounds(range) .bulkInsert() .wait(); @@ -330,7 +330,7 @@ TEST_F(TestLBDataComm, test_lb_data_comm_col_to_col_send) { // ColT -> ObjGroup, expected communication edge on send side TEST_F(TestLBDataComm, test_lb_data_comm_col_to_objgroup_send) { auto range = vt::Index1D{dim1}; - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("test_lb_stats_comm_col_to_objgroup_send") .bounds(range) .bulkInsert() .wait(); @@ -393,7 +393,7 @@ TEST_F(TestLBDataComm, test_lb_data_comm_col_to_objgroup_send) { // ObjGroup -> ColT, expected communication edge on receive side TEST_F(TestLBDataComm, test_lb_data_comm_objgroup_to_col_send) { auto range = vt::Index1D{dim1}; - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("test_lb_stats_comm_objgroup_to_col_send") .bounds(range) .bulkInsert() .wait(); @@ -502,7 +502,7 @@ TEST_F(TestLBDataComm, test_lb_data_comm_objgroup_to_objgroup_send) { // Handler -> ColT, expected communication edge on receive side TEST_F(TestLBDataComm, test_lb_data_comm_handler_to_col_send) { auto range = vt::Index1D{dim1}; - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("test_lb_stats_comm_handler_to_col_send") .bounds(range) .bulkInsert() .wait(); @@ -562,7 +562,7 @@ TEST_F(TestLBDataComm, test_lb_data_comm_handler_to_col_send) { // ColT -> Handler, expected communication edge on send side TEST_F(TestLBDataComm, test_lb_data_comm_col_to_handler_send) { auto range = vt::Index1D{dim1}; - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("test_lb_stats_comm_col_to_handler_send") .bounds(range) .bulkInsert() .wait(); diff --git a/tests/unit/location/test_hops.extended.cc b/tests/unit/location/test_hops.extended.cc index 5df1a8e79c..fc7a09fedd 100644 --- a/tests/unit/location/test_hops.extended.cc +++ b/tests/unit/location/test_hops.extended.cc @@ -136,7 +136,9 @@ TEST_F(TestHops, test_hops_1) { auto this_node = theContext()->getNode(); auto const& range = vt::Index2D((int)num_nodes, (int)num_elms); - auto proxy = theCollection()->constructCollective(range); + auto proxy = theCollection()->constructCollective( + "test_hops_1", range + ); for (int i = 0; i < 100; i++) { if (this_node == 0) { diff --git a/tests/unit/phase/test_phase_insertions.cc b/tests/unit/phase/test_phase_insertions.cc index fb83aab5ed..1627118504 100644 --- a/tests/unit/phase/test_phase_insertions.cc +++ b/tests/unit/phase/test_phase_insertions.cc @@ -107,7 +107,7 @@ TEST_F(TestPhaseInsertions, test_phase_insertions_1) { auto num_nodes = theContext()->getNumNodes(); int insert_counter = range.x() / 2; - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("test_phase_insertions_1") .bounds(range) .mapperFunc() .collective(true) diff --git a/tests/unit/pipe/test_callback_bcast_collection.extended.cc b/tests/unit/pipe/test_callback_bcast_collection.extended.cc index 48d3a15287..0f2f348949 100644 --- a/tests/unit/pipe/test_callback_bcast_collection.extended.cc +++ b/tests/unit/pipe/test_callback_bcast_collection.extended.cc @@ -135,7 +135,9 @@ TEST_F(TestCallbackBcastCollection, test_callback_bcast_collection_1) { vt::CollectionProxy proxy; if (this_node == 0) { - proxy = theCollection()->construct(range); + proxy = theCollection()->construct( + "test_callback_bcast_collection_1", range + ); } runInEpochCollective([&]{ @@ -165,7 +167,9 @@ TEST_F(TestCallbackBcastCollection, test_callback_bcast_collection_2) { vt::CollectionProxy proxy; if (this_node == 0) { - proxy = theCollection()->construct(range); + proxy = theCollection()->construct( + "test_callback_bcast_collection_2", range + ); } runInEpochCollective([&]{ @@ -197,7 +201,9 @@ TEST_F(TestCallbackBcastCollection, test_callback_bcast_collection_3) { vt::CollectionProxy proxy; if (this_node == 0) { - proxy = theCollection()->construct(range); + proxy = theCollection()->construct( + "test_callback_bcast_collection_3", range + ); } runInEpochCollective([&]{ diff --git a/tests/unit/pipe/test_callback_send_collection.extended.cc b/tests/unit/pipe/test_callback_send_collection.extended.cc index 7548df0da7..02c1efac2c 100644 --- a/tests/unit/pipe/test_callback_send_collection.extended.cc +++ b/tests/unit/pipe/test_callback_send_collection.extended.cc @@ -132,7 +132,9 @@ TEST_F(TestCallbackSendCollection, test_callback_send_collection_1) { vt::CollectionProxy proxy; if (this_node == 0) { - proxy = theCollection()->construct(range); + proxy = theCollection()->construct( + "test_callback_send_collection_1", range + ); } runInEpochCollective([this_node, proxy]{ @@ -171,7 +173,9 @@ TEST_F(TestCallbackSendCollection, test_callback_send_collection_2) { vt::CollectionProxy proxy; if (this_node == 0) { - proxy = theCollection()->construct(range); + proxy = theCollection()->construct( + "test_callback_send_collection_2", range + ); } runInEpochCollective([this_node, num_nodes, proxy]{ @@ -207,7 +211,9 @@ TEST_F(TestCallbackSendCollection, test_callback_send_collection_3) { vt::CollectionProxy proxy; if (this_node == 0) { - proxy = theCollection()->construct(range); + proxy = theCollection()->construct( + "test_callback_send_collection_3", range + ); } runInEpochCollective([this_node, proxy]{ diff --git a/tests/unit/rdma/test_rdma_collection_handle.extended.cc b/tests/unit/rdma/test_rdma_collection_handle.extended.cc index 9a47ebb5f3..72eb0acaac 100644 --- a/tests/unit/rdma/test_rdma_collection_handle.extended.cc +++ b/tests/unit/rdma/test_rdma_collection_handle.extended.cc @@ -147,7 +147,9 @@ TYPED_TEST_P(TestRDMAHandleCollection, test_rdma_handle_collection_1) { runInEpochCollective([&]{ auto range = vt::Index2D(8,8); - proxy = theCollection()->constructCollective(range); + proxy = theCollection()->constructCollective( + "test_rdma_handle_collection_1", range + ); }); runInEpochCollective([=]{ diff --git a/tests/unit/termination/test_collection_chainset_tracking.cc b/tests/unit/termination/test_collection_chainset_tracking.cc index 93e0156ee7..af932be0c1 100644 --- a/tests/unit/termination/test_collection_chainset_tracking.cc +++ b/tests/unit/termination/test_collection_chainset_tracking.cc @@ -56,7 +56,7 @@ using TestCollectionChainsetTracking = TestParallelHarness; TEST_F(TestCollectionChainsetTracking, test_local_chainset_tracking) { auto num_nodes = theContext()->getNumNodes(); auto range = Index3D(2, static_cast(num_nodes), 3); - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("test_local_chainset_tracking") .collective(true) .bounds(range) .bulkInsert() @@ -97,7 +97,7 @@ TEST_F(TestCollectionChainsetTracking, test_local_chainset_tracking) { TEST_F(TestCollectionChainsetTracking, test_home_chainset_tracking) { auto num_nodes = theContext()->getNumNodes(); auto range = Index3D(2, static_cast(num_nodes), 3); - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("test_home_chainset_tracking") .collective(true) .bounds(range) .bulkInsert() diff --git a/tests/unit/termination/test_term_dep_send_chain.cc b/tests/unit/termination/test_term_dep_send_chain.cc index 42eac71659..de4d631f25 100644 --- a/tests/unit/termination/test_term_dep_send_chain.cc +++ b/tests/unit/termination/test_term_dep_send_chain.cc @@ -315,10 +315,10 @@ struct MyObjGroup { frontend_proxy_ = vt::theObjGroup()->makeCollective(this); } - void makeColl(NodeType num_nodes, int k) { + void makeColl(std::string const& label, NodeType num_nodes, int k) { auto range = vt::Index2D(static_cast(num_nodes),k); backend_proxy_ = vt::theCollection()->constructCollective( - range, [=](vt::Index2D idx) { + label, range, [=](vt::Index2D idx) { return std::make_unique(num_nodes, k); } ); @@ -475,7 +475,7 @@ TEST_P(TestTermDepSendChain, test_term_dep_send_chain) { auto local = std::make_unique(); local->startup(); local->makeVT(); - local->makeColl(num_nodes,k); + local->makeColl("test_term_dep_send_chain", num_nodes, k); // Must have barrier here so op4Impl does not bounce early (invalid proxy)! vt::theCollective()->barrier(); @@ -584,11 +584,11 @@ struct MergeObjGroup frontend_proxy_ = vt::theObjGroup()->makeCollective(this); } - void makeColl(NodeType num_nodes, int k, double offset) { + void makeColl(std::string const& label, NodeType num_nodes, int k, double offset) { auto const node = theContext()->getNode(); auto range = vt::Index2D(static_cast(num_nodes),k); backend_proxy_ = vt::theCollection()->constructCollective( - range, [=](vt::Index2D idx) { + label, range, [=](vt::Index2D idx) { return std::make_unique(num_nodes, offset); } ); @@ -675,12 +675,12 @@ TEST_P(TestTermDepSendChain, test_term_dep_send_chain_merge) { auto obj_a = std::make_unique(); obj_a->startup(); obj_a->makeVT(); - obj_a->makeColl(num_nodes,k, 0.0); + obj_a->makeColl("test_term_dep_send_chain_merge", num_nodes, k, 0.0); auto obj_b = std::make_unique(); obj_b->startup(); obj_b->makeVT(); - obj_b->makeColl(num_nodes,k, 1000.0); + obj_b->makeColl("test_term_dep_send_chain_merge", num_nodes, k, 1000.0); // Must have barrier here so op4Impl does not bounce early (invalid proxy)! vt::theCollective()->barrier(); diff --git a/tutorial/tutorial_2a.h b/tutorial/tutorial_2a.h index 9e16c1cdbd..d3e8f47056 100644 --- a/tutorial/tutorial_2a.h +++ b/tutorial/tutorial_2a.h @@ -88,7 +88,7 @@ static inline void collection() { // Construct the collection (collective variant): invoked by all nodes. By // default, the elements will be block mapped to the nodes - auto proxy = vt::makeCollection() + auto proxy = vt::makeCollection("tutorial_collection") .bounds(range) // Set the bounds for the collection .bulkInsert() // Bulk insert all the elements within the bounds .wait(); // Wait for construction and get the proxy back diff --git a/tutorial/tutorial_2b.h b/tutorial/tutorial_2b.h index f1749919d4..31737b1c3e 100644 --- a/tutorial/tutorial_2b.h +++ b/tutorial/tutorial_2b.h @@ -119,7 +119,7 @@ static inline void collectionReduce() { // Construct the collection in a rooted manner. By default, the elements // will be block mapped to the nodes - auto proxy = vt::makeCollectionRooted() + auto proxy = vt::makeCollectionRooted("tutorial_collection_reduce") .bounds(range) // Set the bounds for the collection .bulkInsert() // Bulk insert all the elements within the bounds .wait(); // Wait for construction and get the proxy back From 38e0c74ae55ef83667e616ea2285189135f6ccd7 Mon Sep 17 00:00:00 2001 From: Jakub Strzebonski Date: Wed, 27 Apr 2022 16:25:06 +0200 Subject: [PATCH 040/106] #1715 allow labels on object groups --- examples/callback/callback.cc | 2 +- examples/collection/jacobi1d_vt.cc | 2 +- examples/collection/jacobi2d_vt.cc | 2 +- examples/hello_world/objgroup.cc | 4 +- src/vt/messaging/collection_chain_set.impl.h | 4 +- src/vt/objgroup/manager.cc | 8 ++- src/vt/objgroup/manager.h | 49 ++++++++++++---- src/vt/objgroup/manager.impl.h | 32 +++++++---- src/vt/phase/phase_manager.cc | 4 +- src/vt/rdmahandle/manager.cc | 4 +- src/vt/rdmahandle/sub_handle.impl.h | 4 +- .../mapping/dense/unbounded_default.impl.h | 4 +- src/vt/trace/file_spec/spec.cc | 2 +- .../balance/lb_data_restart_reader.cc | 4 +- .../balance/lb_invoke/lb_manager.cc | 6 +- src/vt/vrt/collection/balance/node_lb_data.cc | 4 +- tests/perf/common/test_harness.cc | 4 +- tests/perf/ping_pong.cc | 4 +- tests/unit/active/test_async_op.cc | 2 +- tests/unit/active/test_async_op_threads.cc | 2 +- tests/unit/collection/test_list_insert.cc | 4 +- tests/unit/collection/test_mapping.cc | 12 ++-- .../collectives/test_collectives_reduce.cc | 4 +- tests/unit/lb/test_lb_data_comm.cc | 24 ++++++-- tests/unit/objgroup/test_objgroup.cc | 56 ++++++++++++------- tests/unit/rdma/test_rdma_handle.h | 14 +++-- .../test_rdma_static_sub_handle.extended.cc | 4 +- .../test_scheduler_priorities.extended.cc | 4 +- .../termination/test_term_dep_send_chain.cc | 8 ++- 29 files changed, 193 insertions(+), 84 deletions(-) diff --git a/examples/callback/callback.cc b/examples/callback/callback.cc index d73f33bbc1..df734afcaf 100644 --- a/examples/callback/callback.cc +++ b/examples/callback/callback.cc @@ -129,7 +129,7 @@ int main(int argc, char** argv) { return vt::rerror("requires at least 2 nodes"); } - auto obj = vt::theObjGroup()->makeCollective(); + auto obj = vt::theObjGroup()->makeCollective("examples_callback"); auto col = vt::makeCollection("examples_callback") .bounds(vt::Index1D(8)) .bulkInsert() diff --git a/examples/collection/jacobi1d_vt.cc b/examples/collection/jacobi1d_vt.cc index c752abe8e6..90311bdde1 100644 --- a/examples/collection/jacobi1d_vt.cc +++ b/examples/collection/jacobi1d_vt.cc @@ -367,7 +367,7 @@ int main(int argc, char** argv) { // Object group of all nodes that take part in computation // Used to determine whether the computation is finished - auto grp_proxy = vt::theObjGroup()->makeCollective(); + auto grp_proxy = vt::theObjGroup()->makeCollective("examples_jacobi1d"); // Create the decomposition into objects using BaseIndexType = typename vt::Index1D::DenseIndexType; diff --git a/examples/collection/jacobi2d_vt.cc b/examples/collection/jacobi2d_vt.cc index 1211f315d3..747da6a3dc 100644 --- a/examples/collection/jacobi2d_vt.cc +++ b/examples/collection/jacobi2d_vt.cc @@ -488,7 +488,7 @@ int main(int argc, char** argv) { // Object group of all nodes that take part in computation // Used to determine whether the computation is finished - auto grp_proxy = vt::theObjGroup()->makeCollective(); + auto grp_proxy = vt::theObjGroup()->makeCollective("examples_jacobi2d"); // Create the decomposition into objects using BaseIndexType = typename vt::Index2D::DenseIndexType; diff --git a/examples/hello_world/objgroup.cc b/examples/hello_world/objgroup.cc index 4c07e01d2c..a9d5620657 100644 --- a/examples/hello_world/objgroup.cc +++ b/examples/hello_world/objgroup.cc @@ -62,7 +62,9 @@ int main(int argc, char** argv) { vt::NodeType this_node = vt::theContext()->getNode(); vt::NodeType num_nodes = vt::theContext()->getNumNodes(); - auto proxy = vt::theObjGroup()->makeCollective(); + auto proxy = vt::theObjGroup()->makeCollective( + "examples_hello_world" + ); if (this_node == 0) { proxy[0].send(5,10); diff --git a/src/vt/messaging/collection_chain_set.impl.h b/src/vt/messaging/collection_chain_set.impl.h index 4a94b0eecb..48a3d566a1 100644 --- a/src/vt/messaging/collection_chain_set.impl.h +++ b/src/vt/messaging/collection_chain_set.impl.h @@ -62,7 +62,9 @@ CollectionChainSet::CollectionChainSet( static_assert(std::is_same::value, "Must match index type"); // Make this chain set an objgroup instance so we can send updates - auto p = theObjGroup()->makeCollective>(this); + auto p = theObjGroup()->makeCollective>( + "CollectionChainSet::CollectionChainSet()", this + ); auto const this_node = theContext()->getNode(); auto const proxy_bits = proxy.getProxy(); diff --git a/src/vt/objgroup/manager.cc b/src/vt/objgroup/manager.cc index 3865333d7c..ffab94c278 100644 --- a/src/vt/objgroup/manager.cc +++ b/src/vt/objgroup/manager.cc @@ -110,7 +110,7 @@ void ObjGroupManager::dispatch(MsgSharedPtr msg, HandlerType han) } ObjGroupProxyType ObjGroupManager::makeCollectiveImpl( - HolderBasePtrType base, void* obj_ptr + std::string const& label, HolderBasePtrType base, void* obj_ptr ) { auto const id = cur_obj_id_++; auto const node = theContext()->getNode(); @@ -126,6 +126,12 @@ ObjGroupProxyType ObjGroupManager::makeCollectiveImpl( std::forward_as_tuple(proxy), std::forward_as_tuple(std::move(base)) ); + labels_.emplace( + std::piecewise_construct, + std::forward_as_tuple(proxy), + std::forward_as_tuple(label) + ); + return proxy; } diff --git a/src/vt/objgroup/manager.h b/src/vt/objgroup/manager.h index 730b0d1c13..f0c4d3abdb 100644 --- a/src/vt/objgroup/manager.h +++ b/src/vt/objgroup/manager.h @@ -121,34 +121,37 @@ struct ObjGroupManager : runtime::component::Component { * \brief Collectively construct a new object group. Allocates and constructs * the object on each node by forwarding constructor arguments. * + * \param[in] label object group label * \param[in] args args to pass to the object's constructor on each node * * \return proxy to the object group */ template - ProxyType makeCollective(Args&&... args); + ProxyType makeCollective(std::string const& label, Args&&... args); /** * \brief Collectively construct a new object group from a existing unique * pointer to the local object * + * \param[in] label object group label * \param[in] obj the std::unique_ptr to the local object * * \return proxy to the object group */ template - ProxyType makeCollective(std::unique_ptr obj); + ProxyType makeCollective(std::string const& label, std::unique_ptr obj); /** * \brief Collectively construct a new object group with a callback to provide * a unique pointer on each node. * + * \param[in] label object group label * \param[in] fn callback function to construct * * \return proxy to the object group */ template - ProxyType makeCollective(MakeFnType fn); + ProxyType makeCollective(std::string const& label, MakeFnType fn); /** * \brief Collectively construct a new object group from a raw pointer to the @@ -158,24 +161,26 @@ struct ObjGroupManager : runtime::component::Component { * object. Do not allow the object to be deallocated before the object group * is destroyed. * + * \param[in] label object group label * \param[in] obj raw pointer to the object * * \return proxy to the object group */ template - ProxyType makeCollective(ObjT* obj); + ProxyType makeCollective(std::string const& label, ObjT* obj); /** * \brief Collectively construct a new object group from a smart-pointer-like * handle. * + * \param[in] label object group label * \param[in] obj the smart-pointer-like handle that the system holds until * destruction * * \return proxy to the object group */ template