Commit

fix type comparison warnings
ajaypanyala committed Nov 10, 2024
1 parent f9ef0f8 commit dc267c1
Showing 11 changed files with 32 additions and 219 deletions.
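
Most of the changes below silence signed/unsigned comparison warnings (the -Wsign-compare family in GCC/Clang) by pinning a loop bound to one explicit type or by casting one operand, plus some removal of unused variables and dead code. As a rough illustration only — the snippet is not TAMM code, and count_below, values, and limit are made-up names — this is the warning class and the two fix patterns used in the diffs:

// Illustrative sketch only, not from the TAMM sources.
// Fix pattern 1 (used below): give the loop bound a single explicit type up front.
// Fix pattern 2 (also used below): cast one operand, e.g. x <= (uint64_t) y.
#include <cstdint>
#include <vector>

int count_below(const std::vector<int>& values, int64_t limit) {
  // 'for(int i = 0; i < values.size(); ...)' would warn: signed int vs unsigned size_t.
  const int n    = static_cast<int>(values.size());
  int       hits = 0;
  for(int i = 0; i < n; i++) {
    if(values[i] < limit) hits++; // int promotes to int64_t; both signed, no warning
  }
  return hits;
}
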
7 changes: 4 additions & 3 deletions src/tamm/block_mult_plan.hpp
@@ -155,7 +155,7 @@ class FlatBlockMultPlan {
valid_ = true;
return;
}
- else if(lhs_labels_.size() == 1 && rhs1_labels_.size() == 1 || rhs2_labels_.size() == 1) {
+ else if((lhs_labels_.size() == 1 && rhs1_labels_.size() == 1) || rhs2_labels_.size() == 1) {
op_type_ = FlatOpType::scalar_vector;
valid_ = true;
}
@@ -670,14 +670,15 @@ class BlockMultPlan {
void prep_loop_gemm_plan() {
if(!has_repeated_index_ && !has_reduction_index_) {
if(has_hadamard_index_) {
- auto hadamard_labels = get_hadamard_labels();
+ auto hadamard_labels = get_hadamard_labels();
+ const ptrdiff_t hlabels = hadamard_labels.size();
for(size_t i = 0; i < hadamard_labels.size(); i++) {
auto lbl = lhs_labels_[i];
auto rhs1_pos =
std::find(rhs1_labels_.begin(), rhs2_labels_.end(), lbl) - rhs1_labels_.begin();
auto rhs2_pos =
std::find(rhs1_labels_.begin(), rhs2_labels_.end(), lbl) - rhs1_labels_.begin();
- if(rhs1_pos >= hadamard_labels.size() || rhs2_pos >= hadamard_labels.size()) { return; }
+ if(rhs1_pos >= hlabels || rhs2_pos >= hlabels) { return; }
}
plan_ = Plan::loop_gemm;
}
6 changes: 3 additions & 3 deletions src/tamm/distribution.hpp
@@ -416,7 +416,7 @@ class Distribution_SimpleRoundRobin: public Distribution {
EXPECTS(key >= 0 && key < total_num_blocks_);
// return {key % nproc_.value(), (key / nproc_.value()) * max_block_size_.value()};
Proc proc = (key * step_proc_.value() + start_proc_.value()) % nproc_.value();
- EXPECTS(step_proc_ == 1 || total_num_blocks_.value() <= nproc_.value());
+ EXPECTS(step_proc_ == 1 || total_num_blocks_.value() <= (uint64_t) nproc_.value());
Offset offset =
(step_proc_ != Proc{1} ? Offset{0} : (key / nproc_.value()) * max_block_size_.value());
return {proc, offset};
@@ -543,7 +543,7 @@ class Distribution_Dense: public Distribution {
auto pgrid = internal::compute_proc_grid(ardims.size(), ardims, nproc.value(), 0.0, 0, nchnk);
nchnk.erase(std::remove(nchnk.begin(), nchnk.end(), -2), nchnk.end());
ardims.erase(std::remove(ardims.begin(), ardims.end(), -2), ardims.end());
- auto rndim = ardims.size();
+ const int rndim = ardims.size();
if(rndim > 0 && rndim < ndim_)
pgrid = internal::compute_proc_grid(rndim, ardims, nproc.value(), 0.0, 0, nchnk);
int pgi = 0;
@@ -610,7 +610,7 @@ class Distribution_Dense: public Distribution {

Size total_size() const override {
Size result{1};
- for(size_t i = 0; i < ndim_; i++) { result *= tiss_[i].max_num_indices(); }
+ for(int i = 0; i < ndim_; i++) { result *= tiss_[i].max_num_indices(); }
return result;
}

2 changes: 2 additions & 0 deletions src/tamm/errors.hpp
@@ -15,6 +15,7 @@ namespace tamm {
std::cerr << "ERROR: (Not implemented)" \
<< "file:" << __FILE__ << "function:" << __func__ \
<< " line:" << __LINE__ << std::endl; \
+ throw std::runtime_error(""); \
} while(0)

/**
@@ -25,6 +26,7 @@ namespace tamm {
std::cerr << "ERROR: (Not allowed)" \
<< "file:" << __FILE__ << "function:" << __func__ \
<< " line:" << __LINE__ << std::endl; \
+ throw std::runtime_error(""); \
} while(0)

/**
6 changes: 3 additions & 3 deletions src/tamm/index_loop_nest.hpp
@@ -350,13 +350,13 @@ class IndexLoopNest {

template<typename Func>
void iterate(Func&& func) const {
- bool dense_case = is_dense_case();
- size_t ndim = iss_.size();
+ // bool dense_case = is_dense_case();
+ const int ndim = iss_.size();

if(is_dense_case() && ndim <= 4) {
IndexVector blockid(ndim);
size_t dims[ndim];
- for(int i = 0; i < iss_.size(); i++) { dims[i] = iss_[i].num_tiles(); }
+ for(int i = 0; i < ndim; i++) { dims[i] = iss_[i].num_tiles(); }
if(ndim == 0) { func(blockid); }
else if(ndim == 1) {
for(blockid[0] = 0; blockid[0] < dims[0]; ++blockid[0]) { func(blockid); }
6 changes: 3 additions & 3 deletions src/tamm/memory_manager_ga.hpp
@@ -130,10 +130,10 @@ class MemoryManagerGA: public MemoryManager {
{
pmr = new MemoryRegionGA(*this);

- int nranks = pg_.size().value();
- int64_t element_size = get_element_size(eltype);
- int64_t nels = local_nelements.value();
+ int nranks = pg_.size().value();
+ int64_t nels = local_nelements.value();
#if defined(USE_UPCXX)
+ int64_t element_size = get_element_size(eltype);
alloc_coll_upcxx(eltype, local_nelements, pmr, nranks, element_size, nels);
#else // USE_UPCXX
int ga_pg_default = GA_Pgroup_get_default();
2 changes: 1 addition & 1 deletion src/tamm/multop.hpp
@@ -278,7 +278,7 @@ class MultOp: public Op {
EXPECTS(!is_assign_);
auto& oprof = tamm::OpProfiler::instance();

- using TensorElType = typename LabeledTensorT1::element_type;
+ // using TensorElType = typename LabeledTensorT1::element_type;
// determine set of all labels
IndexLabelVec all_labels{lhs_.labels()};
all_labels.insert(all_labels.end(), rhs1_.labels().begin(), rhs1_.labels().end());
192 changes: 1 addition & 191 deletions src/tamm/proc_group.hpp
@@ -10,7 +10,6 @@

namespace tamm {

- #if 1
class ProcGroup {
public:
/**
@@ -310,7 +309,7 @@ class ProcGroup {
EXPECTS(pg2.is_valid());
const size_t nranks = size().value();
std::vector<Proc> ret(nranks);
- for(int i = 0; i < nranks; i++) { ret[i] = rank_translate(i, pg2); }
+ for(size_t i = 0; i < nranks; i++) { ret[i] = rank_translate(i, pg2); }
return ret;
}

@@ -730,193 +729,4 @@ class ProcGroup {
friend bool operator!=(const ProcGroup& lhs, const ProcGroup& rhs) { return !(lhs == rhs); }
}; // class ProcGroup

#else
/**
* @brief Wrapper to MPI communicator and related operations.
*/
class ProcGroup {
public:
// ProcGroup() = default;
ProcGroup(): mpi_comm_(std::make_shared<MPI_Comm>(MPI_COMM_NULL)), is_valid_(false) {
// MPI_Comm* comm_out = new MPI_Comm();
// *comm_out = MPI_COMM_NULL;
// mpi_comm_.reset(comm_out);
}
ProcGroup(MPI_Comm comm) {
// std::shared_ptr<MPI_Comm> comm_out = std::make_shared<MPI_Comm> (comm);
// mpi_comm_.reset(comm_out.get());
// MPI_Comm* comm_out = new MPI_Comm();
//*comm_out = comm;
// mpi_comm_.reset(comm_out);
mpi_comm_.reset(new MPI_Comm(comm));
is_valid_ = (comm != MPI_COMM_NULL);
}
static ProcGroup create_coll(MPI_Comm mpi_comm) {
MPI_Comm* comm_out = new MPI_Comm();
MPI_Comm_dup(mpi_comm, comm_out);
ProcGroup pg;
pg.mpi_comm_.reset(comm_out, deleter);
pg.is_valid_ = true;
pg.ga_pg_ = create_ga_process_group_coll(mpi_comm);
return pg;
}
ProcGroup(const ProcGroup&) = default;
ProcGroup(ProcGroup&& pg) // TBD: check if this can be default
:
mpi_comm_{std::move(pg.mpi_comm_)}, ga_pg_{pg.ga_pg_} {}
ProcGroup& operator=(const ProcGroup&) = default;
// ProcGroup(ProcGroup&&) = default;
ProcGroup& operator=(ProcGroup&&) = default;
~ProcGroup() = default;
// explicit ProcGroup(MPI_Comm comm = MPI_COMM_NULL)
// : comm_{comm},
// is_valid_{comm != MPI_COMM_NULL} { }
/**
* Is it a valid communicator (i.e., not MPI_COMM_NULL)
* @return true is wrapped MPI communicator is not MPI_COMM_NULL
*/
bool is_valid() const { return is_valid_; }
/**
* Rank of invoking process
* @return rank of invoking process in the wrapped communicator
*/
Proc rank() const {
int rank;
EXPECTS(is_valid());
// MPI_Comm_rank(comm_, &rank);
MPI_Comm_rank(*mpi_comm_, &rank);
return Proc{rank};
}
/**
* Number of ranks in the wrapped communicator
* @return Size of the wrapped communicator
*/
Proc size() const {
int nranks;
EXPECTS(is_valid());
// MPI_Comm_size(comm_, &nranks);
MPI_Comm_size(*mpi_comm_, &nranks);
return Proc{nranks};
}
/**
* Access the underlying MPI communicator
* @return the wrapped MPI communicator
*/
MPI_Comm comm() const {
// return comm_;
return *mpi_comm_;
}
int ga_pg() const { return ga_pg_; }
/**
* Duplicate/clone the wrapped MPI communicator
* @return A copy.
* @note This is a collective call on the wrapped communicator
* @todo Rename this call to clone_coll() to indicate this is a collective call.
*/
ProcGroup clone_coll() const { return create_coll(*mpi_comm_); }
// ProcGroup clone() const {
// EXPECTS(is_valid());
// MPI_Comm comm_out{MPI_COMM_NULL};
// MPI_Comm_dup(comm_, &comm_out);
// return ProcGroup{comm_out};
// }
void destroy_coll() {
MPI_Comm_free(mpi_comm_.get());
GA_Pgroup_destroy(ga_pg_);
is_valid_ = false;
}
/**
* Free the wrapped communicator
*/
/* void destroy() {
if(is_valid()) {
MPI_Comm_free(&comm_);
}
comm_ = MPI_COMM_NULL;
is_valid_ = false;
}*/
/**
* Barrier on the wrapped communicator.
*/
void barrier() {
// MPI_Barrier(comm_);
// MPI_Barrier(*mpi_comm_);
GA_Pgroup_sync(ga_pg_);
}
Proc rank_translate(Proc proc, const ProcGroup& pg2) {
EXPECTS(is_valid());
MPI_Group group1, group2;
int ranks1{static_cast<int>(proc.value())};
int ranks2{MPI_PROC_NULL};
// MPI_Comm_group(comm_, &group1);
MPI_Comm_group(*mpi_comm_, &group1);
MPI_Comm_group(*pg2.mpi_comm_, &group2);
MPI_Group_translate_ranks(group1, 1, &ranks1, group2, &ranks2);
assert(ranks2 != MPI_PROC_NULL);
MPI_Group_free(&group1);
MPI_Group_free(&group2);
return Proc{ranks2};
}
private:
/**
* Create a GA process group corresponding to the given proc group
* @param pg TAMM process group
* @return GA processes group on this TAMM process group
*/
static int create_ga_process_group_coll(MPI_Comm comm) {
int nranks;
MPI_Comm_size(comm, &nranks);
MPI_Group group, group_world;
int ranks[nranks], ranks_world[nranks];
MPI_Comm_group(comm, &group);
MPI_Comm_group(GA_MPI_Comm(), &group_world);
for(int i = 0; i < nranks; i++) { ranks[i] = i; }
MPI_Group_translate_ranks(group, nranks, ranks, group_world, ranks_world);
int ga_pg_default = GA_Pgroup_get_default();
GA_Pgroup_set_default(GA_Pgroup_get_world());
int ga_pg = GA_Pgroup_create(ranks_world, nranks);
GA_Pgroup_set_default(ga_pg_default);
MPI_Group_free(&group);
MPI_Group_free(&group_world);
return ga_pg;
}
// MPI_Comm comm_;// = MPI_COMM_NULL;
std::shared_ptr<MPI_Comm> mpi_comm_;
int ga_pg_;
bool is_valid_;
static void deleter(MPI_Comm* mpi_comm) {
EXPECTS(*mpi_comm != MPI_COMM_NULL);
delete mpi_comm;
}
static void deleter_comm(MPI_Comm* mpi_comm) { delete mpi_comm; }
friend bool operator==(const ProcGroup& lhs, const ProcGroup& rhs) {
int result;
MPI_Comm_compare(*lhs.mpi_comm_, *rhs.mpi_comm_, &result);
return result == MPI_IDENT;
}
friend bool operator!=(const ProcGroup& lhs, const ProcGroup& rhs) { return !(lhs == rhs); }
}; // class ProcGroup
#endif

} // namespace tamm
8 changes: 4 additions & 4 deletions src/tamm/rmm_memory_pool.hpp
@@ -169,7 +169,7 @@ class RMMMemoryManager {

numa_set_bind_policy(1);
numa_set_strict(1);
- unsigned numNumaNodes = numa_num_task_nodes();
+ int numNumaNodes = numa_num_task_nodes();

// for ranks_pn_=1, there is no need to check the mapping to numa-nodes (mostly used for CI)
// for ranks_pn_ > numNumaNodes, it has to be divisble by the number of numa-domains in the
@@ -187,8 +187,8 @@ class RMMMemoryManager {
numa_bind(numaNodes);
numa_bitmask_free(numaNodes);

- int numa_id = numa_node_of_cpu(sched_getcpu());
- long numa_total_size = numa_node_size(numa_id, &max_host_bytes);
+ int numa_id = numa_node_of_cpu(sched_getcpu());
+ /* long numa_total_size = */ numa_node_size(numa_id, &max_host_bytes);
max_host_bytes *= 0.40; // reserve 40% only of the free numa-node memory (reserving rest of
// GA, non-pool allocations)

@@ -213,7 +213,7 @@ class RMMMemoryManager {
if(detail::tamm_enable_sprhbm) {
numa_id = it->first;
numa_set_preferred(numa_id);
- numa_total_size = numa_node_size(numa_id, &max_host_bytes);
+ /* numa_total_size = */ numa_node_size(numa_id, &max_host_bytes);
max_host_bytes *=
0.94; // One can use full HBM memory capacity, since the DDR is left for GA
}
10 changes: 5 additions & 5 deletions src/tamm/scheduler.hpp
@@ -210,7 +210,7 @@ class Scheduler {
auto misc_end = std::chrono::high_resolution_clock::now();
double misc_time =
std::chrono::duration_cast<std::chrono::duration<double>>((misc_end - misc_start)).count();
- auto t1 = misc_end;
+ // auto t1 = misc_end;

// double nranks = 1.0 * ec_.pg().size().value();
oprof.multOpGetTime = 0;
@@ -224,10 +224,10 @@ class Scheduler {
std::vector<double> multop_dgemm_times;
std::vector<double> multop_add_times;
std::vector<double> multop_copy_times;
- int nops = order.size();
+ const int nops = order.size();

assert(order.size() == 0 || order[0].first == 0); // level 0 sanity check
- for(size_t i = 0; i < order.size(); i++) {
+ for(int i = 0; i < nops; i++) {
if(order[i].first != lvl) {
assert(order[i].first == lvl + 1);
// auto t2 = std::chrono::high_resolution_clock::now();
@@ -262,7 +262,7 @@ class Scheduler {
oprof.multOpAddTime = 0;
oprof.multOpCopyTime = 0;
}
- auto t2 = std::chrono::high_resolution_clock::now();
+ // auto t2 = std::chrono::high_resolution_clock::now();
ec().pg().barrier();
lvl += 1;
auto t3 = std::chrono::high_resolution_clock::now();
@@ -347,7 +347,7 @@ class Scheduler {
int np = ec_.pg().size().value();
auto& pdata = ec_.get_profile_data();
if(ec_.pg().rank() == 0) {
- for(int i = 0; i < order.size(); i++) {
+ for(int i = 0; i < nops; i++) {
pdata << i << ";" << order[i].first << ";"
<< ops_[order[i].second]->opstr_
// << "," << global_load_imbalance_times_min[i]
1 change: 0 additions & 1 deletion src/tamm/setop.hpp
@@ -299,7 +299,6 @@ void FlatPlan<T, LabeledTensorT>::apply(const SetOp<T, LabeledTensorT>& setop, E
template<typename T, typename LabeledTensorT>
void LHSPlan<T, LabeledTensorT>::apply(const SetOp<T, LabeledTensorT>& setop, ExecutionContext& ec,
ExecutionHW hw) {
- using LHS_ElType = typename LabeledTensorT::element_type;
auto lhs_lt = setop.lhs();
Scalar alpha = setop.alpha();
bool is_assign = setop.is_assign();
