Skip to content

Commit

Permalink
trap invalid GPU errors
Browse files Browse the repository at this point in the history
  • Loading branch information
abagusetty committed Aug 21, 2024
1 parent 42a7b81 commit e2d8532
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions src/tamm/gpu_streams.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,18 +175,22 @@ static inline void getHardwareGPUCount(int* gpus_per_node) {
m_call = "cat /sys/class/drm/card*/gt/gt*/id | wc -l";
}
else if(pltf.get_backend() == sycl::backend::ext_oneapi_cuda) {
// TODO: can we use nvml api ?
// TODO: can we use nvml api ?, probably no
m_call = "nvidia-smi --query-gpu=name --format=csv,noheader | wc -l";
}
else if(pltf.get_backend() == sycl::backend::ext_oneapi_hip) {
// TODO: can we use ROCm SMI api ?
// TODO: can we use ROCm SMI api ?, probably no
m_call = "rocm-smi -i |grep GPU|wc -l";
}

std::unique_ptr<FILE, decltype(&pclose)> pipe(popen(m_call.c_str(), "r"), pclose);
if(!pipe) { throw std::runtime_error("popen() failed!"); }
while(fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) { result += buffer.data(); }
*gpus_per_node = stoi(result);

if(*gpus_per_node == 0) {
tamm_terminate("[TAMM ERROR] No GPUs detected on node!");
}
#endif
}

Expand Down

0 comments on commit e2d8532

Please sign in to comment.