-
Notifications
You must be signed in to change notification settings - Fork 10
/
autoRun.py
51 lines (47 loc) · 2.01 KB
/
autoRun.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import time
import subprocess
from datetime import datetime
import os
def get_gpu_memory_map(smi_output=None):
    """Return the GPUs ranked from least to most busy.

    Each GPU is described by its free memory and its utilization as
    reported by ``nvidia-smi --query-gpu=memory.free,utilization.gpu``
    (``nounits`` output, i.e. plain integers; nvidia-smi reports memory
    in MiB and utilization in percent).

    Args:
        smi_output: Optional text in the same format ``nvidia-smi`` emits
            for the query above (one ``"<mem_free>, <util>"`` line per
            GPU). When ``None`` (the default) ``nvidia-smi`` is invoked.

    Returns:
        A list of ``(gpu_id, (mem_free, util))`` tuples, where ``gpu_id``
        is the zero-based position of the GPU's line in the output. The
        list is ordered by ascending utilization, ties broken by
        descending free memory, remaining ties by ascending ``gpu_id``.
        Empty output yields an empty list.

    Raises:
        ValueError: If a line does not consist of exactly two integers
            (e.g. the driver reports ``[N/A]`` for a field).
        subprocess.CalledProcessError, OSError: If ``nvidia-smi`` fails
            or is not installed (only when ``smi_output`` is ``None``).
    """
    if smi_output is None:
        smi_output = subprocess.check_output(
            [
                'nvidia-smi', '--query-gpu=memory.free,utilization.gpu',
                '--format=csv,nounits,noheader'
            ], encoding='utf-8')

    gpu_info = []
    for gpu_id, line in enumerate(smi_output.strip().splitlines()):
        # Parse the two CSV fields explicitly instead of eval()-ing text
        # produced by an external process; int() tolerates the padding
        # spaces nvidia-smi puts after the comma.
        mem_free, util = (int(field) for field in line.split(','))
        gpu_info.append((gpu_id, (mem_free, util)))

    # One stable sort with a composite key: utilization ascending first,
    # then free memory descending; equal entries keep gpu_id order.
    gpu_info.sort(key=lambda kv: (kv[1][1], -kv[1][0]))
    return gpu_info
def choose_gpu(n_gpus=1, min_gpu_memory=9000, max_gpu_memory=100000, retry=False, sleep_time=30):
    """Select up to ``n_gpus`` GPUs by free memory and expose them to CUDA.

    GPUs are considered in the order returned by ``get_gpu_memory_map()``.
    A GPU qualifies when its free memory lies within
    ``[min_gpu_memory, max_gpu_memory]``. On success the function sets
    ``CUDA_DEVICE_ORDER`` to ``PCI_BUS_ID`` and ``CUDA_VISIBLE_DEVICES`` to
    the comma-separated ids of the selected GPUs. If fewer than ``n_gpus``
    GPUs qualify (but at least one does), the ones found are used.

    Args:
        n_gpus: Maximum number of GPUs to select.
        min_gpu_memory: Minimum free memory a GPU must have, in the units
            reported by ``get_gpu_memory_map()``.
        max_gpu_memory: Maximum free memory a GPU may have, same units.
        retry: When no GPU qualifies, keep polling instead of exiting.
        sleep_time: Seconds to wait between polls when ``retry`` is true.

    Raises:
        SystemExit: If no GPU qualifies and ``retry`` is false.
    """
    start_time = time.time()
    sorted_gpu_info = get_gpu_memory_map()
    print(f'gpu_id, (mem_left, util): {sorted_gpu_info}')

    while True:
        gpus = []
        for gpu_id, (mem_left, util) in sorted_gpu_info:
            if min_gpu_memory <= mem_left <= max_gpu_memory:
                gpus.append(gpu_id)
                print('use gpu:{} with {} MB left, util {}%'.format(gpu_id, mem_left, util))
            if len(gpus) == n_gpus:
                print('max num of gpus reached.')
                break

        if gpus:
            break

        if not retry:
            print(f'no gpu has memory >= {min_gpu_memory} MB, exiting...')
            # Raise SystemExit directly: the exit() helper is injected by
            # the site module and is not guaranteed to exist in programs.
            raise SystemExit

        # The status line ends with '\r' so it can be overwritten in place;
        # the bare print() after the sleep moves to a fresh line.
        print(f'[{datetime.now().strftime("%H:%M:%S")}'
              f' waited {time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time))}]'
              f' no gpu has memory >= {min_gpu_memory} MB, sleep {sleep_time}s...', end='\r')
        time.sleep(sleep_time)
        print()
        sorted_gpu_info = get_gpu_memory_map()

    # PCI_BUS_ID ordering makes CUDA device indices match nvidia-smi's.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    visible_gpus = ','.join(str(gpu_id) for gpu_id in gpus)
    print('visible gpus:', visible_gpus)
    # Export every selected GPU, i.e. exactly the list printed above, so
    # that n_gpus > 1 actually makes more than one device visible.
    os.environ["CUDA_VISIBLE_DEVICES"] = visible_gpus