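"""Training and evaluation loops for a PyTorch image classifier: `train` runs a
single epoch of optimization and `validate` measures top-1/top-5 accuracy, with
progress printed to the console and written to TensorBoard."""
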
import time

import torch
import tqdm

from utils.eval_utils import accuracy
from utils.logging import AverageMeter, ProgressMeter

__all__ = ["train", "validate"]


def train(train_loader, model, criterion, optimizer, epoch, args, writer):
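    """Run one training epoch.

    Iterates over `train_loader`, performs a forward/backward pass and an
    optimizer step per batch, and reports loss and accuracy to the console and
    TensorBoard every `args.print_freq` batches.

    Returns:
        (top1.avg, top5.avg): average top-1 and top-5 accuracy over the epoch.
    """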
    batch_time = AverageMeter("Time", ":6.3f")
    data_time = AverageMeter("Data", ":6.3f")
    losses = AverageMeter("Loss", ":.3f")
    top1 = AverageMeter("Acc@1", ":6.2f")
    top5 = AverageMeter("Acc@5", ":6.2f")
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, top1, top5],
        prefix=f"Epoch: [{epoch}]",
    )

    # switch to train mode
    model.train()

    batch_size = train_loader.batch_size
    num_batches = len(train_loader)

    end = time.time()
    for i, (images, target) in tqdm.tqdm(
        enumerate(train_loader), ascii=True, total=len(train_loader)
    ):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            images = images.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True).long()

        # compute output
        output = model(images)
        loss = criterion(output, target.view(-1))

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1.item(), images.size(0))
        top5.update(acc5.item(), images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            t = (num_batches * epoch + i) * batch_size
            progress.display(i)
            progress.write_to_tensorboard(writer, prefix="train", global_step=t)

    return top1.avg, top5.avg


def validate(val_loader, model, criterion, args, writer, epoch):
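    """Evaluate the model on `val_loader`.

    Runs a full pass in eval mode with gradients disabled, printing progress
    every `args.print_freq` batches and, if `writer` is given, logging the
    final averages to TensorBoard under the "test" prefix.

    Returns:
        (top1.avg, top5.avg): average top-1 and top-5 accuracy on the set.
    """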
    batch_time = AverageMeter("Time", ":6.3f", write_val=False)
    losses = AverageMeter("Loss", ":.3f", write_val=False)
    top1 = AverageMeter("Acc@1", ":6.2f", write_val=False)
    top5 = AverageMeter("Acc@5", ":6.2f", write_val=False)
    progress = ProgressMeter(
        len(val_loader), [batch_time, losses, top1, top5], prefix="Test: "
    )

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in tqdm.tqdm(
            enumerate(val_loader), ascii=True, total=len(val_loader)
        ):
            if args.gpu is not None:
                images = images.cuda(args.gpu, non_blocking=True)
                target = target.cuda(args.gpu, non_blocking=True).long()

            # compute output
            output = model(images)
            loss = criterion(output, target.view(-1))

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1.item(), images.size(0))
            top5.update(acc5.item(), images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                progress.display(i)

        progress.display(len(val_loader))

    if writer is not None:
        progress.write_to_tensorboard(writer, prefix="test", global_step=epoch)

    return top1.avg, top5.avg
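

# Example usage (a minimal sketch): the loaders, model, criterion, optimizer,
# `args` namespace, and SummaryWriter named here are hypothetical stand-ins for
# whatever the surrounding training script constructs; this only illustrates
# the calling convention of train() and validate().
#
#     writer = SummaryWriter(log_dir=args.log_dir)
#     for epoch in range(args.epochs):
#         train_acc1, train_acc5 = train(
#             train_loader, model, criterion, optimizer, epoch, args, writer
#         )
#         val_acc1, val_acc5 = validate(
#             val_loader, model, criterion, args, writer, epoch
#         )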