cv_subsampling.py
"""Subsampling experiment: trains the classifier on progressively smaller
fractions of the classification dataset and appends train/test metrics for
every run to results.csv."""

import json

import torch
import numpy as np

from experiments.utils import Logger
from experiments.rho_trainer import Trainer
# Provides the loss and metric classes used below (CrossEntropyLoss,
# ClassAccuracy, ClassPositivesNegatives, ClassAUC, ClassAP,
# BestClassPositivesNegatives).
from experiments.autoencoding.classification_metrics import *
from experiments.datasets.reduced import ReducedDataset
from params import CLASSIFIER_PARAMS, CLASSIFICATION_TRAINER_PARAMS, CLASSIFICATION_DATASET
from utils import get_dataset, get_classification_model

# Number of repeated runs per subsampling level and the dataset fractions to try.
RUNS = 10
PERCENTAGES = [1.0, 0.8, 0.6, 0.4, 0.2]


def set_seed(seed):
    """Store the seed in the trainer params and seed NumPy and PyTorch with it."""
    CLASSIFICATION_TRAINER_PARAMS["seed"] = seed
    np.random.seed(CLASSIFICATION_TRAINER_PARAMS["seed"])
    torch.manual_seed(CLASSIFICATION_TRAINER_PARAMS["seed"])


def write_result(split, variant, classifier, dataset, trainer, result, params):
    """Append one semicolon-separated row (params and result as JSON) to results.csv."""
    with open("results.csv", "a") as f:
        f.write(f"{split};{variant};{classifier};{dataset};{trainer};{json.dumps(params)};{json.dumps(result)}\n")
CLASSIFICATION_DATASET["overwrite_cache"] = False
full_dataset = get_dataset(CLASSIFICATION_DATASET)
CLASSIFIER_PARAMS["features"] = full_dataset.get_input_size()
CLASSIFIER_PARAMS["edge_dim"] = full_dataset.get_edge_size()
CLASSIFIER_PARAMS["classes"] = len(full_dataset.get_classes())
if CLASSIFIER_PARAMS["classes"] == 2:
CLASSIFIER_PARAMS["classes"] = 1


for percentage in PERCENTAGES:
    EXPERIMENT_NAME = f'{CLASSIFIER_PARAMS["name"]}_{CLASSIFICATION_DATASET["name"] + str(int(100 * percentage))}_{CLASSIFICATION_TRAINER_PARAMS["name"]}'
    with Logger("results/" + EXPERIMENT_NAME + "/out.log"):
        for i in range(RUNS):
            print(f"Starting run {i} of {EXPERIMENT_NAME}")
            # Advance the seed by one for every run so repetitions differ but stay reproducible.
            set_seed(CLASSIFICATION_TRAINER_PARAMS["seed"] + 1)
            # Build a subsampled view of the full dataset at the requested fraction.
            dataset = ReducedDataset(full_dataset, percentage)
            model = get_classification_model(CLASSIFIER_PARAMS)
            losses = [CrossEntropyLoss()]
            metrics = [ClassAccuracy(), ClassPositivesNegatives(), ClassAUC(), ClassAP(), BestClassPositivesNegatives()]
            CLASSIFICATION_TRAINER_PARAMS["experiment_name"] = EXPERIMENT_NAME
            trainer = Trainer(model, losses, metrics, dataset, CLASSIFICATION_TRAINER_PARAMS)
            trainer.train()

            params = {**CLASSIFICATION_DATASET, **CLASSIFIER_PARAMS,
                      **CLASSIFICATION_TRAINER_PARAMS, "percentage": percentage}
            dataset_name = CLASSIFICATION_DATASET["name"] + str(int(100 * percentage))

            # Metrics of the final model ("End") on the train and test splits.
            write_result(
                "Train", "End", CLASSIFIER_PARAMS["name"], dataset_name,
                CLASSIFICATION_TRAINER_PARAMS["name"], trainer.evaluate(train_set=True), params
            )
            write_result(
                "Test", "End", CLASSIFIER_PARAMS["name"], dataset_name,
                CLASSIFICATION_TRAINER_PARAMS["name"], trainer.evaluate(train_set=False), params
            )

            # Metrics after reloading the saved checkpoint ("Checkpoint").
            trainer.load_checkpoint()
            write_result(
                "Train", "Checkpoint", CLASSIFIER_PARAMS["name"], dataset_name,
                CLASSIFICATION_TRAINER_PARAMS["name"], trainer.evaluate(train_set=True), params
            )
            write_result(
                "Test", "Checkpoint", CLASSIFIER_PARAMS["name"], dataset_name,
                CLASSIFICATION_TRAINER_PARAMS["name"], trainer.evaluate(train_set=False), params
            )

            # Free resources before the next run.
            trainer.cleanup()
            del trainer
            del model
            del dataset
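

# --- Illustrative aggregation (not part of the original script) ---
# A minimal sketch of how the per-run rows loaded by load_results_sketch()
# could be averaged over the RUNS repetitions for each split/variant and
# subsampling percentage. It assumes trainer.evaluate() returns a flat dict
# of metric values; metric names are not assumed, and only numeric values
# are averaged.
def summarize_results_sketch(rows):
    from collections import defaultdict

    buckets = defaultdict(list)
    for row in rows:
        key = (row["split"], row["variant"], row["params"].get("percentage"))
        buckets[key].append(row["result"])

    summary = {}
    for key, results in buckets.items():
        metrics = defaultdict(list)
        for result in results:
            for name, value in result.items():
                if isinstance(value, (int, float)):
                    metrics[name].append(value)
        summary[key] = {name: sum(vals) / len(vals) for name, vals in metrics.items()}
    return summary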