I have recently started a new project using PyTorch, and I am still new to AI. To get a better performance estimate on my dataset during training, I used the cross-validation technique. Everything seems to work fine, but I am struggling with reproducibility. I even tried setting the SEED at the start of each k-fold iteration, but it does not seem to help at all. The differences in loss and accuracy between runs are small, but they are there. Before I added cross-validation, the results were perfectly reproducible. Thank you in advance.
Here is the for loop for my k-fold cross-validation. I used the solution from: k-fold cross validation using DataLoaders in PyTorch
# K-fold cross-validation driver: the dataset is cut into K_FOLD contiguous
# slices; each slice serves once as the validation set while the remaining
# samples form the training set.
K_FOLD = 5
fraction = 1 / K_FOLD
# Integer floor division instead of int(dataset_length * fraction): the float
# product can land just below a whole number for some fold counts and then be
# truncated one too low.
unit = dataset_length // K_FOLD
if unit < 1:
    raise ValueError(
        "dataset_length ({}) must be at least K_FOLD ({})".format(
            dataset_length, K_FOLD))


def _seed_everything(seed):
    """Re-seed the torch, NumPy and Python RNGs and pin the cuDNN flags."""
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU.
    np.random.seed(seed)  # Numpy module.
    random.seed(seed)  # Python random module.
    # Disable the cuDNN auto-tuner and request deterministic cuDNN kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


for i in range(K_FOLD):
    # Reset all RNG state so every fold starts from the same random stream.
    _seed_everything(SEED)

    print("-----------K-FOLD {}------------".format(i+1))
    tr_ll = 0
    print("Train left begin:", tr_ll)
    tr_lr = i * unit
    print("Train left end:", tr_lr)
    val_l = tr_lr
    print("Validation begin:", val_l)
    # The last fold runs to the end of the dataset so that the remainder
    # (dataset_length % K_FOLD samples) is validated too instead of always
    # staying in the training split.
    val_r = dataset_length if i == K_FOLD - 1 else i * unit + unit
    print("Validation end:", val_r)
    tr_rl = val_r
    print("Train right begin:", tr_rl)
    tr_rr = dataset_length
    print("Train right end:", tr_rr)

    # Training indices are everything left and right of the validation slice.
    train_indices = list(range(tr_ll, tr_lr)) + list(range(tr_rl, tr_rr))
    val_indices = list(range(val_l, val_r))

    train_set = torch.utils.data.dataset.Subset(DATASET, train_indices)
    val_set = torch.utils.data.dataset.Subset(DATASET, val_indices)

    image_datasets = {"train": train_set, "val": val_set}
    # `sets` is defined outside this snippet; presumably ["train", "val"]
    # -- TODO confirm, any other key would raise KeyError here.
    loader = {
        x: torch.utils.data.DataLoader(
            image_datasets[x], batch_size=10, shuffle=True)
        for x in sets
    }
    dataset_sizes = {name: len(subset)
                     for name, subset in image_datasets.items()}

    # training
    # NOTE(review): AlexNet and OPTIMIZER are created outside this loop (not
    # visible here). Unless train_model re-initialises them, weights and
    # optimizer state carry over from one fold to the next, so each fold's
    # result depends on the previous folds -- verify and, if so, build a
    # fresh model/optimizer per fold after seeding.
    trained_model = train_model(
        AlexNet, CRITERION, OPTIMIZER,
        dataloader=loader, dataset_sizes=dataset_sizes,
        num_epochs=EPOCHS, k_fold=i)
Aucun commentaire:
Enregistrer un commentaire