diff --git a/Baseline.py b/Baseline.py
deleted file mode 100644
index 2970b46..0000000
--- a/Baseline.py
+++ /dev/null
@@ -1,989 +0,0 @@
-# %%
-import warnings
-warnings.filterwarnings("ignore", category=DeprecationWarning)
-warnings.filterwarnings("ignore", category=UserWarning)
-warnings.filterwarnings("ignore", category=FutureWarning)
-
-import numpy as np # linear algebra
-import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
-import matplotlib.pyplot as plt
-# %matplotlib inline
-import seaborn as sns
-sns.set()
-from PIL import Image
-
-import torch
-import torch.nn as nn
-import torch.optim as optim
-from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR, CyclicLR
-import torchvision
-from torchvision import datasets, models, transforms
-from torch.utils.data import Dataset, DataLoader
-import torch.nn.functional as F
-
-from sklearn.model_selection import train_test_split, StratifiedKFold
-from sklearn.utils.class_weight import compute_class_weight
-
-
-from glob import glob
-from skimage.io import imread
-from os import listdir
-
-import time
-import copy
-from tqdm import tqdm_notebook as tqdm
-
-
-# %%
-
-
-run_training = True
-retrain = False
-find_learning_rate = True
-
-
-# %%
-
-files = listdir("input/breast-histopathology-images/")
-print(len(files))
-
-
-# %%
-
-files[0:10]
-
-
-# %%
-
-files = listdir("input/breast-histopathology-images/IDC_regular_ps50_idx5/")
-len(files)
-
-
-# %%
-
-base_path = "input/breast-histopathology-images/IDC_regular_ps50_idx5/"
-folder = listdir(base_path)
-num_patients = 89  # 设定要使用的患者数量为 10
-folder = folder[:num_patients]  # 只取前 10 个患者
-len(folder)
-
-
-# %%
-
-total_images = 0
-for n in range(len(folder)):
-    patient_id = folder[n]
-    for c in [0, 1]:
-        patient_path = base_path + patient_id
-        class_path = patient_path + "/" + str(c) + "/"
-        subfiles = listdir(class_path)
-        total_images += len(subfiles)
-
-
-# %%
-
-total_images
-
-
-# %%
-
-data = pd.DataFrame(index=np.arange(0, total_images), columns=["patient_id", "path", "target"])
-
-k = 0
-for n in range(len(folder)):
-    patient_id = folder[n]
-    patient_path = base_path + patient_id
-    for c in [0,1]:
-        class_path = patient_path + "/" + str(c) + "/"
-        subfiles = listdir(class_path)
-        for m in range(len(subfiles)):
-            image_path = subfiles[m]
-            data.iloc[k]["path"] = class_path + image_path
-            data.iloc[k]["target"] = c
-            data.iloc[k]["patient_id"] = patient_id
-            k += 1
-
-data.head()
-
-
-# %%
-
-data.shape
-
-
-# %%
-
-cancer_perc = data.groupby("patient_id").target.value_counts()/ data.groupby("patient_id").target.size()
-cancer_perc = cancer_perc.unstack()
-
-fig, ax = plt.subplots(1,3,figsize=(20,5))
-sns.distplot(data.groupby("patient_id").size(), ax=ax[0], color="Orange", kde=False, bins=30)
-ax[0].set_xlabel("Number of patches")
-ax[0].set_ylabel("Frequency");
-ax[0].set_title("How many patches do we have per patient?");
-sns.distplot(cancer_perc.loc[:, 1]*100, ax=ax[1], color="Tomato", kde=False, bins=30)
-ax[1].set_title("How much percentage of an image is covered by IDC?")
-ax[1].set_ylabel("Frequency")
-ax[1].set_xlabel("% of patches with IDC");
-sns.countplot(data.target, palette="Set2", ax=ax[2]);
-ax[2].set_xlabel("no(0) versus yes(1)")
-ax[2].set_title("How many patches show IDC?");
-
-
-# %%
-
-data.target = data.target.astype(int)
-
-
-# %%
-
-pos_selection = np.random.choice(data[data.target==1].index.values, size=50, replace=False)
-neg_selection = np.random.choice(data[data.target==0].index.values, size=50, replace=False)
-
-
-# %%
-
-fig, ax = plt.subplots(5,10,figsize=(20,10))
-
-for n in range(5):
-    for m in range(10):
-        idx = pos_selection[m + 10*n]
-        image = imread(data.loc[idx, "path"])
-        ax[n,m].imshow(image)
-        ax[n,m].grid(False)
-
-
-# %%
-
-fig, ax = plt.subplots(5,10,figsize=(20,10))
-
-for n in range(5):
-    for m in range(10):
-        idx = neg_selection[m + 10*n]
-        image = imread(data.loc[idx, "path"])
-        ax[n,m].imshow(image)
-        ax[n,m].grid(False)
-
-
-# %%
-
-def extract_coords(df):
-    coord = df.path.str.rsplit("_", n=4, expand=True)
-    coord = coord.drop([0, 1, 4], axis=1)
-    coord = coord.rename({2: "x", 3: "y"}, axis=1)
-    coord.loc[:, "x"] = coord.loc[:,"x"].str.replace("x", "", case=False).astype(int)
-    coord.loc[:, "y"] = coord.loc[:,"y"].str.replace("y", "", case=False).astype(int)
-    df.loc[:, "x"] = coord.x.values
-    df.loc[:, "y"] = coord.y.values
-    return df
-
-def get_cancer_dataframe(patient_id, cancer_id):
-    path = base_path + patient_id + "/" + cancer_id
-    files = listdir(path)
-    dataframe = pd.DataFrame(files, columns=["filename"])
-    path_names = path + "/" + dataframe.filename.values
-    dataframe = dataframe.filename.str.rsplit("_", n=4, expand=True)
-    dataframe.loc[:, "target"] = int(cancer_id)
-    dataframe.loc[:, "path"] = path_names
-    dataframe = dataframe.drop([0, 1, 4], axis=1)
-    dataframe = dataframe.rename({2: "x", 3: "y"}, axis=1)
-    dataframe.loc[:, "x"] = dataframe.loc[:,"x"].str.replace("x", "", case=False).astype(int)
-    dataframe.loc[:, "y"] = dataframe.loc[:,"y"].str.replace("y", "", case=False).astype(int)
-    return dataframe
-
-def get_patient_dataframe(patient_id):
-    df_0 = get_cancer_dataframe(patient_id, "0")
-    df_1 = get_cancer_dataframe(patient_id, "1")
-    patient_df = pd.concat([df_0, df_1], ignore_index=True)
-    return patient_df
-
-
-# %%
-
-example = get_patient_dataframe(data.patient_id.values[0])
-example.head()
-
-
-# %%
-
-fig, ax = plt.subplots(5, 3, figsize=(20, 27))
-
-patient_ids = data.patient_id.unique()
-
-for n in range(5):
-    for m in range(3):
-        patient_id = patient_ids[m + 3 * n]
-        example_df = get_patient_dataframe(patient_id)
-
-        ax[n, m].scatter(example_df.x.values, example_df.y.values, c=example_df.target.values, cmap="coolwarm", s=20);
-        ax[n, m].set_title("patient " + patient_id)
-        ax[n, m].set_xlabel("y coord")
-        ax[n, m].set_ylabel("x coord")
-
-
-# %%
-
-
-def visualise_breast_tissue(patient_id, pred_df=None):
-    example_df = get_patient_dataframe(patient_id)
-    max_point = [example_df.y.max() - 1, example_df.x.max() - 1]
-    grid = 255 * np.ones(shape=(max_point[0] + 50, max_point[1] + 50, 3)).astype(np.uint8)
-    mask = 255 * np.ones(shape=(max_point[0] + 50, max_point[1] + 50, 3)).astype(np.uint8)
-    if pred_df is not None:
-        patient_df = pred_df[pred_df.patient_id == patient_id].copy()
-    mask_proba = np.zeros(shape=(max_point[0] + 50, max_point[1] + 50, 1)).astype(float)
-
-    broken_patches = []
-    for n in range(len(example_df)):
-        try:
-            image = imread(example_df.path.values[n])
-
-            target = example_df.target.values[n]
-
-            x_coord = int(example_df.x.values[n])
-            y_coord = int(example_df.y.values[n])
-            x_start = x_coord - 1
-            y_start = y_coord - 1
-            x_end = x_start + 50
-            y_end = y_start + 50
-
-            grid[y_start:y_end, x_start:x_end] = image
-            if target == 1:
-                mask[y_start:y_end, x_start:x_end, 0] = 250
-                mask[y_start:y_end, x_start:x_end, 1] = 0
-                mask[y_start:y_end, x_start:x_end, 2] = 0
-            if pred_df is not None:
-                proba = patient_df[
-                    (patient_df.x == x_coord) & (patient_df.y == y_coord)].proba
-                mask_proba[y_start:y_end, x_start:x_end, 0] = float(proba)
-
-        except ValueError:
-            broken_patches.append(example_df.path.values[n])
-
-    return grid, mask, broken_patches, mask_proba
-
-
-# %%
-
-example = "8863"
-grid, mask, broken_patches,_ = visualise_breast_tissue(example)
-
-fig, ax = plt.subplots(1,2,figsize=(20,10))
-ax[0].imshow(grid, alpha=0.9)
-ax[1].imshow(mask, alpha=0.8)
-ax[1].imshow(grid, alpha=0.7)
-ax[0].grid(False)
-ax[1].grid(False)
-for m in range(2):
-    ax[m].set_xlabel("y-coord")
-    ax[m].set_ylabel("y-coord")
-ax[0].set_title("Breast tissue slice of patient: " + patient_id)
-ax[1].set_title("Cancer tissue colored red \n of patient: " + patient_id);
-
-
-# %%
-
-broken_patches
-
-
-# %%
-
-BATCH_SIZE = 8
-NUM_CLASSES = 2
-
-OUTPUT_PATH = ""
-MODEL_PATH = "input/breastcancermodel/"
-LOSSES_PATH = "input/breastcancermodel/"
-
-
-# %%
-
-torch.manual_seed(0)
-np.random.seed(0)
-
-
-# %%
-
-data.head()
-data.loc[:, "target"] = data.target.astype(str)
-data.info()
-
-
-# %%
-
-patients = data.patient_id.unique()
-
-train_ids, sub_test_ids = train_test_split(patients,
-                                           test_size=0.3,
-                                           random_state=0)
-test_ids, dev_ids = train_test_split(sub_test_ids, test_size=0.5, random_state=0)
-
-
-# %%
-
-print(len(train_ids)/patients.shape[0]*100, len(dev_ids)/patients.shape[0]*100, len(test_ids)/patients.shape[0]*100)
-
-
-# %%
-
-print(len(train_ids), len(dev_ids), len(test_ids))
-
-
-# %%
-
-train_df = data.loc[data.patient_id.isin(train_ids),:].copy()
-test_df = data.loc[data.patient_id.isin(test_ids),:].copy()
-dev_df = data.loc[data.patient_id.isin(dev_ids),:].copy()
-
-train_df = extract_coords(train_df)
-test_df = extract_coords(test_df)
-dev_df = extract_coords(dev_df)
-
-
-# %%
-
-fig, ax = plt.subplots(1,3,figsize=(20,5))
-sns.countplot(train_df.target, ax=ax[0], palette="Reds")
-ax[0].set_title("Train data")
-sns.countplot(dev_df.target, ax=ax[1], palette="Blues")
-ax[1].set_title("Dev data")
-sns.countplot(test_df.target, ax=ax[2], palette="Greens");
-ax[2].set_title("Test data");
-
-
-# %%
-
-
-def my_transform(key="train", plot=False):
-    train_sequence = [transforms.Resize((50, 50)),
-                      transforms.RandomHorizontalFlip(),
-                      transforms.RandomVerticalFlip()]
-    val_sequence = [transforms.Resize((50, 50))]
-    if plot == False:
-        train_sequence.extend([
-            transforms.ToTensor(),
-            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
-        val_sequence.extend([
-            transforms.ToTensor(),
-            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
-
-    data_transforms = {'train': transforms.Compose(train_sequence), 'val': transforms.Compose(val_sequence)}
-    return data_transforms[key]
-
-
-# %%
-
-
-class BreastCancerDataset(Dataset):
-
-    def __init__(self, df, transform=None):
-        self.states = df
-        self.transform = transform
-
-    def __len__(self):
-        return len(self.states)
-
-    def __getitem__(self, idx):
-        patient_id = self.states.patient_id.values[idx]
-        x_coord = self.states.x.values[idx]
-        y_coord = self.states.y.values[idx]
-        image_path = self.states.path.values[idx]
-        image = Image.open(image_path)
-        image = image.convert('RGB')
-
-        if self.transform:
-            image = self.transform(image)
-
-        if "target" in self.states.columns.values:
-            target = int(self.states.target.values[idx])
-        else:
-            target = None
-
-        return {"image": image,
-                "label": target,
-                "patient_id": patient_id,                                         
-                "x": x_coord,
-                "y": y_coord}
-
-                          
-
-# %%
-                 
-train_dataset = BreastCancerDataset(train_df, transform=my_transform(key="train"))
-dev_dataset = BreastCancerDataset(dev_df, transform=my_transform(key="val"))
-test_dataset = BreastCancerDataset(test_df, transform=my_transform(key="val"))
-
-
-# %%
-
-image_datasets = {"train": train_dataset, "dev": dev_dataset, "test": test_dataset}
-dataset_sizes = {x: len(image_datasets[x]) for x in ["train", "dev", "test"]}
-
-
-# %%
-
-fig, ax = plt.subplots(3,6,figsize=(20,11))
-
-train_transform = my_transform(key="train", plot=True)
-val_transform = my_transform(key="val", plot=True)
-
-for m in range(6):
-    filepath = train_df.path.values[m]
-    image = Image.open(filepath)
-    ax[0,m].imshow(image)
-    transformed_img = train_transform(image)
-    ax[1,m].imshow(transformed_img)
-    ax[2,m].imshow(val_transform(image))
-    ax[0,m].grid(False)
-    ax[1,m].grid(False)
-    ax[2,m].grid(False)
-    ax[0,m].set_title(train_df.patient_id.values[m] + "\n target: " + train_df.target.values[m])
-    ax[1,m].set_title("Preprocessing for train")
-    ax[2,m].set_title("Preprocessing for val")
-
-
-# %%
-
-train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
-dev_dataloader = DataLoader(dev_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=True)
-test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=False)
-
-
-# %%
-
-dataloaders = {"train": train_dataloader, "dev": dev_dataloader, "test": test_dataloader}
-
-
-# %%
-
-print(len(dataloaders["train"]), len(dataloaders["dev"]), len(dataloaders["test"]))
-
-
-# %%
-
-device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-device
-
-
-# %%
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-class BasicBlock(nn.Module):
-    expansion = 1
-
-    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
-        super(BasicBlock, self).__init__()
-        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
-        self.bn1 = nn.BatchNorm2d(out_channels)
-        self.relu = nn.ReLU(inplace=True)
-        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
-        self.bn2 = nn.BatchNorm2d(out_channels)
-        self.downsample = downsample
-
-    def forward(self, x):
-        identity = x
-
-        out = self.conv1(x)
-        out = self.bn1(out)
-        out = self.relu(out)
-
-        out = self.conv2(out)
-        out = self.bn2(out)
-
-        if self.downsample is not None:
-            identity = self.downsample(x)
-
-        out += identity
-        out = self.relu(out)
-
-        return out
-
-class ResNet18(nn.Module):
-    def __init__(self, num_classes=2):
-        super(ResNet18, self).__init__()
-        self.in_channels = 64
-        
-        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
-        self.bn1 = nn.BatchNorm2d(64)
-        self.relu = nn.ReLU(inplace=True)
-        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
-        
-        self.layer1 = self._make_layer(64, 2, stride=1)
-        self.layer2 = self._make_layer(128, 2, stride=2)
-        self.layer3 = self._make_layer(256, 2, stride=2)
-        self.layer4 = self._make_layer(512, 2, stride=2)
-
-        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
-        self.fc = nn.Linear(512, num_classes)
-
-        self._initialize_weights()
-
-    def _make_layer(self, out_channels, blocks, stride=1):
-        downsample = None
-        if stride != 1 or self.in_channels != out_channels:
-            downsample = nn.Sequential(
-                nn.Conv2d(self.in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
-                nn.BatchNorm2d(out_channels),
-            )
-        layers = [BasicBlock(self.in_channels, out_channels, stride, downsample)]
-        self.in_channels = out_channels
-        for _ in range(1, blocks):
-            layers.append(BasicBlock(out_channels, out_channels))
-        return nn.Sequential(*layers)
-
-    def _initialize_weights(self):
-        for m in self.modules():
-            if isinstance(m, nn.Conv2d):
-                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
-            elif isinstance(m, (nn.BatchNorm2d, nn.BatchNorm1d)):
-                nn.init.constant_(m.weight, 1)
-                nn.init.constant_(m.bias, 0)
-            elif isinstance(m, nn.Linear):
-                nn.init.xavier_uniform_(m.weight)
-                nn.init.constant_(m.bias, 0)
-
-    def forward(self, x):
-        x = self.relu(self.bn1(self.conv1(x)))
-        x = self.maxpool(x)
-
-        x = self.layer1(x)
-        x = self.layer2(x)
-        x = self.layer3(x)
-        x = self.layer4(x)
-
-        x = self.avgpool(x)
-        x = torch.flatten(x, 1)
-        x = self.fc(x)
-        return x
-
-
-# 创建模型实例
-NUM_CLASSES = 2  # 根据你的需求修改分类数
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model = ResNet18(num_classes=NUM_CLASSES).to(device)
-
-# %%
-
-
-# %%
-
-weights = compute_class_weight(y=train_df.target.values, class_weight="balanced", classes=train_df.target.unique())
-class_weights = torch.FloatTensor(weights)
-if device.type=="cuda":
-    class_weights = class_weights.cuda()
-print(class_weights)
-
-
-# %%
-
-train_df.target.unique()
-
-
-# %%
-
-criterion = nn.CrossEntropyLoss(weight=class_weights)
-
-
-# %%
-
-
-def f1_score(preds, targets):
-    tp = (preds * targets).sum().to(torch.float32)
-    fp = ((1 - targets) * preds).sum().to(torch.float32)
-    fn = (targets * (1 - preds)).sum().to(torch.float32)
-
-    epsilon = 1e-7
-    precision = tp / (tp + fp + epsilon)
-    recall = tp / (tp + fn + epsilon)
-
-    f1_score = 2 * precision * recall / (precision + recall + epsilon)
-    return f1_score
-
-
-# %%
-
-
-def train_loop(model, criterion, optimizer, lr_find=False, scheduler=None, num_epochs=3, lam=0.0):
-    since = time.time()
-    if lr_find:
-        phases = ["train"]
-    else:
-        phases = ["train", "dev", "test"]
-
-    best_model_wts = copy.deepcopy(model.state_dict())
-    best_acc = 0.0
-
-    loss_dict = {"train": [], "dev": [], "test": []}
-    lam_tensor = torch.tensor(lam, device=device)
-
-    running_loss_dict = {"train": [], "dev": [], "test": []}
-
-    lr_find_loss = []
-    lr_find_lr = []
-    smoothing = 0.2
-
-    for epoch in range(num_epochs):
-        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
-        print('-' * 10)
-
-        for phase in phases:
-            if phase == "train":
-                model.train()
-            else:
-                model.eval()
-
-            running_loss = 0.0
-            running_corrects = 0
-
-            tk0 = tqdm(dataloaders[phase], total=int(len(dataloaders[phase])))
-
-            counter = 0
-            for bi, d in enumerate(tk0):
-                inputs = d["image"]
-                labels = d["label"]
-                inputs = inputs.to(device, dtype=torch.float)
-                labels = labels.to(device, dtype=torch.long)
-
-                # zero the parameter gradients
-                optimizer.zero_grad()
-
-                # forward
-                # track history if only in train
-
-                with torch.set_grad_enabled(phase == 'train'):
-                    outputs = model(inputs)
-                    _, preds = torch.max(outputs, 1)
-                    loss = criterion(outputs, labels)
-
-                    # backward + optimize only if in training phase
-                    if phase == 'train':
-                        loss.backward()
-
-                        # l2_reg = torch.tensor(0., device=device)
-                        # for param in model.parameters():
-                        # l2_reg = lam_tensor * torch.norm(param)
-
-                        # loss += l2_reg
-
-                        optimizer.step()
-                        # cyclical lr schedule is invoked after each batch
-                        if scheduler is not None:
-                            scheduler.step()
-                            if lr_find:
-                                lr_step = optimizer.state_dict()["param_groups"][0]["lr"]
-                                lr_find_lr.append(lr_step)
-                                if counter == 0:
-                                    lr_find_loss.append(loss.item())
-                                else:
-                                    smoothed_loss = smoothing * loss.item() + (1 - smoothing) * lr_find_loss[-1]
-                                    lr_find_loss.append(smoothed_loss)
-
-                # statistics
-                running_loss += loss.item() * inputs.size(0)
-                running_corrects += torch.sum(preds == labels.data)
-
-                counter += 1
-
-                tk0.set_postfix({'loss': running_loss / (counter * dataloaders[phase].batch_size),
-                                 'accuracy': running_corrects.double() / (counter * dataloaders[phase].batch_size)})
-                running_loss_dict[phase].append(running_loss / (counter * dataloaders[phase].batch_size))
-
-            epoch_loss = running_loss / dataset_sizes[phase]
-            loss_dict[phase].append(epoch_loss)
-            epoch_acc = running_corrects.double() / dataset_sizes[phase]
-            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
-                phase, epoch_loss, epoch_acc))
-
-            # deep copy the model
-            if phase == 'dev' and epoch_acc > best_acc:
-                best_acc = epoch_acc
-                best_model_wts = copy.deepcopy(model.state_dict())
-        print()
-
-    time_elapsed = time.time() - since
-    print('Training complete in {:.0f}m {:.0f}s'.format(
-        time_elapsed // 60, time_elapsed % 60))
-    print('Best val Acc: {:4f}'.format(best_acc))
-
-    # load best model weights
-    model.load_state_dict(best_model_wts)
-    results = {"model": model,
-               "loss_dict": loss_dict,
-               "running_loss_dict": running_loss_dict,
-               "lr_find": {"lr": lr_find_lr, "loss": lr_find_loss}}
-    return results
-
-
-# %%
-
-
-start_lr = 1e-6
-end_lr = 0.1
-
-
-# %%
-
-
-def get_lr_search_scheduler(optimizer, min_lr, max_lr, max_iterations):
-    # max_iterations should be the number of steps within num_epochs_*epoch_iterations
-    # this way the learning rate increases linearily within the period num_epochs*epoch_iterations
-    scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer=optimizer,
-                                                  base_lr=min_lr,
-                                                  max_lr=max_lr,
-                                                  step_size_up=max_iterations,
-                                                  step_size_down=max_iterations,
-                                                  mode="triangular")
-
-    return scheduler
-
-
-def get_scheduler(optimiser, min_lr, max_lr, stepsize):
-    # suggested_stepsize = 2*num_iterations_within_epoch
-    stepsize_up = int(stepsize / 2)
-    scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer=optimiser,
-                                                  base_lr=min_lr,
-                                                  max_lr=max_lr,
-                                                  step_size_up=stepsize_up,
-                                                  step_size_down=stepsize_up,
-                                                  mode="triangular")
-    return scheduler
-
-
-# %%
-
-
-import math
-
-if find_learning_rate:
-    lr_find_epochs = 1
-    optimizer = optim.SGD(model.fc.parameters(), start_lr)
-    scheduler = get_lr_search_scheduler(optimizer, start_lr, end_lr, lr_find_epochs * len(train_dataloader))
-    results = train_loop(model, criterion, optimizer, lr_find=True, scheduler=scheduler, num_epochs=lr_find_epochs)
-    lr_find_lr, lr_find_loss = results["lr_find"]["lr"], results["lr_find"]["loss"]
-
-    find_lr_df = pd.DataFrame(lr_find_loss, columns=["smoothed loss"])
-    find_lr_df.loc[:, "lr"] = lr_find_lr
-    find_lr_df.to_csv("learning_rate_search.csv", index=False)
-else:
-    find_lr_df = pd.read_csv(MODEL_PATH + "learning_rate_search.csv")
-
-
-# %%
-
-fig, ax = plt.subplots(1,2,figsize=(20,5))
-ax[0].plot(find_lr_df.lr.values)
-ax[1].plot(find_lr_df["smoothed loss"].values)
-ax[0].set_xlabel("Steps")
-ax[0].set_ylabel("Learning rate")
-ax[1].set_xlabel("Steps")
-ax[1].set_ylabel("Loss");
-ax[0].set_title("How the learning rate increases during search")
-ax[1].set_title("How the training loss evolves during search")
-
-plt.figure(figsize=(20,5))
-plt.plot(find_lr_df.lr.values, find_lr_df["smoothed loss"].values, '-', color="tomato");
-plt.xlabel("Learning rate")
-plt.xscale("log")
-plt.ylabel("Smoothed Loss")
-plt.title("Searching for the optimal learning rate");
-
-
-# %%
-
-start_lr = 1e-6
-end_lr = 0.006
-
-
-# %%
-
-if run_training:
-    NUM_EPOCHS = 2
-    optimizer = optim.SGD(model.fc.parameters(), lr=0.01)
-    scheduler = get_scheduler(optimizer, start_lr, end_lr, 2 * NUM_EPOCHS)
-    results = train_loop(model, criterion, optimizer, scheduler=scheduler, num_epochs=NUM_EPOCHS)
-    model, loss_dict, running_loss_dict = results["model"], results["loss_dict"], results["running_loss_dict"]
-
-    if device == "cpu":
-        OUTPUT_PATH += ".pth"
-    else:
-        OUTPUT_PATH += "_cuda.pth"
-
-    torch.save(model.state_dict(), OUTPUT_PATH)
-
-    losses_df = pd.DataFrame(loss_dict["train"], columns=["train"])
-    losses_df.loc[:, "dev"] = loss_dict["dev"]
-    losses_df.loc[:, "test"] = loss_dict["test"]
-    losses_df.to_csv("losses_breastcancer.csv", index=False)
-
-    running_losses_df = pd.DataFrame(running_loss_dict["train"], columns=["train"])
-    running_losses_df.loc[0:len(running_loss_dict["dev"]) - 1, "dev"] = running_loss_dict["dev"]
-    running_losses_df.loc[0:len(running_loss_dict["test"]) - 1, "test"] = running_loss_dict["test"]
-    running_losses_df.to_csv("running_losses_breastcancer.csv", index=False)
-else:
-    if device == "cpu":
-        load_path = MODEL_PATH + ".pth"
-    else:
-        load_path = MODEL_PATH + "_cuda.pth"
-    model.load_state_dict(torch.load(load_path, map_location='cpu'))
-    model.eval()
-
-    losses_df = pd.read_csv(LOSSES_PATH + "losses_breastcancer.csv")
-    running_losses_df = pd.read_csv(LOSSES_PATH + "running_losses_breastcancer.csv")
-
-
-# %%
-
-plt.figure(figsize=(20,5))
-
-plt.plot(losses_df["train"], '-o', label="train")
-plt.plot(losses_df["dev"], '-o', label="dev")
-plt.plot(losses_df["test"], '-o', label="dev")
-plt.xlabel("Epoch")
-plt.ylabel("Weighted x-entropy")
-plt.title("Loss change over epoch")
-plt.legend();
-
-
-# %%
-
-fig, ax = plt.subplots(3,1,figsize=(20,15))
-
-ax[0].plot(running_losses_df["train"], '-o', label="train")
-ax[0].set_xlabel("Step")
-ax[0].set_ylabel("Weighted x-entropy")
-ax[0].set_title("Loss change over steps")
-ax[0].legend();
-
-ax[1].plot(running_losses_df["dev"], '-o', label="dev", color="orange")
-ax[1].set_xlabel("Step")
-ax[1].set_ylabel("Weighted x-entropy")
-ax[1].set_title("Loss change over steps")
-ax[1].legend();
-
-ax[2].plot(running_losses_df["test"], '-o', label="test", color="mediumseagreen")
-ax[2].set_xlabel("Step")
-ax[2].set_ylabel("Weighted x-entropy")
-ax[2].set_title("Loss change over steps")
-ax[2].legend();
-
-
-# %%
-
-
-def sigmoid(x):
-    return 1. / (1 + np.exp(-x))
-
-
-def evaluate_model(model, predictions_df, key):
-    was_training = model.training
-    model.eval()
-
-    with torch.no_grad():
-        for i, data in enumerate(dataloaders[key]):
-            inputs = data["image"].to(device)
-            labels = data["label"].to(device)
-
-            outputs = model(inputs)
-            _, preds = torch.max(outputs, 1)
-
-            proba = outputs.cpu().numpy().astype(float)
-            predictions_df.loc[i * BATCH_SIZE:(i + 1) * BATCH_SIZE - 1, "proba"] = sigmoid(proba[:, 1])
-            predictions_df.loc[i * BATCH_SIZE:(i + 1) * BATCH_SIZE - 1, "true"] = data["label"].numpy().astype(int)
-            predictions_df.loc[i * BATCH_SIZE:(i + 1) * BATCH_SIZE - 1, "predicted"] = preds.cpu().numpy().astype(
-                int)
-            predictions_df.loc[i * BATCH_SIZE:(i + 1) * BATCH_SIZE - 1, "x"] = data["x"].numpy()
-            predictions_df.loc[i * BATCH_SIZE:(i + 1) * BATCH_SIZE - 1, "y"] = data["y"].numpy()
-            predictions_df.loc[i * BATCH_SIZE:(i + 1) * BATCH_SIZE - 1, "patient_id"] = data["patient_id"]
-
-    predictions_df = predictions_df.dropna()
-    return predictions_df
-
-
-# %%
-
-
-if run_training:
-    dev_predictions = pd.DataFrame(index=np.arange(0, dataset_sizes["dev"]), columns=["true", "predicted", "proba"])
-    test_predictions = pd.DataFrame(index=np.arange(0, dataset_sizes["test"]), columns=["true", "predicted", "proba"])
-
-    dev_predictions = evaluate_model(model, dev_predictions, "dev")
-    test_predictions = evaluate_model(model, test_predictions, "test")
-
-    dev_predictions.to_csv("dev_predictions.csv", index=False)
-    test_predictions.to_csv("test_predictions.csv", index=False)
-
-else:
-
-    dev_predictions = pd.read_csv(LOSSES_PATH + "dev_predictions.csv")
-    test_predictions = pd.read_csv(LOSSES_PATH + "test_predictions.csv")
-
-    dev_predictions.patient_id = dev_predictions.patient_id.astype(str)
-    
-
-
-# %%
-fig, ax = plt.subplots(3, 3, figsize=(20, 20))
-
-print("Unique patient IDs in dev_predictions:")
-print(dev_predictions['patient_id'].unique())
-for n in range(3):
-
-    idx = dev_predictions.patient_id.unique()[n]
-    grid, mask, broken_patches, mask_proba = visualise_breast_tissue(idx, pred_df=dev_predictions)
-
-    ax[n, 0].imshow(grid, alpha=0.9)
-    ax[n, 1].imshow(mask, alpha=0.8)
-    ax[n, 1].imshow(grid, alpha=0.7)
-    ax[n, 2].imshow(mask_proba[:, :, 0], cmap="YlOrRd")
-
-    for m in range(3):
-        ax[n, m].set_xlabel("y-coord")
-        ax[n, m].set_ylabel("x-coord")
-        ax[n, m].grid(False)
-
-    ax[n, 0].set_title("Breast tissue slice of patient: " + patient_id)
-    ax[n, 1].set_title("Cancer tissue colored red \n of patient: " + patient_id);
-    ax[n, 2].set_title("Cancer probability");
-
-
-# %%
-
-dev_predictions.head()
-
-
-# %%
-
-fig, ax = plt.subplots(1,3,figsize=(20,5))
-sns.countplot(dev_predictions.true.astype(float), ax=ax[0], palette="Reds_r")
-ax[0].set_title("Target counts of dev data");
-sns.distplot(dev_predictions.proba.astype(float), ax=ax[1], kde=False, color="tomato")
-ax[0].set_title("Predicted probability of cancer in dev");
-sns.distplot(test_predictions.proba.astype(float), ax=ax[2], kde=False, color="mediumseagreen");
-ax[2].set_title("Predicted probability of cancer in test");
-
-
-# %%
-
-from sklearn.metrics import confusion_matrix
-
-
-def get_confusion_matrix(y_true, y_pred):
-    transdict = {1: "cancer", 0: "no cancer"}
-    y_t = np.array([transdict[x] for x in y_true])
-    y_p = np.array([transdict[x] for x in y_pred])
-
-    labels = ["no cancer", "cancer"]
-    index_labels = ["actual no cancer", "actual cancer"]
-    col_labels = ["predicted no cancer", "predicted cancer"]
-    confusion = confusion_matrix(y_t, y_p, labels=labels)
-    confusion_df = pd.DataFrame(confusion, index=index_labels, columns=col_labels)
-    for n in range(2):
-        confusion_df.iloc[n] = confusion_df.iloc[n] / confusion_df.sum(axis=1).iloc[n]
-    return confusion_df
-
-# %%
-
-
-