--- title: Title keywords: fastai sidebar: home_sidebar ---
%reload_ext autoreload
%autoreload 2
%matplotlib inline
from fastai.vision import *
from image_tabular.core import *
from image_tabular.metric import *
# Prefer the first CUDA device when PyTorch reports one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
import warnings
# Hide UserWarnings attributed to the torch.nn.functional module.
warnings.filterwarnings(action="ignore", module="torch.nn.functional", category=UserWarning)
# Folder holding the competition CSV files and image folders.
data_path = Path("./data/siim-isic-melanoma-classification/")
# Read the train and test metadata tables (train first, then test).
train_df, test_df = (pd.read_csv(data_path/csv_name) for csv_name in ("train.csv", "test.csv"))
# Report the number of rows in each table.
print(train_df.shape[0], test_df.shape[0])
train_df.head()
# Relative frequency of each target value: the dataset is extremely unbalanced,
# most of the images are benign.
train_df["target"].value_counts(normalize=True)
# Augmentation transforms from get_transforms, with vertical flips switched on.
tfms = get_transforms(flip_vert=True)
# Image size handed to the transforms, and the batch size for the databunch.
size, bs = 128, 64
# Indices of the rows of train_df held out for validation.
val_idx = get_valid_index(train_df)
len(val_idx)
def _image_list(df, folder):
    """Build an ImageList of the ".jpg" files named in df["image_name"] under data_path/folder."""
    return ImageList.from_df(df, path=data_path, cols="image_name",
                             folder=folder, suffix=".jpg")


# Training images: hold out val_idx for validation, label from the "target" column,
# apply the transforms at the chosen size, then batch and normalize with imagenet_stats.
image_data = (
    _image_list(train_df, "train_128")
    .split_by_idx(val_idx)
    .label_from_df(cols="target")
    .transform(tfms, size=size)
    .databunch(bs=bs)
    .normalize(imagenet_stats)
)
# Attach the test images to the databunch so that predictions can be made for them.
test_image_data = _image_list(test_df, "test_128")
image_data.add_test(test_image_data)
# Preview a few example images.
image_data.show_batch(rows=3, figsize=(12, 9))
# The dataset is extremely unbalanced, so weight the loss per class.
# Assuming "target" is 0/1, its mean is the share of positives: the first weight is the
# inverse frequency of target == 0 and the second the inverse frequency of target == 1.
positive_rate = train_df["target"].mean()
weights = [1/(1-positive_rate), 1/positive_rate]
# The weight tensor is moved to the same device selected for training.
loss_func = CrossEntropyFlat(weight=torch.FloatTensor(weights).to(device))
# Wrap the data, a ResNet-50 backbone and the weighted loss in a fastai learner;
# accuracy and ROC AUC are reported as metrics.
learn = cnn_learner(
    image_data,
    models.resnet50,
    loss_func=loss_func,
    metrics=[accuracy, ROCAUC()],
    ps=0.2,
    lin_ftrs=[512, 256, 32],
)
# Run the learning-rate finder, then plot what the recorder captured.
learn.lr_find()
learn.recorder.plot()
# First pass: ten one-cycle epochs with a maximum learning rate of 1e-4.
learn.fit_one_cycle(10, max_lr=1e-4)
# Second pass: unfreeze all layer groups and train the entire model for five more
# epochs using differential learning rates between 1e-6 and 1e-4.
learn.unfreeze()
learn.fit_one_cycle(5, max_lr=slice(1e-6, 1e-4))
# make predictions for the test set (the second return value is not used below)
preds, y = learn.get_preds(DatasetType.Test)
# submit predictions to kaggle: start from the sample submission and overwrite "target"
submit = pd.read_csv(data_path/"sample_submission.csv")
# Column 1 of preds is used as the score for target == 1. Convert it to a NumPy array
# before handing it to pandas: depending on the pandas version, a raw torch tensor can be
# stored element by element as 0-d tensor objects and end up in the CSV as "tensor(...)".
# NOTE(review): this relies on the test rows and the sample submission sharing the same order.
submit["target"] = preds[:, 1].detach().cpu().numpy()
submit.to_csv(data_path/"image.csv", index=False)