--- title: Title keywords: fastai sidebar: home_sidebar ---
%reload_ext autoreload
%autoreload 2
%matplotlib inline
from fastai.vision import *
from fastai.tabular import *
from image_tabular.core import *
from image_tabular.dataset import *
from image_tabular.model import *
from image_tabular.metric import *
import warnings

# Silence UserWarnings whose originating module matches "torch.nn.functional".
warnings.filterwarnings("ignore", category=UserWarning, module="torch.nn.functional")

# Run on the first CUDA device when one is available; otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda:0') if use_cuda else torch.device('cpu')
# Directory holding the competition CSV files.
data_path = Path("./data/siim-isic-melanoma-classification/")

# Read the labelled training table and the unlabelled test table.
train_df, test_df = (pd.read_csv(data_path/csv_name) for csv_name in ("train.csv", "test.csv"))
print(len(train_df), len(test_df))
train_df.head()

# Class balance of the label column: the dataset is extremely unbalanced,
# most of the images are benign.
target_col = train_df["target"]
target_col.value_counts(normalize=True)

# Row indices of train_df held out for validation, as chosen by get_valid_index.
val_idx = get_valid_index(train_df)
len(val_idx)
# Column roles for the tabular model.
dep_var = 'target'                                        # label column
cat_names = ['sex', 'anatom_site_general_challenge']      # categorical features
cont_names = ['age_approx']                               # continuous features
# fastai tabular preprocessing steps applied to the training list.
procs = [FillMissing, Categorify, Normalize]
bs=64  # batch size passed to the databunch

# Unlabelled test rows. No procs are passed here -- presumably the processors
# fitted on the training list are applied when this list is attached via
# add_test below; TODO confirm against the fastai version in use.
test_data = TabularList.from_df(test_df, path=data_path,
                                cat_names=cat_names, cont_names=cont_names)

# Build the databunch step by step: item list -> train/valid split -> labels
# -> attach test set -> batch into data loaders.
train_items = TabularList.from_df(train_df, path=data_path, cat_names=cat_names,
                                  cont_names=cont_names, procs=procs)
split_items = train_items.split_by_idx(val_idx)
labelled_items = split_items.label_from_df(cols=dep_var)
tab_data = labelled_items.add_test(test_data).databunch(bs=bs)

# Show a few example rows from a batch.
tab_data.show_batch(rows=10)
# The dataset is extremely unbalanced, so each class is weighted by the inverse
# of its share of train_df["target"] (assuming a binary 0/1 target, the mean is
# the positive-class fraction -- the first weight is for the negative class,
# the second for the positive class).
pos_rate = train_df["target"].mean()
weights = [1/(1-pos_rate), 1/pos_rate]
class_weights = torch.FloatTensor(weights).to(device)
loss_func = CrossEntropyFlat(weight=class_weights)

# Bundle data, model and loss into a fastai tabular learner with two hidden
# layers of 8 units each; track accuracy and ROC AUC as metrics.
learn = tabular_learner(
    tab_data,
    layers=[8,8],
    metrics=[accuracy, ROCAUC()],
    loss_func=loss_func,
)
# Run the learning-rate finder and plot what the recorder captured, to help
# choose a learning rate.
learn.lr_find()
learn.recorder.plot()

# Train with the one-cycle schedule.
n_epochs, max_lr = 10, 1e-3
learn.fit_one_cycle(n_epochs, max_lr)
# Predict on the test set attached to the databunch. The second return value
# (targets) carries no information for an unlabelled test set, so it is discarded.
preds, _ = learn.get_preds(ds_type=DatasetType.Test)

# Build the Kaggle submission from the sample file, filling in column 1 of the
# predictions (assumed to be the positive-class score -- verify label order).
# NOTE(review): this relies on the rows of sample_submission.csv being in the
# same order as test_df; confirm before submitting.
submit = pd.read_csv(data_path/"sample_submission.csv")
# Convert explicitly to a NumPy array so the column is a plain float column
# rather than depending on how pandas coerces a torch tensor.
submit["target"] = preds[:, 1].detach().cpu().numpy()
submit.to_csv(data_path/"tab.csv", index=False)