# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function
import functools
import six
import numpy as np
import utool as ut
from wbia_cnn import ingest_data
from Lasagne import lasagne
from Lasagne.lasagne import layers, nonlinearities
from theano import tensor as T # NOQA
from wbia_cnn.models import abstract_models
print, rrr, profile = ut.inject2(__name__)
class NonlinearityLayerSpatial(lasagne.layers.NonlinearityLayer):
    """Nonlinearity applied across all spatial dimensions of a network's output.

    For an incoming 4D tensor laid out as (batch, channels, width, height):

    * If the spatial extent is 1x1, the input is flattened to (batch, channels)
      before the nonlinearity is applied (so e.g. softmax normalizes over the
      channel axis).
    * Otherwise the tensor is transposed and reshaped so the nonlinearity sees
      rows of length ``channels`` — one row per spatial location — and the
      original layout is restored afterwards.
    """

    def __init__(self, incoming, nonlinearity=nonlinearities.rectify, **kwargs):
        """
        Args:
            incoming: the Lasagne layer feeding into this one
            nonlinearity: elementwise nonlinearity; ``None`` means identity
        """
        super(NonlinearityLayerSpatial, self).__init__(incoming, **kwargs)
        # Override whatever the base class set; None maps to identity.
        self.nonlinearity = (
            nonlinearities.identity if nonlinearity is None else nonlinearity
        )
        in_batch, in_channels, in_width, in_height = self.input_shape
        # 1x1 spatial extent can be handled by a simple flatten to (N, C).
        self.reshape_required = in_width == 1 and in_height == 1

    def get_output_for(self, input, **kwargs):
        old_shape = T.shape(input)
        if self.reshape_required:
            # (N, C, 1, 1) -> (N, C): collapse the singleton spatial dims.
            input = T.reshape(input, (-1, old_shape[1]))
            return self.nonlinearity(input)
        elif input.ndim == 4:
            # Move channels to the last axis, flatten to (N*H*W, C), apply
            # the nonlinearity row-wise, then undo the reshape and transpose.
            input = input.dimshuffle((0, 3, 2, 1))
            temp = T.shape(input)
            input = T.reshape(input, (-1, old_shape[1]))
            activation = self.nonlinearity(input)
            activation = T.reshape(activation, temp)
            activation = activation.dimshuffle((0, 3, 2, 1))  # Transpose back
            return activation
        else:
            # Non-4D input: defer to the stock NonlinearityLayer behavior.
            _super = super(NonlinearityLayerSpatial, self)
            return _super.get_output_for(input, **kwargs)

    def get_output_shape_for(self, input_shape):
        if self.reshape_required:
            # Flattened output drops the singleton spatial dimensions.
            return input_shape[:2]
        else:
            _super = super(NonlinearityLayerSpatial, self)
            return _super.get_output_shape_for(input_shape)
@six.add_metaclass(ut.ReloadingMetaclass)
class BackgroundModel(abstract_models.AbstractCategoricalModel):
    """Categorical CNN that scores image patches as one of ``num_output``
    classes (default 2, i.e. background vs. foreground).

    NOTE: this project names the method receiver ``model`` instead of
    ``self``; that convention is preserved here.
    """

    def __init__(
        model,
        autoinit=False,
        batch_size=128,
        data_shape=(48, 48, 3),
        num_output=2,
        **kwargs
    ):
        """
        Args:
            autoinit (bool): accepted for interface compatibility; not read
                here (presumably consumed by callers or the base class —
                TODO confirm)
            batch_size (int): minibatch size
            data_shape (tuple): input patch shape (width, height, channels)
            num_output (int): number of output classes
        """
        model.num_output = num_output
        super(BackgroundModel, model).__init__(
            batch_size=batch_size, data_shape=data_shape, name='background', **kwargs
        )

    def learning_rate_update(model, x):
        """Return the halved learning rate ``x / 2.0``."""
        return x / 2.0

    def learning_rate_shock(model, x):
        """Return the doubled learning rate ``x * 2.0``."""
        return x * 2.0

    def augment(model, Xb, yb=None):
        """Randomly mirror each image in ``Xb`` horizontally (p=0.5), in place.

        Labels ``yb`` are returned unchanged — a horizontal flip does not
        alter the class of a patch.
        """
        import random
        import cv2

        for index, X in enumerate(Xb):
            if random.uniform(0.0, 1.0) <= 0.5:
                Xb[index] = cv2.flip(X, 1)
        return Xb, yb

    def get_background_def(model, verbose=ut.VERBOSE, **kwargs):
        """Return the architecture as a list of partial layer constructors.

        The list is evaluated later (see ``init_arch``) by
        ``custom_layers.evaluate_layer_list``.
        """
        # _CaffeNet = abstract_models.PretrainedNetwork('caffenet')
        _P = functools.partial
        hidden_initkw = {
            'nonlinearity': nonlinearities.LeakyRectify(leakiness=(1.0 / 10.0))
        }
        from wbia_cnn import custom_layers

        Conv2DLayer = custom_layers.Conv2DLayer
        MaxPool2DLayer = custom_layers.MaxPool2DLayer
        # DenseLayer = layers.DenseLayer
        network_layers_def = [
            _P(layers.InputLayer, shape=model.input_shape),
            _P(
                Conv2DLayer,
                num_filters=16,
                filter_size=(11, 11),
                name='C0',
                **hidden_initkw
            ),
            _P(layers.DropoutLayer, p=0.1, name='D0'),
            _P(MaxPool2DLayer, pool_size=(2, 2), stride=(2, 2), name='P0'),
            _P(
                Conv2DLayer,
                num_filters=32,
                filter_size=(5, 5),
                name='C1',
                **hidden_initkw
            ),
            _P(layers.DropoutLayer, p=0.2, name='D1'),
            _P(MaxPool2DLayer, pool_size=(2, 2), stride=(2, 2), name='P1'),
            _P(
                Conv2DLayer,
                num_filters=64,
                filter_size=(3, 3),
                name='C2',
                **hidden_initkw
            ),
            _P(layers.DropoutLayer, p=0.3, name='D2'),
            _P(MaxPool2DLayer, pool_size=(2, 2), stride=(2, 2), name='P2'),
            _P(
                Conv2DLayer,
                num_filters=128,
                filter_size=(3, 3),
                name='C4',
                **hidden_initkw
            ),
            _P(layers.DropoutLayer, p=0.4, name='D4'),
            # 1x1 convolution (network-in-network) maps features to class scores
            _P(layers.NINLayer, num_units=model.num_output, name='F3', nonlinearity=None),
            # Softmax over the channel axis at each spatial location
            _P(NonlinearityLayerSpatial, name='S0', nonlinearity=nonlinearities.softmax),
        ]
        return network_layers_def

    def init_arch(model, verbose=ut.VERBOSE, **kwargs):
        r"""Instantiate the layer definitions and record the output layer.

        Returns:
            the final (output) layer of the connected network
        """
        (_, input_channels, input_width, input_height) = model.input_shape
        if verbose:
            print('[model] Initialize background model architecture')
            print('[model] * batch_size = %r' % (model.batch_size,))
            print('[model] * input_width = %r' % (input_width,))
            print('[model] * input_height = %r' % (input_height,))
            print('[model] * input_channels = %r' % (input_channels,))
            print('[model] * output_dims = %r' % (model.output_dims,))
        network_layers_def = model.get_background_def(verbose=verbose, **kwargs)
        # connect and record layers
        from wbia_cnn import custom_layers

        network_layers = custom_layers.evaluate_layer_list(
            network_layers_def, verbose=verbose
        )
        # model.network_layers = network_layers
        output_layer = network_layers[-1]
        model.output_layer = output_layer
        return output_layer
def train_background(output_path, data_fpath, labels_fpath):
    r"""Train the background classification model and save its state.

    Args:
        output_path (str): directory where the dataset/training state lives
        data_fpath (str): path to the numpy data file
        labels_fpath (str): path to the numpy labels file

    Returns:
        str: path of the saved model state

    CommandLine:
        python -m wbia_cnn.train --test-train_background

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia_cnn.train import *  # NOQA
        >>> result = train_background()
        >>> print(result)
    """
    era_size = 8
    max_epochs = 128
    # Defaults below may be overridden from the command line.
    hyperparams = ut.argparse_dict(
        {
            'era_size': era_size,
            'era_clean': True,
            'batch_size': 128,
            'learning_rate': 0.01,
            'momentum': 0.9,
            'weight_decay': 0.0005,
            'augment_on': True,
            'whiten_on': True,
            'max_epochs': max_epochs,
        }
    )
    ut.colorprint('[netrun] Ensuring Dataset', 'yellow')
    dataset = ingest_data.get_numpy_dataset2(
        'background', data_fpath, labels_fpath, output_path
    )
    print('dataset.training_dpath = %r' % (dataset.training_dpath,))
    ut.colorprint('[netrun] Architecture Specification', 'yellow')
    model = BackgroundModel(
        data_shape=dataset.data_shape,
        training_dpath=dataset.training_dpath,
        **hyperparams
    )
    # BUGFIX: message previously misspelled 'archchitecture'
    ut.colorprint('[netrun] Initialize architecture', 'yellow')
    model.init_arch()
    ut.colorprint('[netrun] * Initializing new weights', 'lightgray')
    # Resume from a previously saved state when one exists; otherwise
    # start from freshly initialized weights.
    if model.has_saved_state():
        model.load_model_state()
    else:
        model.reinit_weights()
    # ut.colorprint('[netrun] Need to initialize training state', 'yellow')
    # X_train, y_train = dataset.subset('train')
    # model.ensure_data_params(X_train, y_train)
    ut.colorprint('[netrun] Training Requested', 'yellow')
    # parse training arguments
    config = ut.argparse_dict(
        dict(
            era_size=era_size,
            max_epochs=max_epochs,
            show_confusion=False,
        )
    )
    model.monitor_config.update(**config)
    X_train, y_train = dataset.subset('train')
    X_valid, y_valid = dataset.subset('valid')
    ut.colorprint('[netrun] Init encoder and convert labels', 'yellow')
    if hasattr(model, 'init_encoder'):
        model.init_encoder(y_train)
    if getattr(model, 'encoder', None) is not None:
        # Map raw labels to integer indices with a dict: O(1) per lookup
        # instead of the O(n) scan list.index performed per label.
        class_list = list(model.encoder.classes_)
        index_of = {label: index for index, label in enumerate(class_list)}
        y_train = np.array([index_of[_] for _ in y_train])
        y_valid = np.array([index_of[_] for _ in y_valid])
    ut.colorprint('[netrun] Begin training', 'yellow')
    model.fit(X_train, y_train, X_valid=X_valid, y_valid=y_valid)
    model_path = model.save_model_state()
    return model_path
if __name__ == '__main__':
    """
    CommandLine:
        python -m wbia_cnn.models.background
        python -m wbia_cnn.models.background --allexamples
        python -m wbia_cnn.models.background --allexamples --noface --nosrc
    """
    # freeze_support must run before any processes are spawned on win32
    import multiprocessing

    multiprocessing.freeze_support()
    import utool as ut  # NOQA

    # Run this module's doctests as the CLI entry point.
    ut.doctest_funcs()