# Source code for wbia_cnn.custom_layers

# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function
import numpy as np
import warnings
import six
import theano
import functools
from theano import tensor as T  # NOQA
import Lasagne as lasagne
from Lasagne.lasagne import init, layers, nonlinearities
from wbia_cnn import utils
import utool as ut

(print, rrr, profile) = ut.inject2(__name__)


FORCE_CPU = False  # ut.get_argflag('--force-cpu')
USING_GPU = False
try:
    if FORCE_CPU:
        raise ImportError('GPU is forced off')
    # use cuda_convnet for a speed improvement
    # will not be available without a GPU

    conv_impl = 'cuDNN'
    # conv_impl = 'cuda_convnet'
    if ut.get_computer_name().lower() == 'hyrule':
        # cuda_convnet seems broken on hyrule
        conv_impl = 'cuDNN'

    # http://lasagne.readthedocs.org/en/latest/modules/layers/conv.html#layers.Conv2DLayer

    if conv_impl == 'cuda_convnet':
        # cannot handle non-square images (pylearn2 module)
        # BUG FIX: these GPU layer modules live inside the lasagne package;
        # a bare ``import layers.cuda_convnet`` raises ImportError.
        import lasagne.layers.cuda_convnet

        Conv2DLayer = lasagne.layers.cuda_convnet.Conv2DCCLayer
        MaxPool2DLayer = lasagne.layers.cuda_convnet.MaxPool2DCCLayer
    elif conv_impl == 'cuDNN':
        import lasagne.layers.dnn

        Conv2DLayer = lasagne.layers.dnn.Conv2DDNNLayer
        MaxPool2DLayer = lasagne.layers.dnn.MaxPool2DDNNLayer
        """
        Need cuda convnet for background model otherwise
        <type 'exceptions.ValueError'>: GpuReshape: cannot reshape input of shape (128, 12, 26, 26) to shape (128, 676).
        Apply node that caused the error: GpuReshape{2}(GpuElemwise{Composite{((i0 * (i1 + i2)) + (i3 * Abs((i1 + i2))))}}[(0, 1)].0, TensorConstant{[128 676]})
        Toposort index: 36
        Inputs types: [CudaNdarrayType(float32, 4D), TensorType(int64, vector)]
        Inputs shapes: [(128, 12, 26, 26), (2,)]
        Inputs strides: [(676, 86528, 26, 1), (8,)]
        Inputs values: ['not shown', array([128, 676])]
        Outputs clients: [[GpuDot22(GpuReshape{2}.0, GpuReshape{2}.0)]]
        """
    elif conv_impl == 'gemm':
        # Dont use gemm
        import lasagne.layers.corrmm

        # NOTE(review): both names were bound to the *conv* layer here in the
        # original, and the attribute names look off for lasagne's corrmm
        # module (which exposes Conv2DMMLayer) — this branch is currently
        # dead code (conv_impl is 'cuDNN' above); verify before enabling.
        Conv2DLayer = lasagne.layers.corrmm.Conv2DLayer
        MaxPool2DLayer = lasagne.layers.corrmm.Conv2DLayer
    else:
        raise NotImplementedError('conv_impl = %r' % (conv_impl,))

    USING_GPU = True
except Exception as ex:  # ImportError is already an Exception subclass
    # Fall back to the (slower) CPU implementations shipped with lasagne.
    Conv2DLayer = layers.Conv2DLayer
    MaxPool2DLayer = layers.MaxPool2DLayer

    if utils.VERBOSE_CNN:
        print('Conv2DLayer = %r' % (Conv2DLayer,))
        print('MaxPool2DLayer = %r' % (MaxPool2DLayer,))

    if theano.config.device != 'cpu':
        ut.printex(ex, 'WARNING: GPU seems unavailable', iswarning=True)

if utils.VERBOSE_CNN:
    print(
        'lasagne.__version__ = %r' % getattr(lasagne, '__version__', None),
    )
    print('lasagne.__file__ = %r' % (getattr(lasagne, '__file__', None),))
    print('theano.__version__ = %r' % (getattr(theano, '__version__', None),))
    print('theano.__file__ = %r' % (getattr(theano, '__file__', None),))


class L1NormalizeLayer(layers.Layer):
    """Normalizes each row of its input to have an L1 norm of 1."""

    def __init__(self, input_layer, *args, **kwargs):
        super(L1NormalizeLayer, self).__init__(input_layer, *args, **kwargs)

    def get_output_for(self, input_, *args, **kwargs):
        # Sum of absolute values along the feature axis (axis=1), broadcast
        # back over that axis so each row divides by its own norm.
        ell1_norm = T.abs_(input_).sum(axis=1)
        output_ = input_ / ell1_norm[:, None]
        return output_
@six.add_metaclass(ut.ReloadingMetaclass)
class LocallyConnected2DLayer(layers.Layer):
    """
    Copy of the Conv2D layer that needs to be adapted into a locally connected layer

    A locally connected layer uses a *different* filter for every output
    location (no weight sharing), hence the 6D weight shape returned by
    :meth:`get_W_shape`.

    Args:
        incoming (layers.Layer):
        num_filters (?):
        filter_size (?):
        stride (tuple): (default = (1, 1))
        pad (int): (default = 0)
        untie_biases (bool): (default = False)
        W (GlorotUniform): weight initializer
        b (Constant): bias initializer
        nonlinearity (function): (default = nonlinearities.rectify)
        convolution (function): (default = T.nnet.conv2d)

    CommandLine:
        python -m wbia_cnn.custom_layers --exec-__init__

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia_cnn.custom_layers import *  # NOQA
        >>> incoming = testdata_input_layer(item_shape=(3,8,8), batch_size=4)
        >>> num_filters = 64
        >>> filter_size = (3, 3)
        >>> stride = (1, 1)
        >>> pad = 0
        >>> untie_biases = False
        >>> W = init.GlorotUniform()
        >>> b = init.Constant(0.)
        >>> nonlinearity = nonlinearities.rectify
        >>> convolution = T.nnet.conv2d
        >>> self = LocallyConnected2DLayer(incoming, num_filters, filter_size,
        >>>                                stride, pad, untie_biases, W, b,
        >>>                                nonlinearity, convolution)

    Ignore:
        self.get_output_rc(self.input_shape)
    """

    def __init__(
        self,
        incoming,
        num_filters,
        filter_size,
        stride=(1, 1),
        pad=0,
        untie_biases=False,
        # NOTE: shared initializer instances as defaults match the upstream
        # lasagne Conv2DLayer signature; initializers are stateless samplers.
        W=init.GlorotUniform(),
        b=init.Constant(0.0),
        nonlinearity=nonlinearities.rectify,
        convolution=T.nnet.conv2d,
        **kwargs
    ):
        super(LocallyConnected2DLayer, self).__init__(incoming, **kwargs)
        if nonlinearity is None:
            self.nonlinearity = nonlinearities.identity
        else:
            self.nonlinearity = nonlinearity

        self.num_filters = num_filters
        self.filter_size = lasagne.utils.as_tuple(filter_size, 2)
        self.stride = lasagne.utils.as_tuple(stride, 2)
        self.untie_biases = untie_biases
        self.convolution = convolution

        if pad == 'same':
            if any(s % 2 == 0 for s in self.filter_size):
                raise NotImplementedError('`same` padding requires odd filter size.')

        if pad == 'valid':
            self.pad = (0, 0)
        elif pad in ('full', 'same'):
            self.pad = pad
        else:
            self.pad = lasagne.utils.as_tuple(pad, 2, int)

        self.W = self.add_param(W, self.get_W_shape(), name='W')
        if b is None:
            self.b = None
        else:
            if self.untie_biases:
                biases_shape = (num_filters, self.output_shape[2], self.output_shape[3])
            else:
                biases_shape = (num_filters,)
            self.b = self.add_param(b, biases_shape, name='b', regularizable=False)

    def get_W_shape(self):
        """Get the shape of the weight matrix `W`.

        Returns
        -------
        tuple of int
            The shape of the weight matrix.

        (should have a different conv matrix for each output node)
        (ie NO WEIGHT SHARING)
        """
        num_input_channels = self.input_shape[1]
        output_rows, output_cols = self.get_output_rc(self.input_shape)
        return (
            self.num_filters,
            num_input_channels,
            self.filter_size[0],
            self.filter_size[1],
            output_rows,
            output_cols,
        )

    def get_output_rc(self, input_shape):
        # Compute the spatial (rows, cols) output size for a given input
        # shape using lasagne's standard conv output-length arithmetic.
        pad = self.pad if isinstance(self.pad, tuple) else (self.pad,) * 2
        output_rows = layers.conv.conv_output_length(
            input_shape[2], self.filter_size[0], self.stride[0], pad[0]
        )
        output_columns = layers.conv.conv_output_length(
            input_shape[3], self.filter_size[1], self.stride[1], pad[1]
        )
        return output_rows, output_columns

    def get_output_shape_for(self, input_shape):
        output_rows, output_columns = self.get_output_rc(input_shape)
        return (input_shape[0], self.num_filters, output_rows, output_columns)

    def get_output_for(self, input, input_shape=None, **kwargs):
        # The optional input_shape argument is for when get_output_for is
        # called directly with a different shape than self.input_shape.
        if input_shape is None:
            input_shape = self.input_shape

        if self.stride == (1, 1) and self.pad == 'same':
            # simulate same convolution by cropping a full convolution
            conved = self.convolution(
                input,
                self.W,
                subsample=self.stride,
                image_shape=input_shape,
                filter_shape=self.get_W_shape(),
                border_mode='full',
            )
            crop_x = self.filter_size[0] // 2
            crop_y = self.filter_size[1] // 2
            conved = conved[:, :, crop_x : -crop_x or None, crop_y : -crop_y or None]
        else:
            # no padding needed, or explicit padding of input needed
            if self.pad == 'full':
                border_mode = 'full'
                pad = [(0, 0), (0, 0)]
            elif self.pad == 'same':
                border_mode = 'valid'
                pad = [
                    (self.filter_size[0] // 2, self.filter_size[0] // 2),
                    (self.filter_size[1] // 2, self.filter_size[1] // 2),
                ]
            else:
                border_mode = 'valid'
                pad = [(self.pad[0], self.pad[0]), (self.pad[1], self.pad[1])]
            if pad != [(0, 0), (0, 0)]:
                input = lasagne.theano_extensions.padding.pad(input, pad, batch_ndim=2)
                input_shape = (
                    input_shape[0],
                    input_shape[1],
                    None
                    if input_shape[2] is None
                    else input_shape[2] + pad[0][0] + pad[0][1],
                    None
                    if input_shape[3] is None
                    else input_shape[3] + pad[1][0] + pad[1][1],
                )
            conved = self.convolution(
                input,
                self.W,
                subsample=self.stride,
                image_shape=input_shape,
                filter_shape=self.get_W_shape(),
                border_mode=border_mode,
            )

        if self.b is None:
            activation = conved
        elif self.untie_biases:
            activation = conved + self.b.dimshuffle('x', 0, 1, 2)
        else:
            activation = conved + self.b.dimshuffle('x', 0, 'x', 'x')

        return self.nonlinearity(activation)
# @six.add_metaclass(ut.ReloadingMetaclass)
# @six.add_metaclass(ut.ReloadingMetaclass)
class L2NormalizeLayer(layers.Layer):
    """
    Normalizes the outputs of a layer to have an L2 norm of 1.
    This is useful for siamese networks who's outputs will be
    comparsed using the L2 distance.
    """

    def __init__(self, input_layer, axis=1, **kwargs):
        super(L2NormalizeLayer, self).__init__(input_layer, **kwargs)
        # axis: everything before this axis is "batch", the rest is
        # flattened and normalized as one vector per batch item.
        self.axis = axis

    def get_output_for(self, input_, axis=None, T=T, **kwargs):
        """
        CommandLine:
            python -m wbia_cnn.custom_layers --test-L2NormalizeLayer.get_output_for

        Example0:
            >>> # ENABLE_DOCTEST
            >>> from wbia_cnn.custom_layers import *  # NOQA
            >>> # l2 normalization on batches of vector encodings
            >>> input_layer = testdata_input_layer(item_shape=(8,), batch_size=4)
            >>> inputdata_ = np.random.rand(*input_layer.shape).astype(np.float32)
            >>> axis = 1
            >>> self = L2NormalizeLayer(input_layer, axis=axis)
            >>> # Test numpy version
            >>> T = np
            >>> input_ = inputdata_
            >>> output_np = self.get_output_for(inputdata_, T=np)
            >>> assert np.all(np.isclose(np.linalg.norm(output_np, axis=axis), 1.0))
            >>> # Test theano version
            >>> T = theano.tensor
            >>> input_expr = input_ = T.matrix(name='vector_input')
            >>> output_expr = self.get_output_for(input_expr, T=T)
            >>> output_T = output_expr.eval({input_expr: inputdata_})
            >>> print(output_T)
            >>> assert np.all(np.isclose(output_T, output_np)), 'theano and numpy diagree'

        Example1:
            >>> # ENABLE_DOCTEST
            >>> from wbia_cnn.custom_layers import *  # NOQA
            >>> # l2 normalization on batches of image filters
            >>> input_layer = testdata_input_layer(item_shape=(3, 2, 2), batch_size=4)
            >>> inputdata_ = np.random.rand(*input_layer.shape).astype(np.float32)
            >>> axis = 2
            >>> self = L2NormalizeLayer(input_layer, axis=axis)
            >>> # Test numpy version
            >>> T = np
            >>> input_ = inputdata_
            >>> output_np = self.get_output_for(inputdata_, T=np)
            >>> output_flat_np = output_np.reshape(np.prod(input_layer.shape[0:2]), np.prod(input_layer.shape[2:4]))
            >>> assert np.all(np.isclose(np.linalg.norm(output_flat_np, axis=1), 1.0))
            >>> # Test theano version
            >>> T = theano.tensor
            >>> input_expr = input_ = T.tensor4(name='image_filter_input')
            >>> output_expr = self.get_output_for(input_expr, T=T)
            >>> output_T = output_expr.eval({input_expr: inputdata_})
            >>> print(output_T)
            >>> assert np.all(np.isclose(output_T, output_np)), 'theano and numpy diagree'
        """
        if axis is None:
            axis = self.axis

        input_shape = input_.shape
        batch_shape = input_shape[0:axis]
        rest_shape = input_shape[axis:]
        batch_size = T.prod(batch_shape)
        rest_size = T.prod(rest_shape)

        # reshape to two dimensions: (batch, flattened-rest)
        input_reshaped_ = input_.reshape((batch_size, rest_size))
        ell2_norm = T.sqrt(T.power(input_reshaped_, 2).sum(axis=-1))
        outputreshaped_ = input_reshaped_ / ell2_norm[:, None]
        output_ = outputreshaped_.reshape(input_shape)

        # BUG FIX: the original set ``output_.name`` unconditionally, which
        # raises AttributeError on the numpy path (ndarrays do not accept
        # new attributes). Only name theano expressions.
        if T is not np:
            output_.name = 'l2normalized(%s)' % (input_.name)
        return output_
class L2SquaredDistanceLayer(layers.Layer):
    """Squared L2 distance between consecutive pairs in the batch."""

    def __init__(self, input_layer, *args, **kwargs):
        super(L2SquaredDistanceLayer, self).__init__(input_layer, *args, **kwargs)

    def get_output_shape_for(self, input_shape):
        # Each pair of inputs produces one distance, halving the batch.
        # NOTE(review): the trailing feature dims are kept here although
        # get_output_for sums over axis=1 — verify against callers.
        return (input_shape[0] // 2,) + input_shape[1:]

    def get_output_for(self, input_, *args, **kwargs):
        # Split batch into pairs G1, G2
        G1, G2 = input_[0::2], input_[1::2]
        E = T.power((G1 - G2), 2).sum(axis=1)
        return E
class L1DistanceLayer(layers.Layer):
    """L1 (manhattan) distance between consecutive pairs in the batch."""

    def __init__(self, input_layer, *args, **kwargs):
        super(L1DistanceLayer, self).__init__(input_layer, *args, **kwargs)

    def get_output_shape_for(self, input_shape):
        # Each pair of inputs produces one distance, halving the batch.
        return (input_shape[0] // 2,) + input_shape[1:]

    def get_output_for(self, input_, *args, **kwargs):
        # Split batch into pairs G1, G2
        G1, G2 = input_[0::2], input_[1::2]
        E = T.abs_((G1 - G2)).sum(axis=1)
        return E
def testdata_input_layer(item_shape=(3, 32, 32), batch_size=128):
    """Build a lasagne InputLayer of shape ``(batch_size,) + item_shape`` for tests."""
    input_shape = (batch_size,) + item_shape
    input_layer = layers.InputLayer(shape=input_shape)
    return input_layer
class SiameseConcatLayer(layers.Layer):
    """
    TODO checkout layers.merge.ConcatLayer

    Takes two network representations in the batch and combines them along an axis.
    """

    def __init__(self, input_layer, data_per_label=2, axis=1, **kwargs):
        super(SiameseConcatLayer, self).__init__(input_layer, **kwargs)
        self.data_per_label = data_per_label
        self.axis = axis

    def get_output_shape_for(self, input_shape, axis=None):
        r"""
        Args:
            input_shape: shape being fed into this layer
            axis: overrideable for tests

        CommandLine:
            python -m wbia_cnn.custom_layers --test-get_output_shape_for

        Example0:
            >>> # ENABLE_DOCTEST
            >>> from wbia_cnn.custom_layers import *  # NOQA
            >>> input_layer = testdata_input_layer(item_shape=(3, 8, 16))
            >>> self = SiameseConcatLayer(input_layer)
            >>> input_shape = input_layer.shape
            >>> output_shape_list = [self.get_output_shape_for(input_shape, axis) for axis in [1, 2, 3, -3, -2, -1]]
            >>> result = str(output_shape_list[0:3]) + '\n' + str(output_shape_list[3:])
            >>> print(result)
            [(64, 6, 8, 16), (64, 3, 16, 16), (64, 3, 8, 32)]
            [(64, 6, 8, 16), (64, 3, 16, 16), (64, 3, 8, 32)]

        Example1:
            >>> # ENABLE_DOCTEST
            >>> from wbia_cnn.custom_layers import *  # NOQA
            >>> input_layer = testdata_input_layer(item_shape=(1024,))
            >>> self = SiameseConcatLayer(input_layer)
            >>> input_shape = input_layer.shape
            >>> output_shape_list = [self.get_output_shape_for(input_shape, axis) for axis in [1, -1]]
            >>> result = output_shape_list
            >>> print(result)
            [(64, 2048), (64, 2048)]
        """
        if axis is None:
            # allow override for tests
            axis = self.axis
        assert self.axis != 0, 'self.axis=%r cannot be 0' % (self.axis,)
        # Batch shrinks by data_per_label; the concat axis grows by it.
        new_batch_shape = (input_shape[0] // self.data_per_label,)
        new_shape_middle = (input_shape[axis] * self.data_per_label,)
        if axis >= 0:
            shape_front = input_shape[1:axis]
            shape_end = input_shape[axis + 1 :]
        else:
            # negative axis: translate to an equivalent positive slice end
            shape_front = input_shape[1:axis]
            shape_end = input_shape[len(input_shape) + axis + 1 :]
        output_shape = new_batch_shape + shape_front + new_shape_middle + shape_end
        return output_shape

    def get_output_for(self, input_, T=T, **kwargs):
        """
        CommandLine:
            python -m wbia_cnn.custom_layers --test-SiameseConcatLayer.get_output_for:1 --show

        Example0:
            >>> # ENABLE_DOCTEST
            >>> from wbia_cnn.custom_layers import *  # NOQA
            >>> input_shape = (128, 1024)
            >>> input_layer = layers.InputLayer(shape=input_shape)
            >>> self = SiameseConcatLayer(input_layer)
            >>> np.random.seed(0)
            >>> input_ = np.random.rand(*input_shape)
            >>> T = np
            >>> output_ = self.get_output_for(input_, T=T)
            >>> target_shape = self.get_output_shape_for(input_shape)
            >>> result = output_.shape
            >>> print(result)
            >>> assert target_shape == result
            (64, 2048)

        Example1:
            >>> # DISABLE_DOCTEST
            >>> from wbia_cnn.custom_layers import *  # NOQA
            >>> from wbia_cnn import utils
            >>> from wbia_cnn import draw_net
            >>> import theano
            >>> import numpy as np
            >>> input_layer = layers.InputLayer(shape=(4, 3, 32, 32))
            >>> cs_layer = CenterSurroundLayer(input_layer)
            >>> # Make sure that this can concat center surround properly
            >>> self = SiameseConcatLayer(cs_layer, axis=2, data_per_label=4)
            >>> data = utils.testdata_imglist()[0]
            >>> inputdata_ = utils.convert_cv2_images_to_theano_images(data)
            >>> outputdata_ = cs_layer.get_output_for(inputdata_, T=np)
            >>> input_ = outputdata_
            >>> output_ = self.get_output_for(input_, T=np)
            >>> ut.quit_if_noshow()
            >>> img_list = utils.convert_theano_images_to_cv2_images(output_)
            >>> interact_image_list(img_list, num_per_page=2)
        """
        data_per_label = self.data_per_label
        # Slice the interleaved batch into its data_per_label groups and
        # rejoin them along the configured axis.
        split_inputs = [input_[count::data_per_label] for count in range(data_per_label)]
        output_ = T.concatenate(split_inputs, axis=self.axis)
        return output_
def interact_image_list(img_list, num_per_page=1):
    """Interactively page through ``img_list``, ``num_per_page`` images per figure."""
    # from wbia.viz import viz_helpers as vh
    import plottool as pt

    nRows, nCols = pt.get_square_row_cols(num_per_page)
    chunked_iter = list(ut.ichunks(img_list, num_per_page))
    for img_chunks in ut.InteractiveIter(chunked_iter, display_item=False):
        pnum_ = pt.make_pnum_nextgen(nRows, nCols)
        pt.figure(fnum=1, doclf=True)
        for img in img_chunks:
            pt.imshow(img, pnum=pnum_())
        pt.update()
def testdata_centersurround(item_shape):
    """Build a CenterSurroundLayer and matching theano-format test images."""
    input_layer = testdata_input_layer(item_shape)
    data = utils.testdata_imglist(item_shape)[0]
    self = CenterSurroundLayer(input_layer)
    inputdata_ = utils.convert_cv2_images_to_theano_images(data)
    return self, inputdata_
class CenterSurroundLayer(layers.Layer):
    """
    Doubles the batch by emitting, for each input patch, a center crop and a
    strided-subsampled "surround" view, each at half the spatial resolution.
    """

    def __init__(self, input_layer, *args, **kwargs):
        # self.name = kwargs.pop('name', None)
        super(CenterSurroundLayer, self).__init__(input_layer, *args, **kwargs)

    def get_output_shape_for(self, input_shape):
        batch_size, channels, height, width = input_shape
        if height % 2 == 1 or width % 2 == 1:
            warnings.warn(
                'input layer to CenterSurroundLayer should ideally have an even width and height.'
            )
        output_shape = (batch_size * 2, channels, height // 2, width // 2)
        return output_shape

    def get_output_for(self, input_expr, T=T, **kwargs):
        r"""
        CommandLine:
            python -m wbia_cnn.custom_layers --test-CenterSurroundLayer.get_output_for:0 --show
            python -m wbia_cnn.custom_layers --test-CenterSurroundLayer.get_output_for:1 --show

        Example0:
            >>> # DISABLE_DOCTEST
            >>> from wbia_cnn.custom_layers import *  # NOQA
            >>> import theano
            >>> #item_shape = (32, 32, 3)
            >>> item_shape = (41, 41, 3)
            >>> self, inputdata_ = testdata_centersurround(item_shape)
            >>> # Test the actual symbolic expression
            >>> output_T = utils.evaluate_symbolic_layer(self.get_output_for, inputdata_, T.tensor4, T=theano.tensor)
            >>> output_T = output_T.astype(np.uint8)
            >>> ut.quit_if_noshow()
            >>> img_list = utils.convert_theano_images_to_cv2_images(output_T)
            >>> interact_image_list(img_list, num_per_page=8)

        Example1:
            >>> # DISABLE_DOCTEST
            >>> from wbia_cnn.custom_layers import *  # NOQA
            >>> import numpy as np
            >>> #item_shape = (32, 32, 3)
            >>> item_shape = (41, 41, 3)
            >>> self, input_expr = testdata_centersurround(item_shape)
            >>> # Test using just numpy
            >>> output_np = self.get_output_for(input_expr, T=np)
            >>> print('results agree')
            >>> ut.quit_if_noshow()
            >>> img_list = utils.convert_theano_images_to_cv2_images(output_np)
            >>> interact_image_list(img_list, num_per_page=8)
        """
        # Create a center and surround for each input patch
        input_shape = input_expr.shape
        batch_size, channels, height, width = input_shape

        # Quarter boundaries of the patch; the center is the middle half.
        left_h = height // 4
        left_w = width // 4
        right_h = left_h * 3
        right_w = left_w * 3
        # account for odd patches
        total_h = left_h * 4
        total_w = left_w * 4

        center = input_expr[:, :, left_h:right_h, left_w:right_w]
        # Surround: every-other-pixel subsampling of the (even-cropped) patch.
        surround = input_expr[:, :, 0:total_h:2, 0:total_w:2]

        output_shape = self.get_output_shape_for(input_shape)
        if T is theano.tensor:
            # production theano version: interleave center/surround into
            # a freshly allocated tensor via set_subtensor.
            center.name = 'center'
            surround.name = 'surround'
            output_expr = T.alloc(0.0, *output_shape)
            output_expr.name = 'center_surround_alloc'
            # (original wrapped T.set_subtensor in a no-arg functools.partial;
            # calling it directly is equivalent)
            output_expr = T.set_subtensor(output_expr[::2], center)
            output_expr = T.set_subtensor(output_expr[1::2], surround)
            output_expr.name = 'center_surround_output'
        else:
            # debugging numpy version
            output_expr = np.empty(output_shape, dtype=input_expr.dtype)
            output_expr[::2] = center
            output_expr[1::2] = surround
        return output_expr
class MultiImageSliceLayer(layers.Layer):
    """
    orig CyclicSliceLayer

    References:
        https://github.com/benanne/kaggle-ndsb/blob/master/dihedral.py#L89

    This layer stacks rotations of 0, 90, 180, and 270 degrees of the input
    along the batch dimension.

    If the input has shape (batch_size, num_channels, r, c),
    then the output will have shape (4 * batch_size, num_channels, r, c).

    Note that the stacking happens on axis 0, so a reshape to
    (4, batch_size, num_channels, r, c) will separate the slice axis.
    """

    def __init__(self, input_layer):
        super(MultiImageSliceLayer, self).__init__(input_layer)

    def get_output_shape_for(self, input_shape):
        return (4 * input_shape[0],) + input_shape[1:]

    def get_output_for(self, input_, *args, **kwargs):
        # NOTE(review): all four rotation transforms are commented out, so
        # this currently concatenates an empty list — likely a stub awaiting
        # the array_tf_* helpers; verify before use.
        return lasagne.utils.concatenate(
            [
                # array_tf_0(input_),
                # array_tf_90(input_),
                # array_tf_180(input_),
                # array_tf_270(input_),
            ],
            axis=0,
        )
class MultiImageRollLayer(layers.Layer):
    """
    orig CyclicConvRollLayer

    This layer turns (n_views * batch_size, num_channels, r, c) into
    (n_views * batch_size, n_views * num_channels, r, c) by rolling
    and concatenating feature maps.

    It also applies the correct inverse transforms to the r and c
    dimensions to align the feature maps.

    References:
        https://github.com/benanne/kaggle-ndsb/blob/master/dihedral.py#L224
    """

    def __init__(self, input_layer):
        super(MultiImageRollLayer, self).__init__(input_layer)
        # NOTE(review): inverse transforms are disabled (empty list), so
        # get_output_for's zip produces nothing to permute — confirm intent.
        self.inv_tf_funcs = []  # array_tf_0, array_tf_270, array_tf_180, array_tf_90]
        self.compute_permutation_matrix()

    def compute_permutation_matrix(self):
        # Each row is the view-index permutation induced by k successive
        # 90-degree rotations (k = 0..3).
        map_identity = np.arange(4)
        map_rot90 = np.array([1, 2, 3, 0])

        valid_maps = []
        current_map = map_identity
        for k in range(4):
            valid_maps.append(current_map)
            current_map = current_map[map_rot90]
        self.perm_matrix = np.array(valid_maps)

    def get_output_shape_for(self, input_shape):
        return (input_shape[0], 4 * input_shape[1]) + input_shape[2:]

    def get_output_for(self, input_, *args, **kwargs):
        s = input_.shape
        # Unfold the batch into (views, batch, channels, r, c)
        input_unfolded = input_.reshape((4, s[0] // 4, s[1], s[2], s[3]))

        permuted_inputs = []
        for p, inv_tf in zip(self.perm_matrix, self.inv_tf_funcs):
            input_permuted = inv_tf(input_unfolded[p].reshape(s))
            permuted_inputs.append(input_permuted)

        return lasagne.utils.concatenate(
            permuted_inputs, axis=1
        )  # concatenate long the channel axis
class CyclicPoolLayer(layers.Layer):
    """
    Utility layer that unfolds the viewpoints dimension and pools over it.

    Note that this only makes sense for dense representations, not for
    feature maps (because no inverse transforms are applied to align them).
    """

    def __init__(self, input_layer, pool_function=T.mean):
        super(CyclicPoolLayer, self).__init__(input_layer)
        self.pool_function = pool_function

    def get_output_shape_for(self, input_shape):
        return (input_shape[0] // 4, input_shape[1])

    def get_output_for(self, input_, *args, **kwargs):
        # Unfold (4 * batch, feat) -> (4, batch, feat), then pool across views.
        unfolded_input = input_.reshape((4, input_.shape[0] // 4, input_.shape[1]))
        return self.pool_function(unfolded_input, axis=0)
class BatchNormLayer2(layers.BatchNormLayer):
    """
    Adds a nonlinearity to batch norm layer to reduce number of layers
    """

    def __init__(self, incoming, nonlinearity=None, **kwargs):
        # (the original routed super() through a local alias to survive
        # utool class reloading; a direct reference is equivalent here)
        super(BatchNormLayer2, self).__init__(incoming, **kwargs)
        self.nonlinearity = (
            nonlinearities.identity if nonlinearity is None else nonlinearity
        )

    def get_output_for(self, input, **kwargs):
        # Apply the standard batch normalization, then the nonlinearity.
        normalized = super(BatchNormLayer2, self).get_output_for(input, **kwargs)
        normalized_activation = self.nonlinearity(normalized)
        return normalized_activation
class FlipLayer(layers.Layer):
    """Flips each feature map both vertically and horizontally (180-degree rotation)."""

    def get_output_shape_for(self, input_shape):
        return input_shape

    def get_output_for(self, input, **kwargs):
        # Reverse both spatial axes of a (batch, channel, h, w) tensor.
        return input[:, :, ::-1, ::-1]
def load_json_arch_def(arch_json_fpath):
    """
    Rebuild a network output layer from a saved JSON architecture description.

    layer_list = layers.get_all_layers(output_layer)
    from wbia_cnn import net_strs
    layer_json_list = [net_strs.make_layer_json_dict(layer)
                       for layer in layer_list]
    arch_json_fpath = '/media/raid/work/WS_ALL/_ibsdb/_ibeis_cache/nets/injur-shark_10056_224x224x3_auqbfhle/models/arch_injur-shark_o2_d11_c688_acioqbst/saved_sessions/fit_session_2016-08-26T173854+5/fit_arch_info.json'
    """
    from wbia_cnn import custom_layers

    # BUG FIX: the dnn layer module lives under the lasagne package;
    # ``import layers.dnn`` raises ImportError.
    import lasagne.layers.dnn

    # FIXME: Need to redo the saved arch json file.
    # Need to give layers identifiers and specify their inputs / outputs
    # They are not implicit
    arch_json = ut.load_json(arch_json_fpath)
    layer_json_list = arch_json['layers']

    network_def_list = []
    for layer_json in layer_json_list:
        classname = layer_json['type']
        classkw = ut.delete_dict_keys(layer_json.copy(), ['type', 'output_shape'])
        # Rectify nonlinearity definition
        if 'nonlinearity' in classkw:
            nonlin = classkw['nonlinearity']
            nonlinclass = getattr(lasagne.nonlinearities, nonlin['type'], None)
            nonlinkw = ut.delete_dict_keys(nonlin.copy(), ['type'])
            if nonlin['type'] in ['softmax', 'linear']:
                # these nonlinearities are plain functions, not classes
                classkw['nonlinearity'] = nonlinclass
            else:
                classkw['nonlinearity'] = nonlinclass(**nonlinkw)
        # Resolve the layer class: lasagne core, then dnn, then this module.
        classtype = getattr(lasagne.layers, classname, None)
        if classtype is None:
            classtype = getattr(lasagne.layers.dnn, classname, None)
        if classtype is None:
            classtype = getattr(custom_layers, classname, None)
        if classtype is not None:
            layer_func = ut.NamedPartial(classtype, **classkw)
            network_def_list.append(layer_func)
        else:
            network_def_list.append(None)

    layer_list = custom_layers.evaluate_layer_list(network_def_list)

    # Hack, remove all biases before batch norm
    for layer in layer_list:
        if isinstance(layer, layers.BatchNormLayer):
            in_layer = layer.input_layer
            if in_layer is not None:
                # BUG FIX: getattr without a default raises AttributeError
                # for layers that never had a bias parameter.
                if getattr(in_layer, 'b', None) is not None:
                    del in_layer.params[in_layer.b]
                    in_layer.b = None

    output_layer = layer_list[-1]
    return output_layer
def evaluate_layer_list(network_layers_def, verbose=None):
    r"""
    compiles a sequence of partial functions into a network

    Each entry of ``network_layers_def`` is called with the previous layer
    as its only positional argument (the first entry is called with none),
    so the list chains into a single network.

    Args:
        network_layers_def (list): callables (typically partials) building layers
        verbose (bool): defaults to utils.VERBOSE_CNN

    Returns:
        list: the constructed layer instances, in order
    """
    if verbose is None:
        verbose = utils.VERBOSE_CNN
    total = len(network_layers_def)
    network_layers = []
    if verbose:
        print('Evaluating List of %d Layers' % (total,))
    layer_fn_iter = iter(network_layers_def)
    # keep a reference for the error report below
    layer = None
    try:
        with ut.Indenter(' ' * 4, enabled=verbose):
            next_args = tuple()
            for count, layer_fn in enumerate(layer_fn_iter, start=1):
                if verbose:
                    print(
                        'Evaluating layer %d/%d (%s) '
                        % (
                            count,
                            total,
                            ut.get_funcname(layer_fn),
                        )
                    )
                with ut.Timer(verbose=False) as tt:
                    layer = layer_fn(*next_args)
                # thread the freshly built layer into the next constructor
                next_args = (layer,)
                network_layers.append(layer)
                if verbose:
                    print(' * took %.4fs' % (tt.toc(),))
                    print(' * layer = %r' % (layer,))
                    if hasattr(layer, 'input_shape'):
                        print(' * layer.input_shape = %r' % (layer.input_shape,))
                    if hasattr(layer, 'shape'):
                        print(' * layer.shape = %r' % (layer.shape,))
                    print(' * layer.output_shape = %r' % (layer.output_shape,))
    except Exception as ex:
        keys = [
            'layer_fn',
            'layer_fn.func',
            'layer_fn.args',
            'layer_fn.keywords',
            'layer_fn.__dict__',
            'layer',
            'count',
        ]
        ut.printex(ex, ('Error building layers.\n' 'layer=%r') % (layer,), keys=keys)
        raise
    return network_layers
# Bundle common layers together
def make_bundles(
    nonlinearity='lru',
    batch_norm=True,
    filter_size=(3, 3),
    stride=(1, 1),
    pool_stride=(2, 2),
    pool_size=(2, 2),
    branches=None,
    W=None,
):
    """
    Build a registry of layer "bundle" factories sharing common defaults.

    A bundle is a small callable object that, given an incoming Lasagne
    layer, appends a frequently-used group of layers (conv + batch norm +
    pool, a residual block, an inception module, ...).  The keyword
    arguments supplied here become the defaults that every bundle class
    closes over; individual bundle instances may override them.

    Args:
        nonlinearity: default activation; the string ``'lru'`` selects a
            ``LeakyRectify`` with leakiness 1/10.
        batch_norm (bool): default for whether bundles apply batch norm.
        filter_size (tuple): default convolution filter size.
        stride (tuple): default convolution stride.
        pool_stride (tuple): default max-pool stride.
        pool_size (tuple): default max-pool window size.
        branches: default branch specification for ``InceptionBundle``.
        W: default weight initializer; ``init.Orthogonal('relu')`` when None.

    Returns:
        dict: maps bundle class name (str) -> bundle class.
    """
    # FIXME: dropout is a pre-operation
    import itertools
    import six

    if W is None:
        # W = init.GlorotUniform()
        W = init.Orthogonal('relu')

    # Rectify default inputs
    if nonlinearity == 'lru':
        nonlinearity = nonlinearities.LeakyRectify(leakiness=(1.0 / 10.0))

    # Produces a unique sequential suffix ('1', '2', ...) per bundle instance
    namer = ut.partial(lambda x: str(six.next(x)), itertools.count(1))

    bundles = {}

    def register_bundle(class_):
        # Decorator: expose the bundle class in the returned registry
        classname = ut.get_classname(class_, local=True)
        bundles[classname] = class_
        return class_

    class Bundle(object):
        """Base bundle: assigns a unique name and provides shared helpers."""

        def __init__(self):
            self.suffix = namer()
            self.name = self.suffix

        def debug_layer(self, layer):
            # Flip to True for verbose shape debugging while building nets
            if False:
                print('layer = %r' % layer)
                if hasattr(layer, 'name'):
                    print(' * layer.name = %r' % layer.name)
                if hasattr(layer, 'input_shape'):
                    print(' * layer.input_shape = %r' % (layer.input_shape,))
                if hasattr(layer, 'shape'):
                    print(' * layer.shape = %r' % (layer.shape,))
                print(' * layer.output_shape = %r' % (layer.output_shape,))

        def apply_dropout(self, layer):
            # change name standard
            outgoing = layers.DropoutLayer(
                layer, p=self.dropout, name='D' + self.name
            )
            return outgoing

        def apply_batch_norm(self, layer, **kwargs):
            # change name standard
            # Steal the layer's nonlinearity (batch norm is inserted before
            # the activation) and drop its now-redundant bias parameter.
            nonlinearity = getattr(layer, 'nonlinearity', None)
            if nonlinearity is not None:
                layer.nonlinearity = nonlinearities.identity
            if hasattr(layer, 'b') and layer.b is not None:
                del layer.params[layer.b]
                layer.b = None
            # bn_name = (kwargs.pop('name', None) or
            #            (getattr(layer, 'name', None) and self.name + '/bn'))
            bn_name = layer.name + '/bn'
            layer = BatchNormLayer2(
                layer, name=bn_name, nonlinearity=nonlinearity, **kwargs
            )
            # layer._is_main_layer = False
            # if nonlinearity is not None:
            #     nonlin_name = 'g' + self.name
            #     layer = layers.special.NonlinearityLayer(layer,
            #                                              nonlinearity,
            #                                              name=nonlin_name)
            # outgoing = layers.normalization.batch_norm(layer, **kwargs)
            # outgoing.input_layer.name = 'bn' + self.name
            # outgoing.name = 'nl' + self.name
            outgoing = layer
            return outgoing

    @register_bundle
    class InputBundle(Bundle):
        """Input layer, optionally followed by Gaussian noise."""

        def __init__(self, shape, noise=False):
            self.shape = shape
            self.noise = noise
            super(InputBundle, self).__init__()

        def __call__(self):
            outgoing = layers.InputLayer(shape=self.shape, name='I' + self.name)
            if self.noise:
                outgoing = layers.GaussianNoiseLayer(
                    outgoing, name='N' + self.name
                )
            return outgoing

    @register_bundle
    class ConvBundle(Bundle):
        """Convolution with optional dropout, batch norm, and max pooling.

        With ``preactivate=True`` batch norm + activation are applied
        *before* the convolution (pre-activation ResNet style); otherwise
        batch norm follows the convolution when ``batch_norm`` is set.
        """

        def __init__(
            self,
            num_filters,
            filter_size=filter_size,
            stride=stride,
            nonlinearity=nonlinearity,
            batch_norm=batch_norm,
            pool_size=pool_size,
            W=W,
            pool_stride=pool_stride,
            dropout=None,
            pool=False,
            preactivate=False,
            pad=0,
            name=None,
        ):
            self.num_filters = num_filters
            self.filter_size = filter_size
            self.stride = stride
            self.nonlinearity = nonlinearity
            self.batch_norm = batch_norm
            self.pool_size = pool_size
            self.pool_stride = pool_stride
            self.dropout = dropout
            self.preactivate = preactivate
            self.pool = pool
            self.pad = pad
            self.W = W
            if name is None:
                super(ConvBundle, self).__init__()
                self.name = 'C' + self.suffix
            else:
                self.name = name

        def __call__(self, incoming):
            outgoing = incoming
            if self.preactivate or self.batch_norm:
                # Batch norm supplies the bias/activation, so the conv
                # itself is linear and bias-free.
                b = None
                nonlinearity = None
            else:
                b = init.Constant(0)
                nonlinearity = self.nonlinearity
            if self.preactivate:
                outgoing = BatchNormLayer2(
                    outgoing, nonlinearity=self.nonlinearity, name=self.name + '/_bn'
                )
            if self.dropout is not None and self.dropout > 0:
                outgoing = self.apply_dropout(outgoing)
            outgoing = Conv2DLayer(
                outgoing,
                num_filters=self.num_filters,
                filter_size=self.filter_size,
                stride=self.stride,
                name=self.name,
                pad=self.pad,
                # BUGFIX: was ``W=W`` (the closure default), which silently
                # ignored any per-bundle initializer override (e.g. the one
                # ResidualBundle forwards as ``W=self.W``).
                W=self.W,
                nonlinearity=nonlinearity,
                b=b,
            )
            if self.batch_norm and not self.preactivate:
                outgoing = BatchNormLayer2(
                    outgoing, nonlinearity=self.nonlinearity, name=self.name + '/bn_'
                )
            if self.pool:
                outgoing = MaxPool2DLayer(
                    outgoing,
                    pool_size=self.pool_size,
                    name=self.name + '/P',
                    stride=self.pool_stride,
                )
            return outgoing

    @register_bundle
    class ResidualBundle(Bundle):
        """Pre-activation residual block (two convs + identity/projection)."""

        def __init__(
            self,
            num_filters,
            filter_size=filter_size,
            stride=stride,
            nonlinearity=nonlinearity,
            pool_size=pool_size,
            W=W,
            pool_stride=pool_stride,
            dropout=None,
            pool=False,
            preactivate=True,
            postactivate=False,
        ):
            self.num_filters = num_filters
            self.filter_size = filter_size
            self.stride = stride
            self.nonlinearity = nonlinearity
            self.pool_size = pool_size
            self.pool_stride = pool_stride
            self.dropout = dropout
            self.pool = pool
            self.W = W
            self.preactivate = preactivate
            self.postactivate = postactivate
            super(ResidualBundle, self).__init__()
            self.name = 'R' + self.name

        # def projectionA(l_inp):
        #     n_filters = l_inp.output_shape[1] * 2
        #     def ceildiv(a, b):
        #         return -(-a // b)
        #     l = layers.ExpressionLayer(
        #         l_inp,
        #         lambda X: X[:, :, ::2, ::2],
        #         lambda s: (s[0], s[1], ceildiv(s[2], 2), ceildiv(s[3], 2)))
        #     l = layers.PadLayer(l, [n_filters // 4, 0, 0], batch_ndim=1)
        #     return l

        def projectionB(self, incoming):
            """
            The projection shortcut in Eqn.(2) is used to match dimensions
            (done by 1x1 convolutions). When the shortcuts go across feature
            maps of two sizes, they are performed with a stride of 2.
            """
            # Projection is a strided 1x1 convolution.
            # I think preactivation should not trigger any nonlinearities.
            # Just batch normalization. But I haven't been able to confirm.
            projector = ConvBundle(
                filter_size=(1, 1),
                num_filters=self.num_filters,
                stride=self.stride,
                W=self.W,
                pad='same',
                dropout=self.dropout,
                name=self.name + '/proj',
                nonlinearity=None,
                preactivate=self.preactivate,
            )
            shortcut = projector(incoming)
            return shortcut

        def __call__(self, incoming):
            """
            https://github.com/Lasagne/Lasagne/issues/531
            https://github.com/alrojo/lasagne_residual_network/search?utf8=%E2%9C%93&q=residual
            https://github.com/FlorianMuellerklein/Identity-Mapping-ResNet-Lasagne/blob/master/models.py
            """
            # Check if this bundle is going to reduce the spatial dimensions
            size_reduced = np.prod(self.stride) != 1
            # Define convolvers
            convkw = dict(
                W=self.W,
                pad='same',
                dropout=self.dropout,
                batch_norm=False,
                filter_size=self.filter_size,
                num_filters=self.num_filters,
            )
            # Do not preactivate if this is the first layer in the network
            convolver1 = ConvBundle(
                stride=self.stride,
                preactivate=self.preactivate,
                name=self.name + '/C1',
                **convkw
            )
            convolver2 = ConvBundle(
                stride=(1, 1), preactivate=True, name=self.name + '/C2', **convkw
            )
            branch = incoming
            branch = convolver1(branch)
            branch = convolver2(branch)
            branch._is_main_layer = False
            # Need to project the shortcut branch
            if size_reduced:
                shortcut = self.projectionB(incoming)
            else:
                shortcut = incoming
            outgoing = layers.ElemwiseSumLayer(
                [branch, shortcut], name=self.name + '/sum'
            )
            # Postactivate if this is the last residual layer.
            if self.postactivate:
                outgoing = BatchNormLayer2(
                    outgoing,
                    name=self.name + '/bn_',
                    # BUGFIX: was the closure-level ``nonlinearity`` default,
                    # which ignored a per-bundle nonlinearity override.
                    nonlinearity=self.nonlinearity,
                )
            if self.pool:
                outgoing = MaxPool2DLayer(
                    outgoing,
                    pool_size=self.pool_size,
                    name=self.name + '/P',
                    stride=self.pool_stride,
                )
            return outgoing

    @register_bundle
    class InceptionBundle(Bundle):
        """GoogLeNet-style inception module built from parallel branches."""

        # https://github.com/317070/lasagne-googlenet/blob/master/googlenet.py
        def __init__(
            self,
            branches=branches,
            nonlinearity=nonlinearity,
            batch_norm=batch_norm,
            dropout=None,
            pool=False,
            pool_size=pool_size,
            pool_stride=pool_stride,
            W=W,
        ):
            # standard
            self.branches = branches
            self.nonlinearity = nonlinearity
            self.dropout = dropout
            self.batch_norm = batch_norm
            self.pool = pool
            self.pool_size = pool_size
            self.W = W
            self.pool_stride = pool_stride
            super(InceptionBundle, self).__init__()
            self.name = 'INCEP' + self.name

        def __call__(self, incoming):
            in_ = incoming
            if self.dropout is not None and self.dropout > 0:
                in_ = self.apply_dropout(in_)
            name = self.name
            # branches = self.inception_v0(in_)
            if self.branches is not None:
                # Branch spec dicts: 't' = type ('c' conv / 'p' projection),
                # 's' = size, 'n' = num filters, 'r' = reduction, 'd' = depth
                branches = []
                for b in self.branches:
                    print(b)
                    if b['t'] == 'c':
                        branch = self.conv_branch(
                            in_, b['s'], b['n'], b['r'], b.get('d', 1)
                        )
                    elif b['t'] == 'p':
                        branch = self.proj_branch(in_, b['s'], b['n'])
                    else:
                        print('b = %r' % (b,))
                        assert False
                    branches.append(branch)
            else:
                # branches = self.inception_v3_A(in_)
                branches = self.inception_v0(in_)
            # print(branches)
            # for b in branches:
            #     print(b.output_shape)
            outgoing = layers.ConcatLayer(branches, name=name + '/cat')
            outgoing._is_main_layer = True
            if self.pool:
                outgoing = MaxPool2DLayer(
                    outgoing,
                    pool_size=self.pool_size,
                    name='P' + self.name,
                    stride=self.pool_stride,
                )
            return outgoing

        def conv_branch(self, incoming, filter_size, num_filters, num_reduce, depth=1):
            # 1x1 reduction (when num_reduce > 0) followed by `depth` stacked
            # same-padded convolutions of `filter_size`.
            name = self.name
            name_aug = 'x'.join([str(s) for s in filter_size])
            if num_reduce > 0:
                if False:
                    # Disabled alternative: NIN layer instead of 1x1 conv
                    bias = 0.1
                    redu = layers.NINLayer(
                        incoming,
                        num_units=num_reduce,
                        W=self.W,
                        b=init.Constant(bias),
                        nonlinearity=self.nonlinearity,
                        name=name + '/' + name_aug + '_reduce',
                    )
                else:
                    redu = Conv2DLayer(
                        incoming,
                        num_filters=num_reduce,
                        filter_size=(1, 1),
                        pad=0,
                        stride=(1, 1),
                        nonlinearity=self.nonlinearity,
                        W=self.W,
                        name=name + '/' + name_aug + '_reduce',
                    )
                if self.batch_norm:
                    redu = self.apply_batch_norm(redu)
            else:
                redu = incoming
            conv = redu
            for d in range(depth):
                pad = min(filter_size) // 2
                conv = Conv2DLayer(
                    conv,
                    num_filters=num_filters,
                    filter_size=filter_size,
                    pad=pad,
                    stride=(1, 1),
                    nonlinearity=self.nonlinearity,
                    W=self.W,
                    name=name + '/' + name_aug + '_' + str(d),
                )
                if d > 0:
                    conv._is_main_layer = False
                if self.batch_norm:
                    conv = self.apply_batch_norm(conv)
            # if num_reduce > 0:
            #     conv._is_main_layer = False
            return conv

        def proj_branch(self, incoming, pool_size, num_proj):
            # Same-size max pool (via flip trick) followed by 1x1 projection
            name = self.name
            flipped = FlipLayer(incoming, name=name + '/Flip')
            pool = MaxPool2DLayer(
                flipped,
                pool_size=pool_size,
                stride=(1, 1),
                pad=(1, 1),
                name=name + '/pool',
            )
            unflipped = FlipLayer(pool, name=name + '/Unflip')
            project = Conv2DLayer(
                unflipped,
                num_filters=num_proj,
                filter_size=(1, 1),
                pad=0,
                stride=(1, 1),
                nonlinearity=self.nonlinearity,
                # NOTE(review): unlike conv_branch, this conv does not pass
                # W=self.W, so it falls back to the layer default — confirm
                # whether that is intentional.
                name=name + '/proj',
            )
            if self.batch_norm:
                project = self.apply_batch_norm(project)
            return project

        def inception_v0(self, in_):
            # Define the 3 convolutional branches
            conv_branches = [
                self.conv_branch(in_, (1, 1), 64, 0),
                self.conv_branch(in_, (3, 3), 128, 96),
                self.conv_branch(in_, (5, 5), 16, 16),
            ]
            # Define the projection branch
            proj_branches = [self.proj_branch(in_, (3, 3), 32)]
            branches = conv_branches + proj_branches
            return branches

        def inception_v3_A(self, in_):
            # 5x5 branch factored into two stacked 3x3 convolutions
            conv_branches = [
                self.conv_branch(in_, (1, 1), 64, 0),
                self.conv_branch(in_, (3, 3), 128, 96),
                self.conv_branch(in_, (3, 3), 16, 16, depth=2),
            ]
            proj_branches = [self.proj_branch(in_, (3, 3), 32)]
            branches = conv_branches + proj_branches
            return branches

    @register_bundle
    class DenseBundle(Bundle):
        """Fully connected layer with optional dropout and batch norm."""

        def __init__(
            self,
            num_units,
            batch_norm=batch_norm,
            nonlinearity=nonlinearity,
            W=W,
            dropout=None,
        ):
            self.num_units = num_units
            self.batch_norm = batch_norm
            self.nonlinearity = nonlinearity
            self.dropout = dropout
            self.W = W
            super(DenseBundle, self).__init__()

        def __call__(self, incoming):
            outgoing = incoming
            if self.dropout is not None and self.dropout > 0:
                outgoing = self.apply_dropout(outgoing)
            outgoing = layers.DenseLayer(
                outgoing,
                num_units=self.num_units,
                name='F' + self.name,
                nonlinearity=self.nonlinearity,
                W=self.W,
            )
            if self.batch_norm:
                outgoing = self.apply_batch_norm(outgoing)
            return outgoing

    @register_bundle
    class SoftmaxBundle(Bundle):
        """Fully connected output layer with a softmax activation."""

        def __init__(self, num_units, dropout=None, W=W):
            self.num_units = num_units
            # NOTE(review): stored but never read in __call__
            self.batch_norm = batch_norm
            self.dropout = dropout
            self.W = W
            super(SoftmaxBundle, self).__init__()

        def __call__(self, incoming):
            outgoing = incoming
            if self.dropout is not None and self.dropout > 0:
                outgoing = self.apply_dropout(outgoing)
            outgoing = layers.DenseLayer(
                outgoing,
                num_units=self.num_units,
                name='F' + self.name,
                W=self.W,
                nonlinearity=nonlinearities.softmax,
            )
            return outgoing

    @register_bundle
    class NonlinearitySoftmax(Bundle):
        """Standalone softmax applied to an existing layer's output."""

        def __call__(self, incoming):
            return layers.NonlinearityLayer(
                incoming,
                nonlinearity=nonlinearities.softmax,
                name='Softmax' + self.name,
            )

    @register_bundle
    class GlobalPool(Bundle):
        """Global pooling over spatial dimensions (layer default pooling)."""

        def __call__(self, incoming):
            outgoing = layers.GlobalPoolLayer(
                incoming,
                name='GP' + self.name,
            )
            outgoing._is_main_layer = True
            return outgoing

    @register_bundle
    class AveragePool(Bundle):
        """Global pooling variant; differs from GlobalPool only in name."""

        def __call__(self, incoming):
            outgoing = layers.GlobalPoolLayer(
                incoming,
                name='AP' + self.name,
            )
            outgoing._is_main_layer = True
            return outgoing

    @register_bundle
    class MaxPool2D(Bundle):
        """Plain 2D max pooling."""

        def __init__(self, pool_size=pool_size, pool_stride=pool_stride):
            self.pool_size = pool_size
            self.pool_stride = pool_stride
            super(MaxPool2D, self).__init__()

        def __call__(self, incoming):
            return MaxPool2DLayer(
                incoming,
                pool_size=self.pool_size,
                stride=self.pool_stride,
                name='P' + self.name,
            )

    # def inception_module(l_in, num_1x1, reduce_3x3, num_3x3, reduce_5x5,
    #                      num_5x5, gain=1.0, bias=0.1):
    #     """
    #     inception module (without the 3x3x1 pooling and projection because
    #     that's difficult in Theano right now)
    #     http://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Szegedy_Going_Deeper_With_2015_CVPR_paper.pdf
    #     """

    return bundles
if __name__ == '__main__':
    """
    CommandLine:
        python -m wbia_cnn.custom_layers
        python -m wbia_cnn.custom_layers --allexamples
        python -m wbia_cnn.custom_layers --allexamples --noface --nosrc
    """
    # Script entry point: runs this module's doctests via utool.
    import multiprocessing

    multiprocessing.freeze_support()  # for win32
    import utool as ut  # NOQA

    # Discovers and executes doctest examples in this module
    ut.doctest_funcs()