# Source code for Lasagne.lasagne.layers.pool

import theano.tensor as T

from .base import Layer
from ..utils import as_tuple

# Public names exported by this module.  Note that 'Pool3DLayer' and
# 'MaxPool3DLayer' are removed again near the bottom of the module when the
# installed Theano version has no `pool_3d` support.
__all__ = [
    "MaxPool1DLayer",
    "MaxPool2DLayer",
    "MaxPool3DLayer",
    "Pool1DLayer",
    "Pool2DLayer",
    "Pool3DLayer",
    "Upscale1DLayer",
    "Upscale2DLayer",
    "Upscale3DLayer",
    "FeaturePoolLayer",
    "FeatureWTALayer",
    "GlobalPoolLayer",
    "SpatialPyramidPoolingLayer",
]


def pool_output_length(input_length, pool_size, stride, pad, ignore_border):
    """
    Compute the output length of a pooling operator
    along a single dimension.

    Parameters
    ----------
    input_length : integer
        The length of the input in the pooling dimension
    pool_size : integer
        The length of the pooling region
    stride : integer
        The stride between successive pooling regions
    pad : integer
        The number of elements to be added to the input on each side.
    ignore_border: bool
        If ``True``, partial pooling regions will be ignored.
        Must be ``True`` if ``pad != 0``.

    Returns
    -------
    output_length
        * None if either input is None.
        * Computed length of the pooling operator otherwise.

    Notes
    -----
    When ``ignore_border == True``, this is given by the number of full
    pooling regions that fit in the padded input length,
    divided by the stride (rounding down).

    If ``ignore_border == False``, a single partial pooling region is
    appended if at least one input element would be left uncovered otherwise.
    """
    # Symbolic/unknown lengths propagate as None.
    if input_length is None or pool_size is None:
        return None

    if ignore_border:
        # Number of valid window positions in the padded input, then
        # ceil-divide by the stride.
        num_positions = input_length + 2 * pad - pool_size + 1
        return (num_positions + stride - 1) // stride

    # Output length calculation taken from:
    # https://github.com/Theano/Theano/blob/master/theano/tensor/signal/downsample.py
    # Padding is unsupported when partial regions are kept.
    assert pad == 0

    if stride >= pool_size:
        # Non-overlapping windows: every stride step yields an output,
        # including a final partial window.
        return (input_length + stride - 1) // stride

    # Overlapping windows: count the full windows after the first one,
    # then add one for the initial window.
    extra_windows = (input_length - pool_size + stride - 1) // stride
    return max(0, extra_windows) + 1


def pool_2d(input, **kwargs):
    """
    Wrapper function that calls :func:`theano.tensor.signal.pool_2d` either
    with the new or old keyword argument names expected by Theano.
    """
    try:
        return T.signal.pool.pool_2d(input, **kwargs)
    except TypeError:  # pragma: no cover
        # Older Theano expects ds/st/padding instead of ws/stride/pad;
        # rename the keywords in place and retry.
        for new_name, old_name in (('ws', 'ds'),
                                   ('stride', 'st'),
                                   ('pad', 'padding')):
            kwargs[old_name] = kwargs.pop(new_name)
        return T.signal.pool.pool_2d(input, **kwargs)

def pool_3d(input, **kwargs):  # pragma: no cover
    """
    Wrapper function that calls :func:`theano.tensor.signal.pool_3d` either
    with the new or old keyword argument names expected by Theano.
    """
    try:
        return T.signal.pool.pool_3d(input, **kwargs)
    except TypeError:  # pragma: no cover
        # Older Theano expects ds/st/padding instead of ws/stride/pad;
        # rename the keywords in place and retry.
        for new_name, old_name in (('ws', 'ds'),
                                   ('stride', 'st'),
                                   ('pad', 'padding')):
            kwargs[old_name] = kwargs.pop(new_name)
        return T.signal.pool.pool_3d(input, **kwargs)

class Pool1DLayer(Layer):
    """
    1D pooling layer

    Pools (max or mean) over the trailing axis of a 3D input tensor.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    pool_size : integer or iterable
        Length of the pooling window.  An iterable must hold exactly one
        element.

    stride : integer, iterable or ``None``
        Distance between the starts of adjacent pooling windows.  ``None``
        (the default) means ``stride == pool_size``.

    pad : integer or iterable
        Number of elements of implicit padding added on each side of the
        input.  Must be less than the stride.

    ignore_border : bool
        When ``True``, incomplete pooling windows at the end of the input
        are dropped.  Required to be ``True`` whenever ``pad != 0``.

    mode : {'max', 'average_inc_pad', 'average_exc_pad'}
        Pooling function: maximum, or mean including/excluding the padded
        values.  Defaults to 'max'.

    **kwargs
        Any additional keyword arguments are forwarded to the
        :class:`Layer` superclass.

    See Also
    --------
    MaxPool1DLayer : Shortcut for max pooling layer.

    Notes
    -----
    The padding value is chosen below the minimum of the input, so the
    output of every pooling window corresponds to some element of the
    unpadded input.

    ``ignore_border=False`` prevents Theano from dispatching to cuDNN,
    forcing a slower fallback implementation.
    """
    def __init__(self, incoming, pool_size, stride=None, pad=0,
                 ignore_border=True, mode='max', **kwargs):
        super(Pool1DLayer, self).__init__(incoming, **kwargs)

        # A 1D pooling layer only makes sense on (batch, channels, length).
        if len(self.input_shape) != 3:
            raise ValueError("Tried to create a 1D pooling layer with "
                             "input shape %r. Expected 3 input dimensions "
                             "(batchsize, channels, 1 spatial dimensions)."
                             % (self.input_shape,))

        self.pool_size = as_tuple(pool_size, 1)
        self.stride = self.pool_size if stride is None else as_tuple(stride, 1)
        self.pad = as_tuple(pad, 1)
        self.ignore_border = ignore_border
        self.mode = mode

    def get_output_shape_for(self, input_shape):
        # Only the trailing (spatial) axis changes.
        shape = list(input_shape)
        shape[-1] = pool_output_length(input_shape[-1],
                                       pool_size=self.pool_size[0],
                                       stride=self.stride[0],
                                       pad=self.pad[0],
                                       ignore_border=self.ignore_border)
        return tuple(shape)

    def get_output_for(self, input, **kwargs):
        # Theano only provides 2D pooling, so append a dummy trailing axis,
        # pool with a window of 1 along it, and strip it off again.
        as_4d = T.shape_padright(input, 1)
        pooled = pool_2d(as_4d,
                         ws=(self.pool_size[0], 1),
                         stride=(self.stride[0], 1),
                         ignore_border=self.ignore_border,
                         pad=(self.pad[0], 0),
                         mode=self.mode)
        return pooled[:, :, :, 0]


class Pool2DLayer(Layer):
    """
    2D pooling layer

    Pools (max or mean) over the two trailing axes of a 4D input tensor.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    pool_size : integer or iterable
        Length of the pooling window in each dimension.  An integer is
        promoted to a square window; an iterable must hold two elements.

    stride : integer, iterable or ``None``
        Distance between the starts of adjacent pooling windows in each
        dimension.  ``None`` (the default) means ``stride = pool_size``.

    pad : integer or iterable
        Number of elements of implicit padding added on each side of the
        input in each dimension.  Each value must be less than the
        corresponding stride.

    ignore_border : bool
        When ``True``, incomplete pooling windows at the borders are
        dropped.  Required to be ``True`` whenever ``pad != (0, 0)``.

    mode : {'max', 'average_inc_pad', 'average_exc_pad'}
        Pooling function: maximum, or mean including/excluding the padded
        values.  Defaults to 'max'.

    **kwargs
        Any additional keyword arguments are forwarded to the
        :class:`Layer` superclass.

    See Also
    --------
    MaxPool2DLayer : Shortcut for max pooling layer.

    Notes
    -----
    The padding value is chosen below the minimum of the input, so the
    output of every pooling window corresponds to some element of the
    unpadded input.

    ``ignore_border=False`` prevents Theano from dispatching to cuDNN,
    forcing a slower fallback implementation.
    """

    def __init__(self, incoming, pool_size, stride=None, pad=(0, 0),
                 ignore_border=True, mode='max', **kwargs):
        super(Pool2DLayer, self).__init__(incoming, **kwargs)

        self.pool_size = as_tuple(pool_size, 2)

        # A 2D pooling layer only makes sense on (batch, channels, h, w).
        if len(self.input_shape) != 4:
            raise ValueError("Tried to create a 2D pooling layer with "
                             "input shape %r. Expected 4 input dimensions "
                             "(batchsize, channels, 2 spatial dimensions)."
                             % (self.input_shape,))

        self.stride = self.pool_size if stride is None else as_tuple(stride, 2)
        self.pad = as_tuple(pad, 2)
        self.ignore_border = ignore_border
        self.mode = mode

    def get_output_shape_for(self, input_shape):
        # Both trailing (spatial) axes shrink; batch/channel axes pass
        # through unchanged.
        shape = list(input_shape)
        for dim in range(2):
            shape[2 + dim] = pool_output_length(
                input_shape[2 + dim],
                pool_size=self.pool_size[dim],
                stride=self.stride[dim],
                pad=self.pad[dim],
                ignore_border=self.ignore_border,
            )
        return tuple(shape)

    def get_output_for(self, input, **kwargs):
        return pool_2d(input,
                       ws=self.pool_size,
                       stride=self.stride,
                       ignore_border=self.ignore_border,
                       pad=self.pad,
                       mode=self.mode)


class Pool3DLayer(Layer):  # pragma: no cover
    """
    3D pooling layer

    Pools (max or mean) over the three trailing axes of a 5D input tensor.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    pool_size : integer or iterable
        Length of the pooling window in each dimension.  An integer is
        promoted to a cubic window; an iterable must hold three elements.

    stride : integer, iterable or ``None``
        Distance between the starts of adjacent pooling windows in each
        dimension.  ``None`` (the default) means ``stride = pool_size``.

    pad : integer or iterable
        Number of elements of implicit padding added on each side of the
        input in each dimension.  Each value must be less than the
        corresponding stride.

    ignore_border : bool
        When ``True``, incomplete pooling windows at the borders are
        dropped.  Required to be ``True`` whenever ``pad != (0, 0, 0)``.

    mode : {'max', 'average_inc_pad', 'average_exc_pad'}
        Pooling function: maximum, or mean including/excluding the padded
        values.  Defaults to 'max'.

    **kwargs
        Any additional keyword arguments are forwarded to the
        :class:`Layer` superclass.

    See Also
    --------
    MaxPool3DLayer : Shortcut for max pooling layer.

    Notes
    -----
    The padding value is chosen below the minimum of the input, so the
    output of every pooling window corresponds to some element of the
    unpadded input.

    ``ignore_border=False`` prevents Theano from dispatching to cuDNN,
    forcing a slower fallback implementation.
    """

    def __init__(self, incoming, pool_size, stride=None, pad=(0, 0, 0),
                 ignore_border=True, mode='max', **kwargs):
        super(Pool3DLayer, self).__init__(incoming, **kwargs)

        self.pool_size = as_tuple(pool_size, 3)

        # A 3D pooling layer only makes sense on (batch, channels, d, h, w).
        if len(self.input_shape) != 5:
            raise ValueError("Tried to create a 3D pooling layer with "
                             "input shape %r. Expected 5 input dimensions "
                             "(batchsize, channels, 3 spatial dim)."
                             % (self.input_shape,))

        self.stride = self.pool_size if stride is None else as_tuple(stride, 3)
        self.pad = as_tuple(pad, 3)
        self.ignore_border = ignore_border
        self.mode = mode

    def get_output_shape_for(self, input_shape):
        # All three trailing (spatial) axes shrink; batch/channel axes
        # pass through unchanged.
        shape = list(input_shape)
        for dim in range(3):
            shape[2 + dim] = pool_output_length(
                input_shape[2 + dim],
                pool_size=self.pool_size[dim],
                stride=self.stride[dim],
                pad=self.pad[dim],
                ignore_border=self.ignore_border,
            )
        return tuple(shape)

    def get_output_for(self, input, **kwargs):
        return pool_3d(input,
                       ws=self.pool_size,
                       stride=self.stride,
                       ignore_border=self.ignore_border,
                       pad=self.pad,
                       mode=self.mode)


class MaxPool1DLayer(Pool1DLayer):
    """
    1D max-pooling layer

    Performs 1D max-pooling over the trailing axis of a 3D input tensor.
    Equivalent to :class:`Pool1DLayer` with ``mode='max'``.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    pool_size : integer or iterable
        Length of the pooling window.  An iterable must hold exactly one
        element.

    stride : integer, iterable or ``None``
        Distance between the starts of adjacent pooling windows.  ``None``
        (the default) means ``stride == pool_size``.

    pad : integer or iterable
        Number of elements of implicit padding added on each side of the
        input.  Must be less than the stride.

    ignore_border : bool
        When ``True``, incomplete pooling windows at the end of the input
        are dropped.  Required to be ``True`` whenever ``pad != 0``.

    **kwargs
        Any additional keyword arguments are forwarded to the
        :class:`Layer` superclass.

    Notes
    -----
    The padding value is chosen below the minimum of the input, so the
    output of every pooling window corresponds to some element of the
    unpadded input.

    ``ignore_border=False`` prevents Theano from dispatching to cuDNN,
    forcing a slower fallback implementation.
    """

    def __init__(self, incoming, pool_size, stride=None, pad=0,
                 ignore_border=True, **kwargs):
        # Delegate everything to Pool1DLayer, fixing the pooling mode.
        super(MaxPool1DLayer, self).__init__(
            incoming, pool_size, stride, pad, ignore_border,
            mode='max', **kwargs)


class MaxPool2DLayer(Pool2DLayer):
    """
    2D max-pooling layer

    Performs 2D max-pooling over the two trailing axes of a 4D input
    tensor.  Equivalent to :class:`Pool2DLayer` with ``mode='max'``.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    pool_size : integer or iterable
        Length of the pooling window in each dimension.  An integer is
        promoted to a square window; an iterable must hold two elements.

    stride : integer, iterable or ``None``
        Distance between the starts of adjacent pooling windows in each
        dimension.  ``None`` (the default) means ``stride = pool_size``.

    pad : integer or iterable
        Number of elements of implicit padding added on each side of the
        input in each dimension.  Each value must be less than the
        corresponding stride.

    ignore_border : bool
        When ``True``, incomplete pooling windows at the borders are
        dropped.  Required to be ``True`` whenever ``pad != (0, 0)``.

    **kwargs
        Any additional keyword arguments are forwarded to the
        :class:`Layer` superclass.

    Notes
    -----
    The padding value is chosen below the minimum of the input, so the
    output of every pooling window corresponds to some element of the
    unpadded input.

    ``ignore_border=False`` prevents Theano from dispatching to cuDNN,
    forcing a slower fallback implementation.
    """

    def __init__(self, incoming, pool_size, stride=None, pad=(0, 0),
                 ignore_border=True, **kwargs):
        # Delegate everything to Pool2DLayer, fixing the pooling mode.
        super(MaxPool2DLayer, self).__init__(
            incoming, pool_size, stride, pad, ignore_border,
            mode='max', **kwargs)
# TODO: add reshape-based implementation to MaxPool*DLayer
class MaxPool3DLayer(Pool3DLayer):  # pragma: no cover
    """
    3D max-pooling layer

    Performs 3D max-pooling over the three trailing axes of a 5D input
    tensor.  Equivalent to :class:`Pool3DLayer` with ``mode='max'``.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    pool_size : integer or iterable
        Length of the pooling window in each dimension.  An integer is
        promoted to a cubic window; an iterable must hold three elements.

    stride : integer, iterable or ``None``
        Distance between the starts of adjacent pooling windows in each
        dimension.  ``None`` (the default) means ``stride = pool_size``.

    pad : integer or iterable
        Number of elements of implicit padding added on each side of the
        input in each dimension.  Each value must be less than the
        corresponding stride.

    ignore_border : bool
        When ``True``, incomplete pooling windows at the borders are
        dropped.  Required to be ``True`` whenever ``pad != (0, 0, 0)``.

    **kwargs
        Any additional keyword arguments are forwarded to the
        :class:`Layer` superclass.

    Notes
    -----
    The padding value is chosen below the minimum of the input, so the
    output of every pooling window corresponds to some element of the
    unpadded input.

    ``ignore_border=False`` prevents Theano from dispatching to cuDNN,
    forcing a slower fallback implementation.
    """

    def __init__(self, incoming, pool_size, stride=None, pad=(0, 0, 0),
                 ignore_border=True, **kwargs):
        # Delegate everything to Pool3DLayer, fixing the pooling mode.
        super(MaxPool3DLayer, self).__init__(
            incoming, pool_size, stride, pad, ignore_border,
            mode='max', **kwargs)


if not hasattr(T.signal.pool, 'pool_3d'):  # pragma: no cover
    # Hide Pool3DLayer/MaxPool3DLayer for old Theano versions
    del Pool3DLayer, MaxPool3DLayer
    __all__.remove('Pool3DLayer')
    __all__.remove('MaxPool3DLayer')


class Upscale1DLayer(Layer):
    """
    1D upscaling layer

    Performs 1D upscaling over the trailing axis of a 3D input tensor.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    scale_factor : integer or iterable
        The scale factor.  If an iterable, it should have one element.

    mode : {'repeat', 'dilate'}
        Upscaling mode: repeat element values, or upscale leaving zeroes
        between upscaled elements.  Default is 'repeat'.

    **kwargs
        Any additional keyword arguments are forwarded to the
        :class:`Layer` superclass.
    """

    def __init__(self, incoming, scale_factor, mode='repeat', **kwargs):
        super(Upscale1DLayer, self).__init__(incoming, **kwargs)

        self.scale_factor = as_tuple(scale_factor, 1)

        if self.scale_factor[0] < 1:
            raise ValueError('Scale factor must be >= 1, not {0}'.format(
                self.scale_factor))

        if mode not in {'repeat', 'dilate'}:
            msg = "Mode must be either 'repeat' or 'dilate', not {0}"
            raise ValueError(msg.format(mode))
        self.mode = mode

    def get_output_shape_for(self, input_shape):
        output_shape = list(input_shape)  # copy / convert to mutable list
        if output_shape[2] is not None:
            output_shape[2] *= self.scale_factor[0]
        return tuple(output_shape)

    def get_output_for(self, input, **kwargs):
        a, = self.scale_factor
        upscaled = input
        if self.mode == 'repeat':
            if a > 1:
                upscaled = T.extra_ops.repeat(upscaled, a, 2)
        elif self.mode == 'dilate':
            if a > 1:
                # Allocate a zero tensor of the upscaled size, then scatter
                # the input at every a-th position.
                output_shape = self.get_output_shape_for(input.shape)
                upscaled = T.zeros(shape=output_shape, dtype=input.dtype)
                upscaled = T.set_subtensor(upscaled[:, :, ::a], input)
        return upscaled


class Upscale2DLayer(Layer):
    """
    2D upscaling layer

    Performs 2D upscaling over the two trailing axes of a 4D input tensor.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    scale_factor : integer or iterable
        The scale factor in each dimension.  If an integer, it is promoted
        to a square scale factor region.  If an iterable, it should have
        two elements.

    mode : {'repeat', 'dilate'}
        Upscaling mode: repeat element values, or upscale leaving zeroes
        between upscaled elements.  Default is 'repeat'.

    **kwargs
        Any additional keyword arguments are forwarded to the
        :class:`Layer` superclass.

    Notes
    -----
    Using ``mode='dilate'`` followed by a convolution can be realized more
    efficiently with a transposed convolution, see
    :class:`lasagne.layers.TransposedConv2DLayer`.
    """

    def __init__(self, incoming, scale_factor, mode='repeat', **kwargs):
        super(Upscale2DLayer, self).__init__(incoming, **kwargs)

        self.scale_factor = as_tuple(scale_factor, 2)

        if self.scale_factor[0] < 1 or self.scale_factor[1] < 1:
            raise ValueError('Scale factor must be >= 1, not {0}'.format(
                self.scale_factor))

        if mode not in {'repeat', 'dilate'}:
            msg = "Mode must be either 'repeat' or 'dilate', not {0}"
            raise ValueError(msg.format(mode))
        self.mode = mode

    def get_output_shape_for(self, input_shape):
        output_shape = list(input_shape)  # copy / convert to mutable list
        if output_shape[2] is not None:
            output_shape[2] *= self.scale_factor[0]
        if output_shape[3] is not None:
            output_shape[3] *= self.scale_factor[1]
        return tuple(output_shape)

    def get_output_for(self, input, **kwargs):
        a, b = self.scale_factor
        upscaled = input
        if self.mode == 'repeat':
            if b > 1:
                upscaled = T.extra_ops.repeat(upscaled, b, 3)
            if a > 1:
                upscaled = T.extra_ops.repeat(upscaled, a, 2)
        elif self.mode == 'dilate':
            if b > 1 or a > 1:
                # Allocate a zero tensor of the upscaled size, then scatter
                # the input on the dilated grid.
                output_shape = self.get_output_shape_for(input.shape)
                upscaled = T.zeros(shape=output_shape, dtype=input.dtype)
                upscaled = T.set_subtensor(upscaled[:, :, ::a, ::b], input)
        return upscaled


class Upscale3DLayer(Layer):
    """
    3D upscaling layer

    Performs 3D upscaling over the three trailing axes of a 5D input
    tensor.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    scale_factor : integer or iterable
        The scale factor in each dimension.  If an integer, it is promoted
        to a cubic scale factor region.  If an iterable, it should have
        three elements.

    mode : {'repeat', 'dilate'}
        Upscaling mode: repeat element values, or upscale leaving zeroes
        between upscaled elements.  Default is 'repeat'.

    **kwargs
        Any additional keyword arguments are forwarded to the
        :class:`Layer` superclass.
    """

    def __init__(self, incoming, scale_factor, mode='repeat', **kwargs):
        super(Upscale3DLayer, self).__init__(incoming, **kwargs)

        self.scale_factor = as_tuple(scale_factor, 3)

        if self.scale_factor[0] < 1 or self.scale_factor[1] < 1 or \
           self.scale_factor[2] < 1:
            raise ValueError('Scale factor must be >= 1, not {0}'.format(
                self.scale_factor))

        if mode not in {'repeat', 'dilate'}:
            msg = "Mode must be either 'repeat' or 'dilate', not {0}"
            raise ValueError(msg.format(mode))
        self.mode = mode

    def get_output_shape_for(self, input_shape):
        output_shape = list(input_shape)  # copy / convert to mutable list
        if output_shape[2] is not None:
            output_shape[2] *= self.scale_factor[0]
        if output_shape[3] is not None:
            output_shape[3] *= self.scale_factor[1]
        if output_shape[4] is not None:
            output_shape[4] *= self.scale_factor[2]
        return tuple(output_shape)

    def get_output_for(self, input, **kwargs):
        a, b, c = self.scale_factor
        upscaled = input
        if self.mode == 'repeat':
            if c > 1:
                upscaled = T.extra_ops.repeat(upscaled, c, 4)
            if b > 1:
                upscaled = T.extra_ops.repeat(upscaled, b, 3)
            if a > 1:
                upscaled = T.extra_ops.repeat(upscaled, a, 2)
        elif self.mode == 'dilate':
            if c > 1 or b > 1 or a > 1:
                # Allocate a zero tensor of the upscaled size, then scatter
                # the input on the dilated grid.
                output_shape = self.get_output_shape_for(input.shape)
                upscaled = T.zeros(shape=output_shape, dtype=input.dtype)
                upscaled = T.set_subtensor(
                    upscaled[:, :, ::a, ::b, ::c], input)
        return upscaled


class FeaturePoolLayer(Layer):
    """
    lasagne.layers.FeaturePoolLayer(incoming, pool_size, axis=1,
    pool_function=theano.tensor.max, **kwargs)

    Feature pooling layer

    This layer pools across a given axis of the input.  By default this is
    axis 1, which corresponds to the feature axis for :class:`DenseLayer`,
    :class:`Conv1DLayer` and :class:`Conv2DLayer`.  The layer can be used
    to implement maxout.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    pool_size : integer
        the size of the pooling regions, i.e. the number of features /
        feature maps to be pooled together.

    axis : integer
        the axis along which to pool.  The default value of ``1`` works
        for :class:`DenseLayer`, :class:`Conv1DLayer` and
        :class:`Conv2DLayer`.

    pool_function : callable
        the pooling function to use.  This defaults to `theano.tensor.max`
        (i.e. max-pooling) and can be replaced by any other aggregation
        function.

    **kwargs
        Any additional keyword arguments are forwarded to the
        :class:`Layer` superclass.

    Notes
    -----
    This layer requires that the size of the axis along which it pools is
    a multiple of the pool size.
    """

    def __init__(self, incoming, pool_size, axis=1, pool_function=T.max,
                 **kwargs):
        super(FeaturePoolLayer, self).__init__(incoming, **kwargs)
        self.pool_size = pool_size
        self.axis = axis
        self.pool_function = pool_function

        # Pooling only works if the groups divide the axis evenly.
        num_feature_maps = self.input_shape[self.axis]
        if num_feature_maps % self.pool_size != 0:
            raise ValueError("Number of input feature maps (%d) is not a "
                             "multiple of the pool size (pool_size=%d)" %
                             (num_feature_maps, self.pool_size))

    def get_output_shape_for(self, input_shape):
        output_shape = list(input_shape)  # make a mutable copy
        output_shape[self.axis] = input_shape[self.axis] // self.pool_size
        return tuple(output_shape)

    def get_output_for(self, input, **kwargs):
        input_shape = tuple(input.shape)
        num_feature_maps = input_shape[self.axis]
        num_feature_maps_out = num_feature_maps // self.pool_size

        # Split the pooled axis into (groups, pool_size), then reduce over
        # the pool_size axis with the pooling function.
        pool_shape = (input_shape[:self.axis] +
                      (num_feature_maps_out, self.pool_size) +
                      input_shape[self.axis+1:])

        input_reshaped = input.reshape(pool_shape)
        return self.pool_function(input_reshaped, axis=self.axis + 1)


class FeatureWTALayer(Layer):
    """
    'Winner Take All' layer

    This layer performs 'Winner Take All' (WTA) across feature maps: zero
    out all but the maximal activation value within a region.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    pool_size : integer
        the number of feature maps per region.

    axis : integer
        the axis along which the regions are formed.

    **kwargs
        Any additional keyword arguments are forwarded to the
        :class:`Layer` superclass.

    Notes
    -----
    This layer requires that the size of the axis along which it groups
    units is a multiple of the pool size.
    """

    def __init__(self, incoming, pool_size, axis=1, **kwargs):
        super(FeatureWTALayer, self).__init__(incoming, **kwargs)
        self.pool_size = pool_size
        self.axis = axis

        # WTA regions must divide the axis evenly.
        num_feature_maps = self.input_shape[self.axis]
        if num_feature_maps % self.pool_size != 0:
            raise ValueError("Number of input feature maps (%d) is not a "
                             "multiple of the region size (pool_size=%d)" %
                             (num_feature_maps, self.pool_size))

    def get_output_for(self, input, **kwargs):
        num_feature_maps = input.shape[self.axis]
        num_pools = num_feature_maps // self.pool_size

        # Build the reshape pattern (axis split into regions x pool_size)
        # and a matching dimshuffle pattern for the comparison arange.
        pool_shape = ()
        arange_shuffle_pattern = ()
        for k in range(self.axis):
            pool_shape += (input.shape[k],)
            arange_shuffle_pattern += ('x',)

        pool_shape += (num_pools, self.pool_size)
        arange_shuffle_pattern += ('x', 0)

        for k in range(self.axis + 1, input.ndim):
            pool_shape += (input.shape[k],)
            arange_shuffle_pattern += ('x',)

        input_reshaped = input.reshape(pool_shape)
        max_indices = T.argmax(input_reshaped, axis=self.axis + 1,
                               keepdims=True)

        # Mask is 1 at the winning position within each region, 0 elsewhere.
        arange = T.arange(self.pool_size).dimshuffle(*arange_shuffle_pattern)
        mask = T.eq(max_indices, arange).reshape(input.shape)

        return input * mask


class GlobalPoolLayer(Layer):
    """
    lasagne.layers.GlobalPoolLayer(incoming,
    pool_function=theano.tensor.mean, **kwargs)

    Global pooling layer

    This layer pools globally across all trailing dimensions beyond the
    2nd.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    pool_function : callable
        the pooling function to use.  This defaults to
        `theano.tensor.mean` (i.e. mean-pooling) and can be replaced by
        any other aggregation function.

    **kwargs
        Any additional keyword arguments are forwarded to the
        :class:`Layer` superclass.
    """

    def __init__(self, incoming, pool_function=T.mean, **kwargs):
        super(GlobalPoolLayer, self).__init__(incoming, **kwargs)
        self.pool_function = pool_function

    def get_output_shape_for(self, input_shape):
        # Everything past (batch, channels) is pooled away.
        return input_shape[:2]

    def get_output_for(self, input, **kwargs):
        # Flatten all trailing axes into one, then reduce over it.
        return self.pool_function(input.flatten(3), axis=2)


def pool_2d_nxn_regions(inputs, output_size, mode='max'):
    """
    Performs a pooling operation that results in a fixed size:
    output_size x output_size.

    Used by SpatialPyramidPoolingLayer. Refer to appendix A in [1]_.

    Parameters
    ----------
    inputs : a tensor with 4 dimensions (N x C x H x W)
    output_size: integer
        The output size of the pooling operation
    mode : string
        Pooling mode, one of 'max', 'average_inc_pad', 'average_exc_pad'
        Defaults to 'max'.

    Returns a list of tensors, for each output bin.
    The list contains output_size*output_size elements, where
    each element is a 3D tensor (N x C x 1)

    References
    ----------
    .. [1] He, Kaiming et al (2015):
           Spatial Pyramid Pooling in Deep Convolutional Networks
           for Visual Recognition.
           http://arxiv.org/pdf/1406.4729.pdf.
    """
    if mode == 'max':
        pooling_op = T.max
    elif mode in ['average_inc_pad', 'average_exc_pad']:
        pooling_op = T.mean
    else:
        msg = "Mode must be either 'max', 'average_inc_pad' or "
        msg += "'average_exc_pad'. Got '{0}'"
        raise ValueError(msg.format(mode))

    h, w = inputs.shape[2:]

    result = []
    n = float(output_size)
    for row in range(output_size):
        for col in range(output_size):
            # Bin boundaries per He et al., appendix A: floor for the
            # start, ceil for the end, so bins cover the whole input.
            start_h = T.floor(row / n * h).astype('int32')
            end_h = T.ceil((row + 1) / n * h).astype('int32')
            start_w = T.floor(col / n * w).astype('int32')
            end_w = T.ceil((col + 1) / n * w).astype('int32')

            pooling_region = inputs[:, :, start_h:end_h, start_w:end_w]
            this_result = pooling_op(pooling_region, axis=(2, 3))
            result.append(this_result.dimshuffle(0, 1, 'x'))
    return result


class SpatialPyramidPoolingLayer(Layer):
    """
    Spatial Pyramid Pooling Layer

    Performs spatial pyramid pooling (SPP) over the input.  It will turn a
    2D input of arbitrary size into an output of fixed dimension.  Hence,
    the convolutional part of a DNN can be connected to a dense part with
    a fixed number of nodes even if the dimensions of the input image are
    unknown.

    The pooling is performed over :math:`l` pooling levels.  Each pooling
    level :math:`i` will create :math:`M_i` output features.  :math:`M_i`
    is given by :math:`n_i * n_i`, with :math:`n_i` as the number of
    pooling operation per dimension in level :math:`i`, and we use a list
    of the :math:`n_i`'s as a parameter for SPP-Layer.  The length of this
    list is the level of the spatial pyramid.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    pool_dims : list of integers
        The list of :math:`n_i`'s that define the output dimension of each
        pooling level :math:`i`.  The length of pool_dims is the level of
        the spatial pyramid.

    mode : string
        Pooling mode, one of 'max', 'average_inc_pad', 'average_exc_pad'
        Defaults to 'max'.

    implementation : string
        Either 'fast' or 'kaiming'.  The 'fast' version uses theano's
        pool_2d operation, which is fast but does not work for all input
        sizes.  The 'kaiming' mode is slower but implements the pooling as
        described in [1]_, and works with any input size.

    **kwargs
        Any additional keyword arguments are forwarded to the
        :class:`Layer` superclass.

    Notes
    -----
    This layer should be inserted between the convolutional part of a DNN
    and its dense part.  Convolutions can be used for arbitrary input
    dimensions, but the size of their output will depend on their input
    dimensions.  Connecting the output of the convolutional to the dense
    part then usually demands us to fix the dimensions of the network's
    InputLayer.  The spatial pyramid pooling layer, however, allows us to
    leave the network input dimensions arbitrary.  The advantage over a
    global pooling layer is the added robustness against object
    deformations due to the pooling on different scales.

    References
    ----------
    .. [1] He, Kaiming et al (2015):
           Spatial Pyramid Pooling in Deep Convolutional Networks
           for Visual Recognition.
           http://arxiv.org/pdf/1406.4729.pdf.
    """

    def __init__(self, incoming, pool_dims=[4, 2, 1], mode='max',
                 implementation='fast', **kwargs):
        super(SpatialPyramidPoolingLayer, self).__init__(incoming, **kwargs)
        if len(self.input_shape) != 4:
            raise ValueError("Tried to create a SPP layer with "
                             "input shape %r. Expected 4 input dimensions "
                             "(batchsize, channels, 2 spatial dimensions)."
                             % (self.input_shape,))

        if implementation != 'kaiming':  # pragma: no cover
            # Check if the running theano version supports symbolic
            # variables as arguments for pool_2d. This is required
            # unless using implementation='kaiming'
            try:
                pool_2d(T.tensor4(),
                        ws=T.ivector(),
                        stride=T.ivector(),
                        ignore_border=True,
                        pad=None)
            except ValueError:
                raise ImportError("SpatialPyramidPoolingLayer with "
                                  "implementation='%s' requires a newer "
                                  "version of theano. Either update "
                                  "theano, or use implementation="
                                  "'kaiming'" % implementation)

        self.mode = mode
        self.implementation = implementation
        self.pool_dims = pool_dims

    def get_output_for(self, input, **kwargs):
        # Prefer the statically-known sizes; fall back to symbolic ones.
        input_size = tuple(symb if fixed is None else fixed
                           for fixed, symb
                           in zip(self.input_shape[2:], input.shape[2:]))
        pool_list = []
        for pool_dim in self.pool_dims:
            if self.implementation == 'kaiming':
                pool_list += pool_2d_nxn_regions(input, pool_dim,
                                                 mode=self.mode)
            else:  # pragma: no cover
                # Window size rounds up, stride rounds down, so the
                # pooled output has exactly pool_dim x pool_dim bins.
                win_size = tuple((i + pool_dim - 1) // pool_dim
                                 for i in input_size)
                str_size = tuple(i // pool_dim for i in input_size)

                pool = pool_2d(input,
                               ws=win_size,
                               stride=str_size,
                               mode=self.mode,
                               pad=None,
                               ignore_border=True)
                pool = pool.flatten(3)
                pool_list.append(pool)
        return T.concatenate(pool_list, axis=2)

    def get_output_shape_for(self, input_shape):
        num_features = sum(p*p for p in self.pool_dims)
        return (input_shape[0], input_shape[1], num_features)