
# Copyright 2019 The Texar Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Various convolutional networks.
"""
from typing import List, Optional, Tuple, Union, Dict, Any

import torch

from texar.torch.core.layers import get_pooling_layer_hparams
from texar.torch.hyperparams import HParams
from texar.torch.modules.networks.network_base import FeedForwardNetworkBase
from texar.torch.utils.shapes import mask_sequences
from texar.torch.utils.utils import uniquify_str

__all__ = [
    "_to_list",
    "Conv1DNetwork",
]


def _to_list(value: Union[Dict[str, Any], List, Tuple, int], name=None,
             list_length=None):
    r"""Converts `hparams` value into a list.

    If :attr:`list_length` is given, then the canonicalized :attr:`value`
    must be of length :attr:`list_length`.
    """
    if not isinstance(value, (list, tuple)):
        if list_length is not None:
            value = [value] * list_length
        else:
            value = [value]
    if list_length is not None and len(value) != list_length:
        name = '' if name is None else name
        raise ValueError("hparams '%s' must be a list of length %d"
                         % (name, list_length))
    return value
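
# Illustrative behavior of `_to_list` (examples assumed, derived from the
# definition above):
#
#   _to_list(128, "out_channels", 3)    # -> [128, 128, 128]
#   _to_list([3, 4, 5])                 # -> [3, 4, 5]
#   _to_list([3, 4], "kernel_size", 3)  # raises ValueError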


class Conv1DNetwork(FeedForwardNetworkBase):
    r"""Simple `Conv-1D` network which consists of a sequence of
    convolutional layers followed by a sequence of dense layers.

    Args:
        in_channels (int): Number of channels in the input tensor.
        in_features (int): Size of the feature dimension in the input
            tensor.
        hparams (dict, optional): Hyperparameters. Missing hyperparameters
            will be set to default values. See :meth:`default_hparams` for
            the hyperparameter structure and default values.

    See :meth:`forward` for the inputs and outputs. If :attr:`"data_format"`
    is set to ``"channels_first"`` (this is the default), inputs must be a
    tensor of shape `[batch_size, channels, length]`. If
    :attr:`"data_format"` is set to ``"channels_last"``, inputs must be a
    tensor of shape `[batch_size, length, channels]`. For example, for
    sequence classification, `length` corresponds to time steps, and
    `channels` corresponds to the embedding dimension.

    Example:

    .. code-block:: python

        nn = Conv1DNetwork(in_channels=20, in_features=256)  # Use the default
        inputs = torch.randn([64, 20, 256])
        outputs = nn(inputs)
        # outputs == Tensor of shape [64, 256], because the final dense layer
        # has size 256.

    .. document private functions
    """

    def __init__(self, in_channels: int, in_features: Optional[int] = None,
                 hparams=None):
        super().__init__(hparams=hparams)

        if self.hparams.num_dense_layers > 0 and in_features is None:
            raise ValueError("\"in_features\" cannot be None "
                             "if \"num_dense_layers\" > 0")

        # Construct only the non-dense (conv/pool/dropout) layers first.
        layer_hparams = self._build_non_dense_layer_hparams(
            in_channels=in_channels)
        self._build_layers(layers=None, layer_hparams=layer_hparams)

        if self.hparams.num_dense_layers > 0:
            # Redundant at runtime (see the check above), but narrows the
            # Optional type of `in_features` for static type checkers.
            if in_features is None:
                raise ValueError("\"in_features\" cannot be None "
                                 "if \"num_dense_layers\" > 0")
            ones = torch.ones(1, in_channels, in_features)
            input_size = self._infer_dense_layer_input_size(ones)
            layer_hparams = self._build_dense_hparams(
                in_features=input_size[1], layer_hparams=layer_hparams)
            self._build_layers(layers=None, layer_hparams=layer_hparams)
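
    # Construction sketch (derived from the defaults; illustrative only):
    # the conv/pool part is built first, then a dummy tensor
    # `torch.ones(1, in_channels, in_features)` is run through it once to
    # measure the flattened feature size, which becomes the `in_features`
    # of the first `Linear` layer. With the default hparams and the
    # docstring example above (kernel sizes 3, 4, 5 with 128 channels each,
    # pooled to a single unit and concatenated), that size is
    # 128 * 3 = 384, so the dense part is Linear(384, 256).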
    @staticmethod
    def default_hparams():
        r"""Returns a dictionary of hyperparameters with default values.

        .. code-block:: python

            {
                # (1) Conv layers
                "num_conv_layers": 1,
                "out_channels": 128,
                "kernel_size": [3, 4, 5],
                "conv_activation": "ReLU",
                "conv_activation_kwargs": None,
                "other_conv_kwargs": {},
                "data_format": "channels_first",
                # (2) Pooling layers
                "pooling": "MaxPool1d",
                "pool_size": None,
                "pool_stride": 1,
                "other_pool_kwargs": {},
                # (3) Dense layers
                "num_dense_layers": 1,
                "out_features": 256,
                "dense_activation": None,
                "dense_activation_kwargs": None,
                "final_dense_activation": None,
                "final_dense_activation_kwargs": None,
                "other_dense_kwargs": None,
                # (4) Dropout
                "dropout_conv": [1],
                "dropout_dense": [],
                "dropout_rate": 0.75,
                # (5) Others
                "name": "conv1d_network"
            }

        Here:

        1. For **convolutional** layers:

            `"num_conv_layers"`: int
                Number of convolutional layers.

            `"out_channels"`: int or list
                The number of output channels of the convolution, i.e., the
                dimensionality of the output space.

                - If ``"num_conv_layers"`` > 1 and ``"out_channels"`` is an
                  int, all convolutional layers will have the same number
                  of output channels.
                - If ``"num_conv_layers"`` > 1 and ``"out_channels"`` is a
                  list, its length must equal ``"num_conv_layers"``. The
                  number of output channels of each convolutional layer is
                  the corresponding element of this list.

            `"kernel_size"`: int or list
                Lengths of 1D convolution windows.

                - If ``"num_conv_layers"`` == 1, this can also be a list of
                  ``int`` of arbitrary length denoting differently sized
                  convolution windows. The number of output channels of
                  each size is specified by ``"out_channels"``. For
                  example, the default values create 3 convolutional
                  layers with kernel sizes 3, 4, and 5, respectively, each
                  with 128 output channels.
                - If ``"num_conv_layers"`` > 1, this must be a list of
                  length ``"num_conv_layers"``. Each element can be an
                  ``int`` or a list of ``int`` of arbitrary length denoting
                  the kernel sizes of that layer.

            `"conv_activation"`: str or callable
                Activation applied to the output of the convolutional
                layers. Set to `None` to maintain a linear activation.
                See :func:`~texar.torch.core.get_layer` for more details.

            `"conv_activation_kwargs"`: dict, optional
                Keyword arguments for the activation following the
                convolutional layer. See
                :func:`~texar.torch.core.get_layer` for more details.

            `"other_conv_kwargs"`: list or dict, optional
                Other keyword arguments for the :torch_nn:`Conv1d`
                constructor, e.g., ``padding``.

                - If a dict, the same dict is applied to all the
                  convolutional layers.
                - If a list, its length must equal ``"num_conv_layers"``.
                  The list may contain nested lists: if the convolutional
                  layer at index i has multiple kernel sizes, the
                  corresponding element of this list can be a list of the
                  same length as ``"kernel_size"`` at index i. If the
                  element at index i is instead a dict, the same dict is
                  applied to all the convolutional layers at index i.

            `"data_format"`: str, optional
                Data format of the input tensor. Defaults to
                ``channels_first``, which treats the first (non-batch)
                dimension as the channel dimension. Set it to
                ``channels_last`` to treat the last dimension as the
                channel dimension. This argument can also be passed to the
                ``forward`` method, in which case the value specified here
                is ignored.

        2. For **pooling** layers:

            `"pooling"`: str or class or instance
                Pooling layer after each of the convolutional layer(s). Can
                be a pooling layer class, its name or module path, or a
                class instance.

            `"pool_size"`: int or list, optional
                Size of the pooling window. If an ``int``, all pooling
                layers will have the same pool size. If a list, its length
                must equal ``"num_conv_layers"``. If `None` and the pooling
                type is either :torch_docs:`MaxPool1d <nn.html#maxpool1d>`
                or :torch_docs:`AvgPool1d <nn.html#avgpool1d>`, the pool
                size is set to the input size. That is, the output of the
                pooling layer is a single unit.

            `"pool_stride"`: int or list, optional
                Strides of the pooling operation. If an ``int``, all
                pooling layers will have the same stride. If a list, its
                length must equal ``"num_conv_layers"``.

            `"other_pool_kwargs"`: list or dict, optional
                Other keyword arguments for the pooling layer class
                constructor.

                - If a dict, the same dict is applied to all the pooling
                  layers.
                - If a list, its length must equal ``"num_conv_layers"``.
                  The pooling arguments for layer i are the element at
                  index i of this list.

        3. For **dense** layers (note that dense layers always follow the
           convolutional and pooling layers):

            `"num_dense_layers"`: int
                Number of dense layers.

            `"out_features"`: int or list
                Dimension of features after the dense layers. If an
                ``int``, all dense layers will have the same feature
                dimension. If a list of ``int``, its length must equal
                ``"num_dense_layers"``.

            `"dense_activation"`: str or callable
                Activation function applied to the output of the dense
                layers **except** the last dense layer. Set to `None` to
                maintain a linear activation.

            `"dense_activation_kwargs"`: dict, optional
                Keyword arguments for the activation functions of the
                dense layers before the last dense layer.

            `"final_dense_activation"`: str or callable
                Activation function applied to the output of the **last**
                dense layer. Set to `None` to maintain a linear activation.

            `"final_dense_activation_kwargs"`: dict, optional
                Keyword arguments for the activation function of the last
                dense layer.

            `"other_dense_kwargs"`: dict, optional
                Other keyword arguments for the dense layer class
                constructor.

        4. For **dropouts**:

            `"dropout_conv"`: int or list
                The indices of convolutional layers (starting from 0) whose
                **inputs** have dropout applied. An index equal to
                :attr:`num_conv_layers` means dropout is applied to the
                output of the final convolutional layer. For example,

                .. code-block:: python

                    {
                        "num_conv_layers": 2,
                        "dropout_conv": [0, 2]
                    }

                leads to a series of layers as
                `-dropout-conv0-conv1-dropout-`.

                The dropout mode (training or not) is controlled by
                :attr:`self.training`.

            `"dropout_dense"`: int or list
                Same as ``"dropout_conv"`` but applied to dense layers
                (index starting from 0).

            `"dropout_rate"`: float
                The dropout rate, between 0 and 1. For example,
                ``"dropout_rate": 0.1`` would drop out 10% of the elements.

        5. Others:

            `"name"`: str
                Name of the network.
        """
        return {
            # (1) Conv layers
            "num_conv_layers": 1,
            "out_channels": 128,
            "kernel_size": [3, 4, 5],
            "conv_activation": "ReLU",
            "conv_activation_kwargs": None,
            "other_conv_kwargs": {},
            "data_format": "channels_first",
            # (2) Pooling layers
            "pooling": "MaxPool1d",
            "pool_size": None,
            "pool_stride": 1,
            "other_pool_kwargs": {},
            # (3) Dense layers
            "num_dense_layers": 1,
            "out_features": 256,
            "dense_activation": None,
            "dense_activation_kwargs": None,
            "final_dense_activation": None,
            "final_dense_activation_kwargs": None,
            "other_dense_kwargs": None,
            # (4) Dropout
            "dropout_conv": [1],
            "dropout_dense": [],
            "dropout_rate": 0.75,
            # (5) Others
            "name": "conv1d_network",
            "@no_typecheck": ["out_channels", "kernel_size",
                              "conv_activation", "other_conv_kwargs",
                              "pool_size", "pool_stride",
                              "other_pool_kwargs", "out_features",
                              "dense_activation", "dropout_conv",
                              "dropout_dense"]
        }
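
    # Illustrative hparams customization (assumed values, not defaults):
    # two stacked convolutional layers with one kernel size each and no
    # dense layers. `in_features` may then be omitted at construction.
    #
    #   hparams = {
    #       "num_conv_layers": 2,
    #       "out_channels": [128, 64],   # per-layer output channels
    #       "kernel_size": [3, 4],       # one window size per layer
    #       "pool_size": 2,              # same pooling window for both
    #       "num_dense_layers": 0,
    #       "dropout_conv": [],          # disable dropout
    #   }
    #   net = Conv1DNetwork(in_channels=20, hparams=hparams)
    #   # net.output_size == 64 (channels of the last conv layer)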
    def _build_pool_hparams(self):
        pool_type = self._hparams.pooling
        if pool_type == "MaxPool":
            pool_type = "MaxPool1d"
        elif pool_type == "AvgPool":
            pool_type = "AvgPool1d"

        npool = self._hparams.num_conv_layers
        kernel_size = _to_list(self._hparams.pool_size, "pool_size", npool)
        stride = _to_list(self._hparams.pool_stride, "pool_stride", npool)

        other_kwargs = self._hparams.other_pool_kwargs
        if isinstance(other_kwargs, HParams):
            other_kwargs = other_kwargs.todict()
            other_kwargs = _to_list(other_kwargs, "other_pool_kwargs", npool)
        elif isinstance(other_kwargs, (list, tuple)):
            if len(other_kwargs) != npool:
                raise ValueError("The length of hparams['other_pool_kwargs'] "
                                 "must equal 'num_conv_layers'")
        else:
            raise ValueError("hparams['other_pool_kwargs'] must be either a "
                             "dict or a list/tuple")

        pool_hparams = []
        for i in range(npool):
            kwargs_i = {"kernel_size": kernel_size[i], "stride": stride[i]}
            kwargs_i.update(other_kwargs[i])
            pool_hparams_ = get_pooling_layer_hparams({"type": pool_type,
                                                       "kwargs": kwargs_i})
            pool_hparams.append(pool_hparams_)
        return pool_hparams

    def _build_conv1d_hparams(self, in_channels, pool_hparams):
        r"""Creates the hparams for each of the convolutional layers usable
        for :func:`texar.torch.core.layers.get_layer`.
        """
        nconv = self._hparams.num_conv_layers
        if len(pool_hparams) != nconv:
            raise ValueError("`pool_hparams` must be of length %d" % nconv)

        in_channels = [in_channels]
        out_channels = _to_list(self._hparams.out_channels, 'out_channels',
                                nconv)
        # because in_channels(i) = out_channels(i-1)
        in_channels.extend(out_channels[:-1])

        if nconv == 1:
            kernel_size = _to_list(self._hparams.kernel_size)
            if not isinstance(kernel_size[0], (list, tuple)):
                kernel_size = [kernel_size]
        elif nconv > 1:
            kernel_size = _to_list(self._hparams.kernel_size,
                                   'kernel_size', nconv)
            kernel_size = [_to_list(ks) for ks in kernel_size]

        other_kwargs = self._hparams.other_conv_kwargs
        if isinstance(other_kwargs, HParams):
            other_kwargs = other_kwargs.todict()
            other_kwargs = _to_list(other_kwargs, "other_conv_kwargs", nconv)
        elif isinstance(other_kwargs, (list, tuple)):
            if len(other_kwargs) != nconv:
                raise ValueError("The length of hparams['other_conv_kwargs'] "
                                 "must equal 'num_conv_layers'")
        else:
            raise ValueError("hparams['other_conv_kwargs'] must be either "
                             "a dict or a list.")

        def _activation_hparams(name, kwargs=None):
            if kwargs is not None:
                return {"type": name, "kwargs": kwargs}
            else:
                return {"type": name, "kwargs": {}}

        conv_pool_hparams = []
        for i in range(nconv):
            hparams_i = []
            names = []
            if isinstance(other_kwargs[i], dict):
                other_kwargs[i] = _to_list(other_kwargs[i],
                                           "other_kwargs[i]",
                                           len(kernel_size[i]))
            elif (isinstance(other_kwargs[i], (list, tuple))
                  and len(other_kwargs[i]) != len(kernel_size[i])):
                raise ValueError(
                    "The length of hparams['other_conv_kwargs'][i] must "
                    "equal the length of hparams['kernel_size'][i]")
            for idx, ks_ij in enumerate(kernel_size[i]):
                name = uniquify_str("conv_%d" % (i + 1), names)
                names.append(name)
                conv_kwargs_ij = {
                    "in_channels": in_channels[i],
                    "out_channels": out_channels[i],
                    "kernel_size": ks_ij
                }
                conv_kwargs_ij.update(other_kwargs[i][idx])
                hparams_i.append(
                    {"type": "Conv1d", "kwargs": conv_kwargs_ij})
            if len(hparams_i) == 1:
                if self._hparams.conv_activation:
                    layers = {
                        "layers": [hparams_i[0],
                                   _activation_hparams(
                                       self._hparams.conv_activation,
                                       self._hparams.conv_activation_kwargs)]}
                    sequential_layer = {"type": "Sequential",
                                        "kwargs": layers}
                    conv_pool_hparams.append([sequential_layer,
                                              pool_hparams[i]])
                else:
                    conv_pool_hparams.append([hparams_i[0], pool_hparams[i]])
            else:  # creates MergeLayer
                mrg_kwargs_layers = []
                for hparams_ij in hparams_i:
                    if self._hparams.conv_activation:
                        seq_kwargs_j = {
                            "layers": [
                                hparams_ij,
                                _activation_hparams(
                                    self._hparams.conv_activation,
                                    self._hparams.conv_activation_kwargs),
                                pool_hparams[i]
                            ]
                        }
                    else:
                        seq_kwargs_j = {"layers": [hparams_ij,
                                                   pool_hparams[i]]}
                    mrg_kwargs_layers.append(
                        {"type": "Sequential", "kwargs": seq_kwargs_j})
                mrg_hparams = {"type": "MergeLayer",
                               "kwargs": {"layers": mrg_kwargs_layers}}
                conv_pool_hparams.append(mrg_hparams)
        return conv_pool_hparams

    def _build_dense_hparams(self, in_features: int, layer_hparams):
        ndense = self._hparams.num_dense_layers
        in_features = [in_features]
        out_features = _to_list(self._hparams.out_features, 'out_features',
                                ndense)
        # because in_features(i) = out_features(i-1)
        in_features.extend(out_features[:-1])

        other_kwargs = self._hparams.other_dense_kwargs or {}
        if isinstance(other_kwargs, HParams):
            other_kwargs = other_kwargs.todict()

        if not isinstance(other_kwargs, dict):
            raise ValueError("hparams['other_dense_kwargs'] must be a dict.")

        def _activation_hparams(name, kwargs=None):
            if kwargs is not None:
                return {"type": name, "kwargs": kwargs}
            else:
                return {"type": name, "kwargs": {}}

        dense_hparams = []
        for i in range(ndense):
            kwargs_i = {"in_features": in_features[i],
                        "out_features": out_features[i]}
            kwargs_i.update(other_kwargs)

            dense_hparams_i = {"type": "Linear", "kwargs": kwargs_i}
            if i < ndense - 1 and self._hparams.dense_activation is not None:
                layers = {
                    "layers": [dense_hparams_i,
                               _activation_hparams(
                                   self._hparams.dense_activation,
                                   self._hparams.dense_activation_kwargs)]}
                sequential_layer = {"type": "Sequential", "kwargs": layers}
                dense_hparams.append(sequential_layer)
            elif (i == ndense - 1
                  and self._hparams.final_dense_activation is not None):
                layers = {
                    "layers": [
                        dense_hparams_i,
                        _activation_hparams(
                            self._hparams.final_dense_activation,
                            self._hparams.final_dense_activation_kwargs)]}
                sequential_layer = {"type": "Sequential", "kwargs": layers}
                dense_hparams.append(sequential_layer)
            else:
                dense_hparams.append(dense_hparams_i)

        def _dropout_hparams():
            return {"type": "Dropout",
                    "kwargs": {"p": self._hparams.dropout_rate}}

        dropout_dense = _to_list(self._hparams.dropout_dense)
        ndense = self._hparams.num_dense_layers
        if ndense > 0:
            # Add a flatten layer before the dense layers.
            layer_hparams.append({"type": "Flatten"})
        for dense_i in range(ndense):
            if dense_i in dropout_dense:
                layer_hparams.append(_dropout_hparams())
            layer_hparams.append(dense_hparams[dense_i])
        if ndense in dropout_dense:
            layer_hparams.append(_dropout_hparams())

        return layer_hparams

    def _build_non_dense_layer_hparams(self, in_channels):
        pool_hparams = self._build_pool_hparams()
        conv_pool_hparams = self._build_conv1d_hparams(in_channels,
                                                       pool_hparams)

        def _dropout_hparams():
            return {"type": "Dropout",
                    "kwargs": {"p": self._hparams.dropout_rate}}

        dropout_conv = _to_list(self._hparams.dropout_conv)
        layers_hparams = []
        nconv = self._hparams.num_conv_layers
        for conv_i in range(nconv):
            if conv_i in dropout_conv:
                layers_hparams.append(_dropout_hparams())
            if isinstance(conv_pool_hparams[conv_i], (list, tuple)):
                layers_hparams += conv_pool_hparams[conv_i]
            else:
                layers_hparams.append(conv_pool_hparams[conv_i])
        if nconv in dropout_conv:
            layers_hparams.append(_dropout_hparams())

        return layers_hparams
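
    # For orientation (derived from the default hparams; structure is
    # illustrative, not verbatim): `_build_non_dense_layer_hparams` returns
    # roughly
    #
    #   [
    #       {"type": "MergeLayer", "kwargs": {"layers": [
    #           # one branch per kernel size, each Conv1d -> ReLU -> pool
    #           {"type": "Sequential", "kwargs": {...}},  # kernel size 3
    #           {"type": "Sequential", "kwargs": {...}},  # kernel size 4
    #           {"type": "Sequential", "kwargs": {...}},  # kernel size 5
    #       ]}},
    #       {"type": "Dropout", ...},  # "dropout_conv": [1] == num_conv_layers
    #   ]
    #
    # and `_build_dense_hparams` then appends {"type": "Flatten"} followed
    # by {"type": "Linear", ...} for each dense layer.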
    def forward(self,  # type: ignore
                input: torch.Tensor,
                sequence_length: Optional[Union[torch.LongTensor,
                                                List[int]]] = None,
                dtype: Optional[torch.dtype] = None,
                data_format: Optional[str] = None) -> torch.Tensor:
        r"""Feeds forward inputs through the network layers and returns
        outputs.

        Args:
            input: The inputs to the network, which is a 3D tensor.
            sequence_length (optional): A :tensor:`LongTensor` of shape
                ``[batch_size]`` or a Python list containing the length of
                each element in :attr:`input`. If given, time steps beyond
                the length will first be masked out before feeding to the
                layers.
            dtype (optional): Type of the inputs. If not provided, inferred
                from the inputs automatically.
            data_format (optional): Data format of the input tensor. If
                ``channels_last``, the last dimension is treated as the
                channel dimension, so :attr:`input` should be of shape
                `[batch_size, X, channel]`. If ``channels_first``, the
                first (non-batch) dimension is treated as the channel
                dimension, so :attr:`input` should be of shape
                `[batch_size, channel, X]`. Defaults to `None`, in which
                case the value is picked from the hyperparameters.

        Returns:
            The output of the final layer.
        """
        if input.dim() != 3:
            raise ValueError("'input' should be a 3D tensor.")

        if data_format is None:
            data_format = self.hparams["data_format"]

        if data_format == "channels_first":
            # Masking requires the channels in the last dimension.
            input = input.permute(0, 2, 1)
            if sequence_length is not None:
                input = mask_sequences(input, sequence_length,
                                       dtype=dtype, time_major=False)
            # The network is constructed for channels-first tensors.
            input = input.permute(0, 2, 1)
            output = super().forward(input)
        elif data_format == "channels_last":
            if sequence_length is not None:
                input = mask_sequences(input, sequence_length,
                                       dtype=dtype, time_major=False)
            input = input.permute(0, 2, 1)
            output = super().forward(input)
            # Transpose back only when the output is still 3D.
            if output.dim() == 3:
                output = output.permute(0, 2, 1)
        else:
            raise ValueError("Invalid 'data_format'")

        return output
    def _infer_dense_layer_input_size(self,
                                      input: torch.Tensor) -> torch.Size:
        # Feed the input through the conv part of the network to infer the
        # input shape for the dense layers.
        with torch.no_grad():
            output = super().forward(input)
        return output.view(output.size()[0], -1).size()

    @property
    def output_size(self) -> int:
        r"""The feature size of :meth:`forward` output.
        """
        if self.hparams.num_dense_layers <= 0:
            out_channels = self._hparams.out_channels
            if not isinstance(out_channels, (list, tuple)):
                out_channels = [out_channels]

            nconv = self._hparams.num_conv_layers
            if nconv == 1:
                kernel_size = _to_list(self._hparams.kernel_size)
                if not isinstance(kernel_size[0], (list, tuple)):
                    kernel_size = [kernel_size]
            elif nconv > 1:
                kernel_size = _to_list(self._hparams.kernel_size,
                                       'kernel_size', nconv)
                kernel_size = [_to_list(ks) for ks in kernel_size]

            return out_channels[-1] * len(kernel_size[-1])
        else:
            out_features = self._hparams.out_features
            if isinstance(out_features, (list, tuple)):
                return out_features[-1]
            else:
                return out_features
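

if __name__ == "__main__":
    # Illustrative demo (not part of the original module; assumes the
    # default hparams described above).
    net = Conv1DNetwork(in_channels=20, in_features=256)

    # "channels_first" (the default): [batch_size, channels, length].
    inputs = torch.randn(64, 20, 256)
    print(net(inputs).shape)  # torch.Size([64, 256])

    # "channels_last" with per-example masking: [batch_size, length,
    # channels]. Time steps beyond each length are zeroed out before the
    # conv layers.
    inputs = torch.randn(64, 256, 20)
    lengths = torch.randint(1, 257, (64,))
    outputs = net(inputs, sequence_length=lengths,
                  data_format="channels_last")
    print(outputs.shape)  # torch.Size([64, 256])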