# Copyright 2019 The Texar Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Various RNN encoders.
"""
from typing import Any, Dict, Generic, List, Optional, Tuple, TypeVar, Union
import torch
import torch.nn as nn
from texar.torch.core import layers
from texar.torch.core.cell_wrappers import LSTMCell, RNNCellBase
from texar.torch.hyperparams import HParams
from texar.torch.modules.encoders.encoder_base import EncoderBase
from texar.torch.modules.networks.conv_networks import _to_list
from texar.torch.utils.rnn import bidirectional_dynamic_rnn, dynamic_rnn
from texar.torch.utils.shapes import mask_sequences
__all__ = [
"_forward_output_layers",
"RNNEncoderBase",
"UnidirectionalRNNEncoder",
"BidirectionalRNNEncoder",
]
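
# `State` is the generic type of an RNN cell's state: a single tensor for
# vanilla RNN/GRU cells, or an (h, c) pair of tensors for LSTM cells.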
State = TypeVar('State')
def _default_output_layer_hparams() -> Dict[str, Any]:
return {
"num_layers": 0,
"layer_size": 128,
"activation": "Identity",
"final_layer_activation": None,
"other_dense_kwargs": None,
"dropout_layer_ids": [],
"dropout_rate": 0.5,
"variational_dropout": False,
"@no_typecheck": ["activation", "final_layer_activation",
"layer_size", "dropout_layer_ids"]
}
def _build_dense_output_layer(cell_output_size: int,
hparams: HParams) -> Optional[nn.Sequential]:
r"""Build the output layers.
Args:
cell_output_size: The output size of the rnn cell.
hparams (dict or HParams): Hyperparameters. Missing hyperparameters
will be set to default values. See
:meth:`default_hparams` for the hyperparameter structure and
default values.
Returns:
A :torch_nn:`Sequential` module containing the output layers.
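
    Example:
        An illustrative sketch (the hparam values below are arbitrary; with
        the default ``"Identity"`` activation, an :torch_nn:`Identity`
        module is inserted after each non-final dense layer):

        .. code-block:: python

            hparams = HParams(
                {"num_layers": 2, "layer_size": [64, 32],
                 "dropout_layer_ids": [0]},
                _default_output_layer_hparams())
            net = _build_dense_output_layer(128, hparams)
            # net == Sequential(Dropout(p=0.5), Linear(128, 64),
            #                   Identity(), Linear(64, 32))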
"""
nlayers = hparams.num_layers
if nlayers <= 0:
return None
layer_size = _to_list(
hparams.layer_size, 'output_layer.layer_size', nlayers)
dropout_layer_ids = _to_list(hparams.dropout_layer_ids)
other_kwargs = hparams.other_dense_kwargs or {}
if isinstance(other_kwargs, HParams):
other_kwargs = other_kwargs.todict()
if not isinstance(other_kwargs, dict):
raise ValueError(
"hparams 'output_layer.other_dense_kwargs' must be a dict.")
output_layers: List[nn.Module] = []
for i in range(nlayers):
if i in dropout_layer_ids:
# TODO: Variational dropout is not implemented.
output_layers.append(nn.Dropout(p=hparams.dropout_rate))
dense_layer = nn.Linear(in_features=(cell_output_size if i == 0
else layer_size[i - 1]),
out_features=layer_size[i], **other_kwargs)
output_layers.append(dense_layer)
if i == nlayers - 1:
activation = hparams.final_layer_activation
else:
activation = hparams.activation
if activation is not None:
layer_hparams = {"type": activation, "kwargs": {}}
activation_layer = layers.get_layer(hparams=layer_hparams)
output_layers.append(activation_layer)
if nlayers in dropout_layer_ids:
output_layers.append(nn.Dropout(p=hparams.dropout_rate))
return nn.Sequential(*output_layers)
def _forward_output_layers(
inputs: torch.Tensor,
output_layer: Optional[nn.Module],
time_major: bool,
sequence_length: Optional[Union[torch.LongTensor, List[int]]] = None) \
-> Tuple[torch.Tensor, int]:
r"""Forwards inputs through the output layers.
Args:
inputs: A Tensor of shape ``[batch_size, max_time] + input_size`` if
:attr:`time_major` is `False`, or shape
``[max_time, batch_size] + input_size`` if :attr:`time_major` is
`True`.
output_layer (optional): :torch_nn:`Sequential` or :torch_nn:`Module`
of output layers.
time_major (bool): The shape format of the :attr:`inputs` and
:attr:`outputs` Tensors. If `True`, these tensors are of shape
`[max_time, batch_size, input_size]`. If `False` (default),
these tensors are of shape `[batch_size, max_time, input_size]`.
sequence_length (optional): A 1D :tensor:`LongTensor` of shape
``[batch_size]``. Sequence lengths of the batch inputs. Used to
copy-through state and zero-out outputs when past a batch element's
sequence length.
Returns:
        A pair :attr:`(outputs, outputs_size)`, where

        - :attr:`outputs`: A Tensor of shape
          ``[batch_size, max_time] + outputs_size`` (the first two
          dimensions are exchanged if :attr:`time_major` is `True`).
        - :attr:`outputs_size`: An `int` representing the output size.
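
    Example:
        An illustrative sketch (shapes are arbitrary;
        :func:`~texar.torch.utils.shapes.mask_sequences` zeroes out
        positions past each sequence's length):

        .. code-block:: python

            inputs = torch.randn(2, 5, 16)  # [batch_size, max_time, dim]
            layer = nn.Linear(16, 8)
            outputs, size = _forward_output_layers(
                inputs, layer, time_major=False,
                sequence_length=torch.tensor([5, 3]))
            # outputs.shape == (2, 5, 8); steps past each length are zero
            # size == 8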
"""
if output_layer is None:
return inputs, inputs.shape[-1]
output = output_layer(inputs)
if sequence_length is not None:
output = mask_sequences(output, sequence_length, time_major=time_major)
output_size = output.shape[-1]
return output, output_size


class RNNEncoderBase(EncoderBase, Generic[State]):
r"""Base class for all RNN encoder classes to inherit.
Args:
hparams (dict or HParams, optional): Hyperparameters. Missing
hyperparameters will be set to default values. See
:meth:`default_hparams` for the hyperparameter structure and
default values.
"""

    @staticmethod
def default_hparams():
r"""Returns a dictionary of hyperparameters with default values.

        .. code-block:: python

{
"name": "rnn_encoder"
}
"""
return {
'name': 'rnn_encoder'
}


class UnidirectionalRNNEncoder(RNNEncoderBase[State]):
r"""One directional RNN encoder.
Args:
input_size (int): The number of expected features in the input for the
cell.
        cell (RNNCell, optional): If not specified, a cell is created as
            specified in :attr:`hparams["rnn_cell"]`.
output_layer (optional): An instance of
:torch_nn:`Module`. Applies to the RNN cell
output of each step. If `None` (default), the output layer is
created as specified in :attr:`hparams["output_layer"]`.
hparams (dict or HParams, optional): Hyperparameters. Missing
hyperparameters will be set to default values. See
:meth:`default_hparams` for the hyperparameter structure and
default values.
See :meth:`forward` for the inputs and outputs of the encoder.
Example:

        .. code-block:: python

# Use with embedder
embedder = WordEmbedder(vocab_size, hparams=emb_hparams)
encoder = UnidirectionalRNNEncoder(hparams=enc_hparams)
outputs, final_state = encoder(
inputs=embedder(data_batch['text_ids']),
sequence_length=data_batch['length'])
.. document private functions
"""
_cell: RNNCellBase[State]
def __init__(self,
input_size: int,
cell: Optional[RNNCellBase[State]] = None,
output_layer: Optional[nn.Module] = None,
hparams=None):
super().__init__(hparams=hparams)
# Make RNN cell
if cell is not None:
self._cell = cell
else:
self._cell = layers.get_rnn_cell(input_size,
self._hparams.rnn_cell)
# Make output layer
self._output_layer: Optional[nn.Module]
if output_layer is not None:
self._output_layer = output_layer
self._output_layer_hparams = None
else:
self._output_layer = _build_dense_output_layer(
self._cell.hidden_size, self._hparams.output_layer)
self._output_layer_hparams = self._hparams.output_layer

    @staticmethod
def default_hparams() -> Dict[str, Any]:
r"""Returns a dictionary of hyperparameters with default values.

        .. code-block:: python

{
"rnn_cell": default_rnn_cell_hparams(),
"output_layer": {
"num_layers": 0,
"layer_size": 128,
"activation": "identity",
"final_layer_activation": None,
"other_dense_kwargs": None,
"dropout_layer_ids": [],
"dropout_rate": 0.5,
"variational_dropout": False
},
"name": "unidirectional_rnn_encoder"
}
Here:
`"rnn_cell"`: dict
A dictionary of RNN cell hyperparameters. Ignored if
:attr:`cell` is given to the encoder constructor.
The default value is defined in
:func:`~texar.torch.core.default_rnn_cell_hparams`.
`"output_layer"`: dict
Output layer hyperparameters. Ignored if :attr:`output_layer`
is given to the encoder constructor. Includes:
`"num_layers"`: int
The number of output (dense) layers. Set to 0 to avoid any
output layers applied to the cell outputs.
`"layer_size"`: int or list
The size of each of the output (dense) layers.
If an `int`, each output layer will have the same size. If
                a list, its length must equal :attr:`num_layers`.
`"activation"`: str or callable or None
                Activation function for each of the output (dense)
                layers except the final one. This can be a function,
                its string name, or its full module path. If a function
                name is given, the function must be from :mod:`torch.nn`.
For example:

                .. code-block:: python

"activation": "relu" # function name
"activation": "my_module.my_activation_fn" # module path
"activation": my_module.my_activation_fn # function
                The default is ``"Identity"``, which applies a
                pass-through (identity) activation.
`"final_layer_activation"`: str or callable or None
The activation function for the final output layer.
`"other_dense_kwargs"`: dict or None
Other keyword arguments to construct each of the output
dense layers, e.g., ``bias``. See
:torch_nn:`Linear` for the keyword arguments.
`"dropout_layer_ids"`: int or list
The indexes of layers (starting from 0) whose inputs
are applied with dropout. The index = :attr:`num_layers`
means dropout applies to the final layer output. For example,

                .. code-block:: python

{
"num_layers": 2,
"dropout_layer_ids": [0, 2]
}
                will lead to a series of layers as
                `-dropout-layer0-layer1-dropout-` (see also the example
                at the end of this docstring).
The dropout mode (training or not) is controlled
by :attr:`self.training`.
`"dropout_rate"`: float
The dropout rate, between 0 and 1. For example,
``"dropout_rate": 0.1`` would zero out 10% of elements.
`"variational_dropout"`: bool
Whether the dropout mask is the same across all time steps.
`"name"`: str
            Name of the encoder.
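
        Example:
            An illustrative ``"output_layer"`` configuration (the sizes
            and activation below are arbitrary choices, not defaults):

            .. code-block:: python

                enc_hparams = {
                    "output_layer": {
                        "num_layers": 2,
                        "layer_size": [256, 128],
                        "activation": "ReLU",
                        "dropout_layer_ids": [0, 2],
                    }
                }
                encoder = UnidirectionalRNNEncoder(
                    input_size=100, hparams=enc_hparams)
                # Resulting stack: -dropout-Linear-ReLU-Linear-dropout-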
"""
hparams = RNNEncoderBase.default_hparams()
hparams.update({
"rnn_cell": layers.default_rnn_cell_hparams(),
"output_layer": _default_output_layer_hparams(),
"name": "unidirectional_rnn_encoder"
})
return hparams

    def forward(self,  # type: ignore
inputs: torch.Tensor,
sequence_length: Optional[Union[torch.LongTensor,
List[int]]] = None,
initial_state: Optional[State] = None,
time_major: bool = False,
return_cell_output: bool = False,
return_output_size: bool = False):
r"""Encodes the inputs.
Args:
inputs: A 3D Tensor of shape ``[batch_size, max_time, dim]``.
The first two dimensions
:attr:`batch_size` and :attr:`max_time` are exchanged if
:attr:`time_major` is `True`.
sequence_length (optional): A 1D :tensor:`LongTensor` of shape
``[batch_size]``.
Sequence lengths of the batch inputs. Used to copy-through
state and zero-out outputs when past a batch element's sequence
length.
initial_state (optional): Initial state of the RNN.
time_major (bool): The shape format of the :attr:`inputs` and
:attr:`outputs` Tensors. If `True`, these tensors are of shape
``[max_time, batch_size, depth]``. If `False` (default),
these tensors are of shape ``[batch_size, max_time, depth]``.
return_cell_output (bool): Whether to return the output of the RNN
                cell, i.e., the results prior to the output layer.
return_output_size (bool): Whether to return the size of the
output (i.e., the results after output layers).
Returns:
- By default (both ``return_cell_output`` and ``return_output_size``
are `False`), returns a pair :attr:`(outputs, final_state)`,
where
              - :attr:`outputs`: The RNN output tensor produced by the
                output layer (if one exists) or the RNN cell otherwise.
                The tensor is of
shape ``[batch_size, max_time, output_size]`` if
``time_major`` is `False`, or
``[max_time, batch_size, output_size]`` if
``time_major`` is `True`.
If RNN cell output is a (nested) tuple of Tensors, then the
:attr:`outputs` will be a (nested) tuple having the same
                nested structure as the cell output.
- :attr:`final_state`: The final state of the RNN, which is a
Tensor of shape ``[batch_size] + cell.state_size`` or
a (nested) tuple of Tensors if ``cell.state_size`` is a
(nested) tuple.
- If ``return_cell_output`` is True, returns a triple
:attr:`(outputs, final_state, cell_outputs)`
- :attr:`cell_outputs`: The outputs by the RNN cell prior to the
                output layer, having the same structure as :attr:`outputs`
                except for the ``output_dim``.
- If ``return_output_size`` is `True`, returns a tuple
:attr:`(outputs, final_state, output_size)`
- :attr:`output_size`: A (possibly nested tuple of) int
representing the size of :attr:`outputs`. If a single int or
an int array, then ``outputs`` has shape
``[batch/time, time/batch] + output_size``. If
                a (nested) tuple, then ``output_size`` has the same
                structure as ``outputs``.
- If both ``return_cell_output`` and ``return_output_size`` are
`True`, returns
:attr:`(outputs, final_state, cell_outputs, output_size)`.
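
        Example:
            An illustrative call requesting all optional return values
            (``inputs`` and ``lengths`` are placeholders):

            .. code-block:: python

                outputs, final_state, cell_outputs, output_size = encoder(
                    inputs=inputs,
                    sequence_length=lengths,
                    return_cell_output=True,
                    return_output_size=True)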
"""
cell_outputs, state = dynamic_rnn(
cell=self._cell,
inputs=inputs,
sequence_length=sequence_length,
initial_state=initial_state,
time_major=time_major)
outputs, output_size = _forward_output_layers(
inputs=cell_outputs,
output_layer=self._output_layer,
time_major=time_major,
sequence_length=sequence_length)
rets = (outputs, state)
if return_cell_output:
rets += (cell_outputs,) # type: ignore
if return_output_size:
rets += (output_size,) # type: ignore
return rets
@property
def cell(self) -> RNNCellBase[State]:
r"""The RNN cell.
"""
return self._cell
@property
def state_size(self) -> int:
r"""The state size of encoder cell.
Same as :attr:`encoder.cell.state_size`.
"""
if isinstance(self._cell, LSTMCell):
return 2 * self._cell.hidden_size # type: ignore
else:
return self._cell.hidden_size
@property
def output_layer(self) -> Optional[nn.Module]:
r"""The output layer.
"""
return self._output_layer
@property
def output_size(self) -> int:
r"""The feature size of :meth:`forward` output :attr:`outputs`.
        If no output layer exists, the feature size equals
        :attr:`encoder.cell.hidden_size`; otherwise it equals the size
        of the last dimension of the output layer's output.
"""
# TODO: We will change the implementation to
# something that does not require a forward pass.
dim = self._cell.hidden_size
if self._output_layer is not None:
            dummy_tensor = torch.empty(dim)
dim = self._output_layer(dummy_tensor).size(-1)
return dim


class BidirectionalRNNEncoder(RNNEncoderBase[State]):
r"""Bidirectional forward-backward RNN encoder.
    Args:
        input_size (int): The number of expected features in the input
            for the cells.
        cell_fw (RNNCell, optional): The forward RNN cell. If not given,
            a cell is created as specified in ``hparams["rnn_cell_fw"]``.
cell_bw (RNNCell, optional): The backward RNN cell. If not given,
a cell is created as specified in ``hparams["rnn_cell_bw"]``.
output_layer_fw (optional): An instance of
            :torch_nn:`Module`. Applies to the forward
RNN cell output of each step. If `None` (default), the output
layer is created as specified in ``hparams["output_layer_fw"]``.
output_layer_bw (optional): An instance of
            :torch_nn:`Module`. Applies to the backward
RNN cell output of each step. If `None` (default), the output
layer is created as specified in ``hparams["output_layer_bw"]``.
hparams (dict or HParams, optional): Hyperparameters. Missing
hyperparameters will be set to default values. See
:meth:`default_hparams` for the hyperparameter structure and
default values.
See :meth:`forward` for the inputs and outputs of the encoder.
Example:

        .. code-block:: python

# Use with embedder
embedder = WordEmbedder(vocab_size, hparams=emb_hparams)
encoder = BidirectionalRNNEncoder(hparams=enc_hparams)
outputs, final_state = encoder(
inputs=embedder(data_batch['text_ids']),
sequence_length=data_batch['length'])
# outputs == (outputs_fw, outputs_bw)
# final_state == (final_state_fw, final_state_bw)
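            # A common follow-up step (illustrative, not part of the
            # encoder itself) is to concatenate the two directions:
            # encoding = torch.cat(outputs, dim=-1)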
.. document private functions
"""
def __init__(self,
input_size: int,
cell_fw: Optional[RNNCellBase[State]] = None,
cell_bw: Optional[RNNCellBase[State]] = None,
output_layer_fw: Optional[nn.Module] = None,
output_layer_bw: Optional[nn.Module] = None,
hparams=None):
super().__init__(hparams=hparams)
# Make RNN cells
if cell_fw is not None:
self._cell_fw = cell_fw
else:
self._cell_fw = layers.get_rnn_cell(input_size,
self._hparams.rnn_cell_fw)
if cell_bw is not None:
self._cell_bw = cell_bw
elif self._hparams.rnn_cell_share_config:
self._cell_bw = layers.get_rnn_cell(input_size,
self._hparams.rnn_cell_fw)
else:
self._cell_bw = layers.get_rnn_cell(input_size,
self._hparams.rnn_cell_bw)
# Make output layers
        self._output_layer_fw: Optional[nn.Module]
if output_layer_fw is not None:
self._output_layer_fw = output_layer_fw
self._output_layer_hparams_fw = None
else:
self._output_layer_fw = _build_dense_output_layer( # type: ignore
self._cell_fw.hidden_size, self._hparams.output_layer_fw)
self._output_layer_hparams_fw = self._hparams.output_layer_fw
        self._output_layer_bw: Optional[nn.Module]
if output_layer_bw is not None:
self._output_layer_bw = output_layer_bw
self._output_layer_hparams_bw = None
elif self._hparams.output_layer_share_config:
self._output_layer_bw = _build_dense_output_layer( # type: ignore
self._cell_bw.hidden_size, self._hparams.output_layer_fw)
self._output_layer_hparams_bw = self._hparams.output_layer_fw
else:
self._output_layer_bw = _build_dense_output_layer( # type: ignore
self._cell_bw.hidden_size, self._hparams.output_layer_bw)
self._output_layer_hparams_bw = self._hparams.output_layer_bw

    @staticmethod
def default_hparams() -> Dict[str, Any]:
r"""Returns a dictionary of hyperparameters with default values.

        .. code-block:: python

{
"rnn_cell_fw": default_rnn_cell_hparams(),
"rnn_cell_bw": default_rnn_cell_hparams(),
"rnn_cell_share_config": True,
"output_layer_fw": {
"num_layers": 0,
"layer_size": 128,
"activation": "identity",
"final_layer_activation": None,
"other_dense_kwargs": None,
"dropout_layer_ids": [],
"dropout_rate": 0.5,
"variational_dropout": False
},
"output_layer_bw": {
# Same hyperparams and default values as "output_layer_fw"
# ...
},
"output_layer_share_config": True,
"name": "bidirectional_rnn_encoder"
}
Here:
`"rnn_cell_fw"`: dict
Hyperparameters of the forward RNN cell.
Ignored if :attr:`cell_fw` is given to the encoder constructor.
The default value is defined in
:func:`~texar.torch.core.default_rnn_cell_hparams`.
`"rnn_cell_bw"`: dict
Hyperparameters of the backward RNN cell.
Ignored if :attr:`cell_bw` is given to the encoder constructor,
or if `"rnn_cell_share_config"` is `True`.
The default value is defined in
            :func:`~texar.torch.core.default_rnn_cell_hparams`.
`"rnn_cell_share_config"`: bool
            Whether to share hyperparameters of the backward cell with the
forward cell. Note that the cell parameters (variables) are not
shared.
`"output_layer_fw"`: dict
Hyperparameters of the forward output layer. Ignored if
``output_layer_fw`` is given to the constructor.
See the ``"output_layer"`` field of
:meth:`~texar.torch.modules.UnidirectionalRNNEncoder` for details.
`"output_layer_bw"`: dict
Hyperparameters of the backward output layer. Ignored if
            :attr:`output_layer_bw` is given to the constructor, or if
            ``output_layer_share_config`` is `True`. It has the same
            structure and defaults as ``"output_layer_fw"``.
`"output_layer_share_config"`: bool
            Whether to share hyperparameters of the backward output layer
with the forward output layer. Note that the layer parameters
(variables) are not shared.
`"name"`: str
            Name of the encoder.
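
        Example:
            An illustrative configuration using different cell types for
            the two directions (the cell type names are examples; see
            :func:`~texar.torch.core.default_rnn_cell_hparams` for the
            ``"rnn_cell_*"`` structure):

            .. code-block:: python

                enc_hparams = {
                    "rnn_cell_share_config": False,
                    "rnn_cell_fw": {"type": "GRUCell"},
                    "rnn_cell_bw": {"type": "RNNCell"},
                }
                encoder = BidirectionalRNNEncoder(
                    input_size=100, hparams=enc_hparams)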
"""
hparams = RNNEncoderBase.default_hparams()
hparams.update({
"rnn_cell_fw": layers.default_rnn_cell_hparams(),
"rnn_cell_bw": layers.default_rnn_cell_hparams(),
"rnn_cell_share_config": True,
"output_layer_fw": _default_output_layer_hparams(),
"output_layer_bw": _default_output_layer_hparams(),
"output_layer_share_config": True,
"name": "bidirectional_rnn_encoder"
})
return hparams

    def forward(self,  # type: ignore
inputs: torch.Tensor,
sequence_length: Optional[Union[torch.LongTensor,
List[int]]] = None,
initial_state_fw: Optional[State] = None,
initial_state_bw: Optional[State] = None,
time_major: bool = False,
return_cell_output: bool = False,
return_output_size: bool = False):
r"""Encodes the inputs.
Args:
inputs: A 3D Tensor of shape ``[batch_size, max_time, dim]``.
The first two dimensions
                ``batch_size`` and ``max_time`` are exchanged if
``time_major`` is `True`.
sequence_length (optional): A 1D :tensor:`LongTensor` of shape
``[batch_size]``.
Sequence lengths of the batch inputs. Used to copy-through
state and zero-out outputs when past a batch element's sequence
length.
            initial_state_fw (optional): Initial state of the forward RNN.
            initial_state_bw (optional): Initial state of the backward RNN.
time_major (bool): The shape format of the :attr:`inputs` and
:attr:`outputs` Tensors. If `True`, these tensors are of shape
``[max_time, batch_size, depth]``. If `False` (default),
these tensors are of shape ``[batch_size, max_time, depth]``.
return_cell_output (bool): Whether to return the output of the RNN
                cells, i.e., the results prior to the output layers.
            return_output_size (bool): Whether to return the size of the
                output (i.e., the results after the output layers).
Returns:
- By default (both ``return_cell_output`` and ``return_output_size``
are `False`), returns a pair :attr:`(outputs, final_state)`
- :attr:`outputs`: A tuple ``(outputs_fw, outputs_bw)``
containing the forward and the backward RNN outputs, each of
which is of shape ``[batch_size, max_time, output_dim]``
if ``time_major`` is `False`, or
``[max_time, batch_size, output_dim]`` if ``time_major``
is `True`.
If RNN cell output is a (nested) tuple of Tensors, then
``outputs_fw`` and ``outputs_bw`` will be a (nested) tuple
having the same structure as the cell output.
- :attr:`final_state`: A tuple
``(final_state_fw, final_state_bw)`` containing the final
states of the forward and backward RNNs, each of which is a
Tensor of shape ``[batch_size] + cell.state_size``, or a
(nested) tuple of Tensors if ``cell.state_size`` is a (nested)
tuple.
- If ``return_cell_output`` is `True`, returns a triple
:attr:`(outputs, final_state, cell_outputs)` where
- :attr:`cell_outputs`: A tuple
``(cell_outputs_fw, cell_outputs_bw)`` containing the outputs
by the forward and backward RNN cells prior to the output
                layers, having the same structure as :attr:`outputs` except
for the ``output_dim``.
- If ``return_output_size`` is `True`, returns a tuple
:attr:`(outputs, final_state, output_size)` where
- :attr:`output_size`: A tuple
``(output_size_fw, output_size_bw)`` containing the size of
``outputs_fw`` and ``outputs_bw``, respectively.
Take ``*_fw`` for example, ``output_size_fw`` is a (possibly
nested tuple of) int. If a single int or an int array, then
``outputs_fw`` has shape
``[batch/time, time/batch] + output_size_fw``. If a (nested)
tuple, then ``output_size_fw`` has the same structure as
``outputs_fw``. The same applies to ``output_size_bw``.
- If both ``return_cell_output`` and ``return_output_size`` are
`True`, returns
:attr:`(outputs, final_state, cell_outputs, output_size)`.
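
        Example:
            An illustrative call (``inputs`` and ``lengths`` are
            placeholders):

            .. code-block:: python

                (outputs_fw, outputs_bw), (state_fw, state_bw) = encoder(
                    inputs=inputs, sequence_length=lengths)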
"""
cell_outputs, states = bidirectional_dynamic_rnn(
cell_fw=self._cell_fw,
cell_bw=self._cell_bw,
inputs=inputs,
sequence_length=sequence_length,
initial_state_fw=initial_state_fw,
initial_state_bw=initial_state_bw,
time_major=time_major)
outputs_fw, output_size_fw = _forward_output_layers(
inputs=cell_outputs[0],
output_layer=self._output_layer_fw,
time_major=time_major,
sequence_length=sequence_length)
outputs_bw, output_size_bw = _forward_output_layers(
inputs=cell_outputs[1],
output_layer=self._output_layer_bw,
time_major=time_major,
sequence_length=sequence_length)
outputs = (outputs_fw, outputs_bw)
output_size = (output_size_fw, output_size_bw)
returns = (outputs, states)
if return_cell_output:
returns += (cell_outputs,) # type: ignore
if return_output_size:
returns += (output_size,) # type: ignore
return returns
@property
def cell_fw(self) -> RNNCellBase[State]:
r"""The forward RNN cell.
"""
return self._cell_fw
@property
def cell_bw(self) -> RNNCellBase[State]:
r"""The backward RNN cell.
"""
return self._cell_bw
@property
def state_size_fw(self) -> int:
r"""The state size of the forward encoder cell.
Same as :attr:`encoder.cell_fw.state_size`.
"""
if isinstance(self._cell_fw, LSTMCell):
return 2 * self._cell_fw.hidden_size # type: ignore
else:
return self._cell_fw.hidden_size
@property
def state_size_bw(self) -> int:
r"""The state size of the backward encoder cell.
Same as :attr:`encoder.cell_bw.state_size`.
"""
if isinstance(self._cell_bw, LSTMCell):
return 2 * self._cell_bw.hidden_size # type: ignore
else:
return self._cell_bw.hidden_size
@property
def output_layer_fw(self) -> Optional[nn.Module]:
r"""The output layer of the forward RNN.
"""
return self._output_layer_fw
@property
def output_layer_bw(self) -> Optional[nn.Module]:
r"""The output layer of the backward RNN.
"""
return self._output_layer_bw
@property
def output_size(self) -> Tuple[int, int]:
r"""The feature sizes of :meth:`forward` outputs
:attr:`output_size_fw` and :attr:`output_size_bw`.
        Each feature size equals the size of the last dimension of the
        corresponding output.
"""
# TODO: We will change the implementation to
# something that does not require a forward pass.
dim_bw = self._cell_bw.hidden_size
dim_fw = self._cell_fw.hidden_size
if self._output_layer_bw is not None:
            dummy_tensor_bw = torch.empty(dim_bw)
            output_bw = self._output_layer_bw(dummy_tensor_bw).size(-1)
else:
output_bw = dim_bw
if self._output_layer_fw is not None:
            dummy_tensor_fw = torch.empty(dim_fw)
            output_fw = self._output_layer_fw(dummy_tensor_fw).size(-1)
else:
output_fw = dim_fw
return (output_fw, output_bw)