# Source code for texar.torch.run.metric.regression

# Copyright 2019 The Texar Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Executor metrics for regression tasks.
"""

import math
from typing import Sequence

from texar.torch.run.metric.base_metric import StreamingMetric

__all__ = [
    "PearsonR",
    "RMSE",
]


class PearsonR(StreamingMetric[float, float]):
    r"""The Pearson correlation coefficient (Pearson's r) metric for evaluating
    regression tasks. Pearson's r is a measure of linear correlation between two
    sets of variables. Pearson's r ranges between -1 and 1, with 1 indicating
    total positive linear correlation, -1 indicating total negative linear
    correlation, and 0 indicating no linear correlation.

    Pearson's r is a :class:`~texar.torch.run.metric.StreamingMetric`, requires
    both predicted values and labels. Pearson's r values are :class:`float`
    numbers between -1 and 1, with higher values being better.

    The coefficient is computed from running sums, so individual samples are
    not stored:

    .. math::

        r = \frac{\sum xy - \sum x \sum y / n}
                 {\sqrt{\sum x^2 - (\sum x)^2 / n}
                  \sqrt{\sum y^2 - (\sum y)^2 / n}}

    Keyword Args:
        pred_name (str): Name of the predicted value. This will be used as the
            key to the dictionary returned by the model.
        label_name (str): Name of the label. This will be used as the key to the
            batch object returned by the dataset. Defaults to ``"label"``.
    """
    # Running sums accumulated by `add` since the last `reset`.
    x_sum: float   # sum of x
    x2_sum: float  # sum of x * x
    y_sum: float   # sum of y
    y2_sum: float  # sum of y * y
    xy_sum: float  # sum of x * y

    def reset(self) -> None:
        r"""Reset all running sums to zero (after resetting the base class).
        """
        super().reset()
        self.x_sum = self.y_sum = 0.0
        self.x2_sum = self.y2_sum = 0.0
        self.xy_sum = 0.0

    def add(self, xs: Sequence[float], ys: Sequence[float]) -> None:
        r"""Fold a batch of paired values into the running sums.

        Args:
            xs: Values of the first variable (presumably the predicted
                values -- confirm against the base class contract).
            ys: Values of the second variable (presumably the labels),
                paired element-wise with ``xs``.
        """
        # NOTE(review): `self.count`, read by `value`, is never written in this
        # class; the base-class `add` is presumably what updates it.
        super().add(xs, ys)
        self.x_sum += sum(xs)
        self.x2_sum += sum(x * x for x in xs)
        self.y_sum += sum(ys)
        self.y2_sum += sum(y * y for y in ys)
        self.xy_sum += sum(x * y for x, y in zip(xs, ys))

    def value(self) -> float:
        r"""Compute Pearson's r over everything added since the last reset.

        Returns:
            ``0.0`` if ``self.count`` is zero; ``nan`` if either variable has
            no (positive) variance, in which case the coefficient is
            undefined; otherwise the coefficient, clamped to ``[-1, 1]``.
        """
        if self.count == 0:
            return 0.0
        numerator = self.xy_sum - self.x_sum * self.y_sum / self.count
        denominator_x = self.x2_sum - self.x_sum ** 2 / self.count
        denominator_y = self.y2_sum - self.y_sum ** 2 / self.count
        # Use `<=` rather than `==`: cancellation in the subtractions above can
        # leave a (near-)constant variable with a tiny *negative* variance
        # term, which would make `math.sqrt` raise or yield a bogus value.
        if denominator_x <= 0.0 or denominator_y <= 0.0:
            return math.nan
        # Take the roots separately so that the product of two very small (or
        # very large) variance terms cannot underflow to zero (or overflow).
        result = numerator / (
            math.sqrt(denominator_x) * math.sqrt(denominator_y))
        # Rounding may push the result marginally outside the valid range.
        # Explicit comparisons (not min/max) keep a NaN result as NaN.
        if result > 1.0:
            return 1.0
        if result < -1.0:
            return -1.0
        return result


class RMSE(StreamingMetric[float, float]):
    r"""The root mean squared error (RMSE) metric for evaluating regression
    tasks. RMSE is the square root of the mean of the squared residuals
    (differences between predicted values and ground truth values).

    RMSE is a :class:`~texar.torch.run.metric.StreamingMetric`, requires both
    predicted values and labels. RMSE values are :class:`float` numbers with a
    lower bound of 0. Lower values are better.

    Keyword Args:
        pred_name (str): Name of the predicted value. This will be used as the
            key to the dictionary returned by the model.
        label_name (str): Name of the label. This will be used as the key to the
            batch object returned by the dataset. Defaults to ``"label"``.
    """
    # An error measure: smaller values indicate a better model.
    higher_is_better = False

    # Running total of squared residuals since the last `reset`.
    squared_sum: float

    def reset(self) -> None:
        r"""Reset the base-class state and zero the squared-residual total.
        """
        super().reset()
        self.squared_sum = 0.0

    def add(self, predicted: Sequence[float], labels: Sequence[float]) -> None:
        r"""Accumulate the squared residuals of one batch.

        Args:
            predicted: Predicted values.
            labels: Ground truth values, paired element-wise with
                ``predicted``.
        """
        super().add(predicted, labels)
        batch_total = sum(
            (pred - gold) ** 2 for pred, gold in zip(predicted, labels))
        self.squared_sum += batch_total

    def value(self) -> float:
        r"""Return the RMSE over everything added since the last reset, or
        ``0.0`` when ``self.count`` is zero.
        """
        if self.count != 0:
            return math.sqrt(self.squared_sum / self.count)
        return 0.0