Source code for texar.torch.run.metric.classification

# Copyright 2019 The Texar Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Executor metrics for classification tasks.
"""

from abc import ABC
from typing import Dict, List, Optional, Sequence, Tuple, TypeVar

import numpy as np

from texar.torch.run.metric.base_metric import StreamingMetric

__all__ = [
    "Accuracy",
    "ConfusionMatrix",
    "Precision",
    "Recall",
    "F1",
]

Input = TypeVar('Input')
Value = TypeVar('Value')


[docs]class Accuracy(StreamingMetric[Input, float]): r"""The accuracy metric for evaluation classification tasks. Accuracy is defined as the ratio of correct (exactly matching) predictions out of all predictions. Accuracy is a :class:`~texar.torch.run.metric.StreamingMetric`, requires both predicted values and labels. Accuracy values are :class:`float` numbers between 0 and 1, with higher values being better. Keyword Args: pred_name (str): Name of the predicted value. This will be used as the key to the dictionary returned by the model. label_name (str): Name of the label. This will be used as the key to the batch object returned by the dataset. Defaults to ``"label"``. """ correct: int def reset(self) -> None: super().reset() self.correct = 0 def add(self, predicted: Sequence[Input], labels: Sequence[Input]) -> None: super().add(predicted, labels) self.correct += sum(int(a == b) for a, b in zip(predicted, labels)) def value(self) -> float: if self.count == 0: return 0.0 return self.correct / self.count
class _ConfusionMatrix(StreamingMetric[Input, Value], ABC): count: int matrix: Optional[np.ndarray] # matrix[pred][label] pred_count: List[int] label_count: List[int] _class_id: Dict[Input, int] def reset(self) -> None: super().reset() self.matrix = None self.pred_count = [] self.label_count = [] self._class_id = {} def _convert_ids(self, classes: Sequence[Input]) -> List[int]: ids = [] cnt = 0 for klass in classes: if klass not in self._class_id: self._class_id[klass] = len(self._class_id) cnt += 1 ids.append(self._class_id[klass]) if self.matrix is None: self.matrix = np.zeros((cnt, cnt), dtype=np.int) else: self.matrix = np.pad(self.matrix, [(0, cnt), (0, cnt)], "constant", constant_values=0) self.pred_count.extend([0] * cnt) self.label_count.extend([0] * cnt) return ids def add(self, predicted: Sequence[Input], labels: Sequence[Input]) -> None: super().add(predicted, labels) predicted = self._convert_ids(predicted) labels = self._convert_ids(labels) assert self.matrix is not None for pred, label in zip(predicted, labels): self.matrix[pred, label] += 1 self.pred_count[pred] += 1 self.label_count[label] += 1
[docs]class ConfusionMatrix(_ConfusionMatrix[Input, Optional[np.ndarray]]): r"""The confusion matrix is an evaluation metric for classification tasks. Confusion matrix is a :class:`~texar.torch.run.metric.StreamingMetric`, requires both predicted values and labels. Confusion matrix values are NumPy arrays, with no clear definition of "better". Comparison of two confusion matrices are not meaningful. The value indexed at ``(i, j)`` of the confusion matrix is the number of data points whose predicted label is `i` and whose ground truth label is `j`. Labels are internally mapped to indices. Keyword Args: pred_name (str): Name of the predicted value. This will be used as the key to the dictionary returned by the model. label_name (str): Name of the label. This will be used as the key to the batch object returned by the dataset. Defaults to ``"label"``. """ def value(self) -> Optional[np.ndarray]: return self.matrix @property def class_id(self): r"""Mapping of predicted values and labels to indices within the matrix. """ return self._class_id def better(self, cur: Value, prev: Value) -> Optional[bool]: # pylint: disable=unused-argument # Always return `None` to indicate values are uncomparable. return None
class _MicroMacro(_ConfusionMatrix[Input, float], ABC): _valid_modes = ['binary', 'micro', 'macro', 'weighted'] def __init__(self, mode: str = 'binary', pos_label: Optional[Input] = None, *, pred_name: str, label_name: str = "label"): super().__init__(pred_name=pred_name, label_name=label_name) self.mode = mode if self.mode not in self._valid_modes: raise ValueError(f"Invalid mode {mode}. " f"Supported modes are: {self._valid_modes}") if self.mode == 'binary' and pos_label is None: raise ValueError("`pos_label` must not be none when `mode` is " "set to 'binary'") if pos_label is not None: self.pos_label = pos_label def _safe_divide(self, numerator: np.ndarray, denominator: np.ndarray) \ -> np.ndarray: # Credit: sklearn.metrics.classification._prf_divide if numerator.size == 1: if denominator == 0.0: return np.array(0.0) return numerator / denominator mask = denominator == 0.0 denominator = denominator.copy() denominator[mask] = 1.0 value = numerator / denominator return value def _convert_value(self, value: np.ndarray) -> np.ndarray: if self.mode == 'binary': label = self._class_id.get(self.pos_label, None) if label is None: return np.array(0) return value[label] if self.mode == 'micro': return value.sum() return value def _true_positive(self) -> np.ndarray: assert self.matrix is not None value = self.matrix.diagonal() return self._convert_value(value) def _true_negative(self) -> np.ndarray: assert self.matrix is not None value = (self.count - np.asarray(self.pred_count) - np.asarray(self.label_count) + self.matrix.diagonal()) return self._convert_value(value) def _false_positive(self) -> np.ndarray: assert self.matrix is not None value = np.asarray(self.pred_count) - self.matrix.diagonal() return self._convert_value(value) def _false_negative(self) -> np.ndarray: assert self.matrix is not None value = np.asarray(self.label_count) - self.matrix.diagonal() return self._convert_value(value) def _value(self) -> Tuple[np.ndarray, np.ndarray]: r"""Return the numerator and denominator of the metric value. """ raise NotImplementedError def value(self) -> float: if self.count == 0: return 0.0 numerator, denominator = self._value() value = self._safe_divide(numerator, denominator) if self.mode == 'macro': value = value.sum() / len(self._class_id) elif self.mode == 'weighted': value = (value * np.asarray(self.label_count)).sum() / self.count return value.item()
[docs]class Precision(_MicroMacro[Input]): r"""The precision metric for evaluation classification tasks. Precision is defined as the ratio of ``tp / (tp + fp)``, where ``tp`` is the number of true positives and ``fp`` is the number of false positives. Precision is a :class:`~texar.torch.run.metric.StreamingMetric`, requires both predicted values and labels. Precision values are :class:`float` numbers between 0 and 1, with higher values being better. Args: mode (str): The mode for computing averages across multiple labels. Defaults to ``"binary"``. Available options include: - ``"binary"``: Only report results for the class specified by :attr:`pos_label`. This is only meaningful for binary classification tasks. - ``"micro"``: Return the precision value computed using globally counted true positives and false positives. - ``"macro"``: Return the unweighted average of precision values for each label. - ``"weighted"``: Return the average of precision values for each label, weighted by the number of true instances for each label. pos_label (str, optional): The label for the positive class. Only used if :attr:`mode` is set to ``"binary"``. Keyword Args: pred_name (str): Name of the predicted value. This will be used as the key to the dictionary returned by the model. label_name (str): Name of the label. This will be used as the key to the batch object returned by the dataset. Defaults to ``"label"``. """ def _value(self) -> Tuple[np.ndarray, np.ndarray]: return (self._true_positive(), (self._true_positive() + self._false_positive()))
[docs]class Recall(_MicroMacro[Input]): r"""The recall metric for evaluation classification tasks. Precision is defined as the ratio of ``tp / (tp + fn)``, where ``tp`` is the number of true positives and ``fn`` is the number of false negatives. Recall is a :class:`~texar.torch.run.metric.StreamingMetric`, requires both predicted values and labels. Recall values are :class:`float` numbers between 0 and 1, with higher values being better. Args: mode (str): The mode for computing averages across multiple labels. Defaults to ``"binary"``. Available options include: - ``"binary"``: Only report results for the class specified by :attr:`pos_label`. This is only meaningful for binary classification tasks. - ``"micro"``: Return the recall value computed using globally counted true positives and false negatives. - ``"macro"``: Return the unweighted average of recall values for each label. - ``"weighted"``: Return the average of recall values for each label, weighted by the number of true instances for each label. pos_label (str, optional): The label for the positive class. Only used if :attr:`mode` is set to ``"binary"``. Keyword Args: pred_name (str): Name of the predicted value. This will be used as the key to the dictionary returned by the model. label_name (str): Name of the label. This will be used as the key to the batch object returned by the dataset. Defaults to ``"label"``. """ def _value(self) -> Tuple[np.ndarray, np.ndarray]: return (self._true_positive(), (self._true_positive() + self._false_negative()))
[docs]class F1(Precision[Input], Recall[Input]): r"""The F1 metric for evaluation classification tasks. F1 is defined as the harmonic mean of precision and recall. F1 is a :class:`~texar.torch.run.metric.StreamingMetric`, requires both predicted values and labels. F1 values are :class:`float` numbers between 0 and 1, with higher values being better. Args: mode (str): The mode for computing averages across multiple labels. Defaults to ``"binary"``. Available options include: - ``"binary"``: Only report results for the class specified by :attr:`pos_label`. This is only meaningful for binary classification tasks. - ``"micro"``: Return the F1 value computed using globally counted true positives, false positives, and false negatives. - ``"macro"``: Return the unweighted average of F1 values for each label. - ``"weighted"``: Return the average of F1 values for each label, weighted by the number of true instances for each label. pos_label (str, optional): The label for the positive class. Only used if :attr:`mode` is set to ``"binary"``. Keyword Args: pred_name (str): Name of the predicted value. This will be used as the key to the dictionary returned by the model. label_name (str): Name of the label. This will be used as the key to the batch object returned by the dataset. Defaults to ``"label"``. """ def _value(self) -> Tuple[np.ndarray, np.ndarray]: # pylint: disable=protected-access precision = self._safe_divide(*Precision._value(self)) recall = self._safe_divide(*Recall._value(self)) # pylint: enable=protected-access return (2 * precision * recall, precision + recall)