# Source code for deeprob.torch.metrics

# MIT License: Copyright (c) 2021 Lorenzo Loconte, Gennaro Gala

from typing import Optional, Union, Any

import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
from torch.utils import data
from torchvision import transforms
from torchvision import models
from tqdm import tqdm

from deeprob.utils.statistics import compute_fid


class RunningAverageMetric:
    """
    Running average of a metric, weighted by the number of samples of each update.

    Every call accumulates ``metric * num_samples`` and ``num_samples``; the average
    is the ratio between the two accumulated quantities.
    """
    def __init__(self):
        """
        Initialize a running average metric object.
        """
        self.__samples_counter = 0
        self.__metric_accumulator = 0.0

    def __call__(self, metric: float, num_samples: int):
        """
        Accumulate a metric value.

        :param metric: The metric value.
        :param num_samples: The number of samples from which the metric is estimated.
        :raises ValueError: If the number of samples is not positive.
        """
        if num_samples <= 0:
            raise ValueError("The number of samples must be positive")
        self.__samples_counter += num_samples
        # Weight the metric by its number of samples, so that the final ratio
        # is an average over samples and not over calls
        self.__metric_accumulator += metric * num_samples

    def reset(self):
        """
        Reset the running average metric accumulator.
        """
        self.__samples_counter = 0
        self.__metric_accumulator = 0.0

    def average(self) -> float:
        """
        Get the metric average.

        :return: The metric average.
        :raises ZeroDivisionError: If no samples have been accumulated since the
                                   initialization or the last reset.
        """
        # Fail with an explicit message instead of a bare "float division by zero".
        # The exception type is kept, so existing handlers still work.
        if self.__samples_counter == 0:
            raise ZeroDivisionError(
                "Cannot compute the average: no samples have been accumulated"
            )
        return self.__metric_accumulator / self.__samples_counter


def fid_score(
    dataset1: Union[data.Dataset, torch.Tensor],
    dataset2: Union[data.Dataset, torch.Tensor],
    model: Optional[nn.Module] = None,
    transform: Optional[Any] = None,
    batch_size: int = 100,
    num_workers: int = 0,
    device: Optional[torch.device] = None,
    verbose: bool = True
) -> float:
    """
    Compute the Frechet Inception Distance (FID) between two data samples.

    The features of both data sets are extracted with the same model, their mean and
    covariance are estimated, and the two resulting statistics are compared.
    This implementation has been readapted from https://github.com/mseitzer/pytorch-fid.
    IMPORTANT NOTE: The computed FID score is not comparable with other FID scores based on Tensorflow's InceptionV3.

    :param dataset1: The first samples data set.
    :param dataset2: The second samples data set.
    :param model: The features extractor. If None, Torchvision's InceptionV3 pretrained on ImageNet
                  is used, with its dropout and fully-connected layers replaced by identities.
    :param transform: An optional transformation applied to every batch. If both transform and model
                      are None, the batches are resized to 299x299 (bilinear interpolation) and then
                      normalized with mean 0.5 and standard deviation 0.5, i.e. from (0, 1) to (-1, 1).
    :param batch_size: The batch size to use when extracting features.
    :param num_workers: The number of workers used for the data loaders.
    :param device: The device used to run the model. If it's None 'cuda' will be used, if available.
    :param verbose: Whether to enable verbose mode.
    :return: The FID score.
    """
    if model is None:
        # Default features extractor: InceptionV3 pretrained on ImageNet, with the
        # dropout and classification layers bypassed to expose the extracted features
        model = models.inception_v3(pretrained=True, aux_logits=False, transform_input=False)
        model.dropout = nn.Identity()
        model.fc = nn.Identity()

        # The default pre-processing is installed only together with the default model
        if transform is None:
            resize = transforms.Lambda(
                lambda x: F.interpolate(x, (299, 299), mode='bilinear', align_corners=False)
            )
            rescale = transforms.Normalize((0.5,), (0.5,))
            transform = transforms.Compose([resize, rescale])

    # Options shared by the data loaders of both data sets
    loader_options = dict(batch_size=batch_size, num_workers=num_workers)

    def feature_statistics(features: torch.Tensor):
        # Mean and covariance of the features, one row per sample
        features = features.cpu().numpy()
        return np.mean(features, axis=0), np.cov(features, rowvar=False)

    # Extract the features of both data sets first, then estimate their statistics
    extracted = [
        extract_features(
            model, dataset, transform, device=device, verbose=verbose, **loader_options
        )
        for dataset in (dataset1, dataset2)
    ]
    (mean1, cov1), (mean2, cov2) = (feature_statistics(f) for f in extracted)

    # Compute and return the FID score
    return compute_fid(mean1, cov1, mean2, cov2)


def extract_features(
    model: nn.Module,
    dataset: Union[data.Dataset, torch.Tensor],
    transform: Optional[Any] = None,
    device: Optional[torch.device] = None,
    verbose: bool = True,
    **kwargs
) -> torch.Tensor:
    """
    Extract the features produced by a model using a data set.

    The data set is iterated in batches through a data loader. Each batch is optionally
    transformed, moved to the device and fed to the model. The outputs of all the batches
    are concatenated along the first dimension.
    Note that the given model is switched to evaluation mode and moved to the device.

    :param model: The model to use to extract the features.
    :param dataset: The data set.
    :param transform: An optional transformation to apply to every batch, before moving it to the device.
    :param device: The device used to run the model. If it's None 'cuda' will be used, if available.
    :param verbose: Whether to enable verbose mode, i.e. print the used device and show a progress bar.
    :param kwargs: Additional parameters to pass to the data loader.
    :return: The extracted features for each data sample.
    """
    # Get the device to use
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Honor the verbose flag: stay silent when verbose mode is disabled
    if verbose:
        print("Extract features using device: {}".format(device))

    # Instantiate the data loader
    data_loader = data.DataLoader(dataset, **kwargs)
    if verbose:
        data_loader = tqdm(
            data_loader, leave=False, bar_format='{l_bar}{bar:24}{r_bar}', unit='batch'
        )

    # Make sure the model is in evaluation mode
    # Moreover, move it to the desired device
    model.eval()
    model.to(device)

    # Extract the features, without tracking gradients
    features = []
    with torch.no_grad():
        for batch in data_loader:
            if transform is not None:
                batch = transform(batch)
            batch = batch.to(device)
            features.append(model(batch))
    return torch.cat(features, dim=0)