Source code for deeprob.utils.region

# MIT License: Copyright (c) 2021 Lorenzo Loconte, Gennaro Gala

from typing import Optional, List

import numpy as np

from deeprob.utils.random import RandomState, check_random_state


[docs]class RegionGraph:
    def __init__(self, n_features: int, depth: int, random_state: Optional[RandomState] = None):
        """
        Initialize a region graph.

        A region graph is defined w.r.t. a set of indices of random variable in a SPN.
        A *region* R is defined as a non-empty subset of the indices,
        and represented as sorted tuples with unique entries.
        A *partition* P of a region R is defined as a collection of non-empty sets,
        which are non-overlapping, and whose
        union is R. R is also called *parent* region of P.
        Any region C such that C is in partition P is called *child region* of P.
        So, a region is represented as a sorted tuple of integers (unique elements)
        and a partition is represented as a sorted tuple of regions (non-overlapping, not-empty, at least 2).
        A *region graph* is an acyclic, directed, bi-partite graph over regions and partitions.
        So, any child of a region R is a partition of R, and any child of a partition is
        a child region of the partition. The root of the region graph
        is a sorted tuple composed of all the elements. The leaves of the region graph must also be regions.
        They are called input regions, or leaf regions.
        Given a region graph, we can easily construct a corresponding SPN:
        1) Associate I distributions to each input region.
        2) Associate K sum nodes to each other (non-input) region.
        3) For each partition P in the region graph,
        take all cross-products (as product nodes) of distributions/sum nodes associated with the child regions.
        Connect these products as children of all sum nodes in the parent region of P.
        In the end, this procedure will always deliver a complete and decomposable SPN.

        :param n_features: The number of features.
        :param depth: The maximum depth.
        :param random_state: The random state. It can be either None, a seed integer or a Numpy RandomState.
        :raises ValueError: If a parameter is out of domain.
        """
        if n_features <= 0:
            raise ValueError("The number of features must be positive")
        if depth <= 0:
            raise ValueError("The region graph depth must be positive")
        if depth > int(np.log2(n_features)):
            raise ValueError("Invalid region graph depth based on the number of features")

        self.items = tuple(range(n_features))
        self.depth = depth

        # Check the random state
        self.random_state = check_random_state(random_state)

[docs]    def random_layers(self) -> List[List[tuple]]:
        """
        Generate a list of layers randomly over a single repetition of features.

        :return: A list of layers, alternating between regions and partitions.
        """
        root = [self.items]
        layers = [root]

        for i in range(self.depth):
            regions = []
            partitions = []
            for r in layers[i * 2]:
                mid = len(r) // 2
                permutation = self.random_state.permutation(r).tolist()
                p0 = tuple(sorted(permutation[:mid]))
                p1 = tuple(sorted(permutation[mid:]))
                regions.append(p0)
                regions.append(p1)
                partitions.append((p0, p1))
            layers.append(partitions)
            layers.append(regions)

        return layers

[docs]    def make_layers(self, n_repetitions: int = 1) -> List[List[tuple]]:
        """
        Generate a random graph's layers over multiple repetitions of features.

        :param n_repetitions: The number of repetitions.
        :return: A list of layers, alternating between regions and partitions.
        :raises ValueError: If a parameter is out of domain.
        """
        if n_repetitions <= 0:
            raise ValueError("The number of repetitions must be positve")

        root = [self.items]
        graph_layers = [root] + [[]] * (self.depth * 2)

        for _ in range(n_repetitions):
            layers = self.random_layers()
            for h in range(1, len(layers)):
                graph_layers[h] = graph_layers[h] + layers[h]

        return graph_layers