Source code for deeprob.utils.region

# MIT License: Copyright (c) 2021 Lorenzo Loconte, Gennaro Gala

from typing import Optional, List

import numpy as np

from deeprob.utils.random import RandomState, check_random_state


[docs]class RegionGraph: def __init__(self, n_features: int, depth: int, random_state: Optional[RandomState] = None): """ Initialize a region graph. A region graph is defined w.r.t. a set of indices of random variable in a SPN. A *region* R is defined as a non-empty subset of the indices, and represented as sorted tuples with unique entries. A *partition* P of a region R is defined as a collection of non-empty sets, which are non-overlapping, and whose union is R. R is also called *parent* region of P. Any region C such that C is in partition P is called *child region* of P. So, a region is represented as a sorted tuple of integers (unique elements) and a partition is represented as a sorted tuple of regions (non-overlapping, not-empty, at least 2). A *region graph* is an acyclic, directed, bi-partite graph over regions and partitions. So, any child of a region R is a partition of R, and any child of a partition is a child region of the partition. The root of the region graph is a sorted tuple composed of all the elements. The leaves of the region graph must also be regions. They are called input regions, or leaf regions. Given a region graph, we can easily construct a corresponding SPN: 1) Associate I distributions to each input region. 2) Associate K sum nodes to each other (non-input) region. 3) For each partition P in the region graph, take all cross-products (as product nodes) of distributions/sum nodes associated with the child regions. Connect these products as children of all sum nodes in the parent region of P. In the end, this procedure will always deliver a complete and decomposable SPN. :param n_features: The number of features. :param depth: The maximum depth. :param random_state: The random state. It can be either None, a seed integer or a Numpy RandomState. :raises ValueError: If a parameter is out of domain. """ if n_features <= 0: raise ValueError("The number of features must be positive") if depth <= 0: raise ValueError("The region graph depth must be positive") if depth > int(np.log2(n_features)): raise ValueError("Invalid region graph depth based on the number of features") self.items = tuple(range(n_features)) self.depth = depth # Check the random state self.random_state = check_random_state(random_state)
[docs] def random_layers(self) -> List[List[tuple]]: """ Generate a list of layers randomly over a single repetition of features. :return: A list of layers, alternating between regions and partitions. """ root = [self.items] layers = [root] for i in range(self.depth): regions = [] partitions = [] for r in layers[i * 2]: mid = len(r) // 2 permutation = self.random_state.permutation(r).tolist() p0 = tuple(sorted(permutation[:mid])) p1 = tuple(sorted(permutation[mid:])) regions.append(p0) regions.append(p1) partitions.append((p0, p1)) layers.append(partitions) layers.append(regions) return layers
[docs] def make_layers(self, n_repetitions: int = 1) -> List[List[tuple]]: """ Generate a random graph's layers over multiple repetitions of features. :param n_repetitions: The number of repetitions. :return: A list of layers, alternating between regions and partitions. :raises ValueError: If a parameter is out of domain. """ if n_repetitions <= 0: raise ValueError("The number of repetitions must be positve") root = [self.items] graph_layers = [root] + [[]] * (self.depth * 2) for _ in range(n_repetitions): layers = self.random_layers() for h in range(1, len(layers)): graph_layers[h] = graph_layers[h] + layers[h] return graph_layers