# Source code for pixyz.distributions.poe

from __future__ import print_function
import torch
from torch import nn

from ..utils import tolist, get_dict_values
from .exponential_distributions import Normal

class ProductOfNormal(Normal):
    r"""Product of normal distributions.

    .. math::
        p(z|x,y) \propto p(z)p(z|x)p(z|y)

    In this model, :math:`p(z|x)` and :math:`p(z|y)` perform as `experts` and :math:`p(z)` corresponds to a prior
    of `experts`.

    References
    ----------
    [Vedantam+ 2017] Generative Models of Visually Grounded Imagination

    [Wu+ 2018] Multimodal Generative Models for Scalable Weakly-Supervised Learning

    Examples
    --------
    >>> pon = ProductOfNormal([p_x, p_y]) # doctest: +SKIP
    >>> pon.sample({"x": x, "y": y}) # doctest: +SKIP
    {'x': tensor([[0., 0., 0., ..., 0., 0., 0.],
                  [0., 0., 0., ..., 0., 0., 0.],
                  [0., 0., 0., ..., 0., 0., 0.],
                  ...,
                  [0., 0., 0., ..., 0., 0., 0.],
                  [0., 0., 0., ..., 0., 0., 0.],
                  [0., 0., 0., ..., 0., 0., 0.]]),
     'y': tensor([[0., 0., 0., ..., 0., 0., 1.],
                  [0., 0., 1., ..., 0., 0., 0.],
                  [0., 1., 0., ..., 0., 0., 0.],
                  ...,
                  [0., 0., 0., ..., 0., 1., 0.],
                  [1., 0., 0., ..., 0., 0., 0.],
                  [0., 0., 0., ..., 0., 0., 1.]]),
     'z': tensor([[ 0.6611, 0.3811, 0.7778, ..., -0.0468, -0.3615, -0.6569],
                  [-0.0071, -0.9178, 0.6620, ..., -0.1472, 0.6023, 0.5903],
                  [-0.3723, -0.7758, 0.0195, ..., 0.8239, -0.3537, 0.3854],
                  ...,
                  [ 0.7820, -0.4761, 0.1804, ..., -0.5701, -0.0714, -0.5485],
                  [-0.1873, -0.2105, -0.1861, ..., -0.5372, 0.0752, 0.2777],
                  [-0.2563, -0.0828, 0.1605, ..., 0.2767, -0.8456, 0.7364]])}
    >>> pon.sample({"y": y}) # doctest: +SKIP
    {'y': tensor([[0., 0., 0., ..., 0., 0., 0.],
                  [0., 0., 0., ..., 0., 0., 1.],
                  [0., 0., 0., ..., 1., 0., 0.],
                  ...,
                  [0., 0., 0., ..., 0., 0., 0.],
                  [0., 1., 0., ..., 0., 0., 0.],
                  [0., 0., 0., ..., 0., 0., 0.]]),
     'z': tensor([[-0.3264, -0.4448, 0.3610, ..., -0.7378, 0.3002, 0.4370],
                  [ 0.0928, -0.1830, 1.1768, ..., 1.1808, -0.7226, -0.4152],
                  [ 0.6999, 0.2222, -0.2901, ..., 0.5706, 0.7091, 0.5179],
                  ...,
                  [ 0.5688, -1.6612, -0.0713, ..., -0.1400, -0.3903, 0.2533],
                  [ 0.5412, -0.0289, 0.6365, ..., 0.7407, 0.7838, 0.9218],
                  [ 0.0299, 0.5148, -0.1001, ..., 0.9938, 1.0689, -1.1902]])}
    >>> pon.sample() # same as sampling from unit Gaussian. # doctest: +SKIP
    {'z': tensor(-0.4494)}

    """

    def __init__(self, p=[], name="p", features_shape=torch.Size()):
        """
        Parameters
        ----------
        p : :obj:`list` of :class:`pixyz.distributions.Normal`.
            List of experts.
        name : :obj:`str`, defaults to "p"
            Name of this distribution.
            This name is displayed in prob_text and prob_factorized_text.
        features_shape : :obj:`torch.Size` or :obj:`list`, defaults to torch.Size())
            Shape of dimensions (features) of this distribution.

        Raises
        ------
        ValueError
            If no expert is given, if the experts do not all share the same ``var``,
            or if an expert's ``distribution_name`` is not ``"Normal"``.

        Examples
        --------
        >>> p_x = Normal(cond_var=['z'], loc='z', scale=torch.ones(1, 1))
        >>> pon = ProductOfNormal([p_x])
        >>> sample = pon.sample({'z': torch.zeros(1, 1)})
        >>> sample # doctest: +SKIP
        """
        p = tolist(p)
        if len(p) == 0:
            raise ValueError("At least one expert must be given.")

        # Every expert must model the same variable(s); the first expert is the reference.
        var = p[0].var
        cond_var = []

        for _p in p:
            if _p.var != var:
                raise ValueError("All experts must share the same variables, got {} and {}.".format(var, _p.var))

            if _p.distribution_name != "Normal":
                raise ValueError("All experts must be Normal distributions, got {}.".format(_p.distribution_name))

            # The product is conditioned on the concatenation of the experts' conditioning variables.
            cond_var += _p.cond_var

        super().__init__(cond_var=cond_var, var=var, name=name, features_shape=features_shape)
        self.p = nn.ModuleList(p)

    @property
    def prob_factorized_text(self):
        """str: Prior text ``p(var)`` followed by the ``prob_text`` of every expert."""
        prob_text = "p({})".format(','.join(self._var))
        if len(self._cond_var) != 0:
            prob_text += "".join([p.prob_text for p in self.p])

        return prob_text

    @property
    def prob_joint_factorized_and_text(self):
        """str: Return a formula of the factorized probability distribution."""
        if self.prob_factorized_text == self.prob_text:
            prob_text = self.prob_text
        else:
            prob_text = "{} \\propto {}".format(self.prob_text, self.prob_factorized_text)
        return prob_text

    def _get_expert_params(self, params_dict={}, **kwargs):
        """Get the output parameters of all experts.

        Experts for which ``params_dict`` contains none of their conditioning variables are skipped.

        Parameters
        ----------
        params_dict : dict
        **kwargs
            Arbitrary keyword arguments, forwarded to each expert's ``get_params``.

        Returns
        -------
        loc : torch.Tensor
            Stacked ``loc`` outputs of the specified experts.
        scale : torch.Tensor
            Stacked ``scale`` outputs of the specified experts.

        """
        loc = []
        scale = []

        for _p in self.p:
            inputs_dict = get_dict_values(params_dict, _p.cond_var, True)
            if len(inputs_dict) != 0:
                outputs = _p.get_params(inputs_dict, **kwargs)
                loc.append(outputs["loc"])
                scale.append(outputs["scale"])

        if not loc:
            # No expert is specified: torch.stack would fail on an empty list.
            # Return the same "no expert" placeholder that get_params uses for an empty
            # params_dict, so that only the prior contributes to the product.
            return torch.zeros(1), torch.zeros(1)

        loc = torch.stack(loc)
        scale = torch.stack(scale)

        return loc, scale

    def get_params(self, params_dict={}, **kwargs):
        """Compute ``loc`` and ``scale`` of the product of the prior and the specified experts.

        Parameters
        ----------
        params_dict : dict
            Values of the conditioning variables. When empty, only the prior is used.
        **kwargs
            Arbitrary keyword arguments, forwarded to the experts.

        Returns
        -------
        dict
            Dictionary with the keys ``"loc"`` and ``"scale"``.

        """
        # experts
        if len(params_dict) > 0:
            loc, scale = self._get_expert_params(params_dict, **kwargs)  # (n_expert, n_batch, output_dim)
        else:
            # A zero scale marks "no expert" and is excluded in _compute_expert_params.
            loc = torch.zeros(1)
            scale = torch.zeros(1)

        output_loc, output_scale = self._compute_expert_params(loc, scale)
        output_dict = {"loc": output_loc, "scale": output_scale}

        return output_dict

    @staticmethod
    def _compute_expert_params(loc, scale):
        """Compute parameters for the product of experts.
        It is assumed that unspecified experts are excluded from inputs
        (entries whose scale is 0 are treated as unspecified and contribute nothing).

        Parameters
        ----------
        loc : torch.Tensor
            Concatenation of mean vectors for specified experts. (n_expert, n_batch, output_dim)

        scale : torch.Tensor
            Concatenation of the square root of a diagonal covariance matrix for specified experts.
            (n_expert, n_batch, output_dim)

        Returns
        -------
        output_loc : torch.Tensor
            Mean vectors for this distribution. (n_batch, output_dim)

        output_scale : torch.Tensor
            The square root of diagonal covariance matrices for this distribution. (n_batch, output_dim)

        """
        # `scale` is a standard deviation, so the precision is 1 / scale ** 2 (not 1 / scale).
        variance = scale ** 2

        # parameter for prior (unit variance)
        prior_prec = 1  # prior_loc is not specified because it is equal to 0.

        # compute the diagonal precision matrix.
        # Masked assignment keeps the precision of unspecified entries (variance == 0) at 0
        # without ever dividing by zero.
        specified = variance != 0
        prec = torch.zeros_like(variance)
        prec[specified] = 1. / variance[specified]

        # compute the square root of a diagonal covariance matrix for the product of distributions.
        output_prec = torch.sum(prec, dim=0) + prior_prec
        output_variance = 1. / output_prec  # (n_batch, output_dim)

        # compute the mean vectors for the product of normal distributions.
        # The prior mean is 0, so it adds nothing to the precision-weighted sum.
        output_loc = torch.sum(prec * loc, dim=0)  # (n_batch, output_dim)
        output_loc = output_loc * output_variance

        return output_loc, torch.sqrt(output_variance)

    def _get_input_dict(self, x, var=None):
        """Convert ``x`` into a dict restricted to the variables in ``var``.

        Parameters
        ----------
        x : torch.Tensor, list or dict
            A tensor is assigned to ``var[0]``; a list is zipped with ``var``;
            a dict is used as it is.
        var : list of str, optional
            Variable names. Defaults to ``self.input_var``.

        Returns
        -------
        dict
            Result of ``get_dict_values(..., return_dict=True)`` on the converted input.

        Raises
        ------
        ValueError
            If ``x`` is not a tensor, a list or a dict.

        """
        if var is None:
            var = self.input_var

        # isinstance (rather than an exact type match) also accepts subclasses such as
        # nn.Parameter or OrderedDict.
        if isinstance(x, torch.Tensor):
            checked_x = {var[0]: x}

        elif isinstance(x, list):
            # TODO: we need to check if all the elements contained in this list are torch.Tensor.
            checked_x = dict(zip(var, x))

        elif isinstance(x, dict):
            # point of modification
            # NOTE(review): presumably the parent implementation is stricter about dict inputs,
            # while here any subset of experts may be given -- confirm against the base class.
            checked_x = x

        else:
            raise ValueError("The type of input is not valid, got %s." % type(x))

        return get_dict_values(checked_x, var, return_dict=True)

    def log_prob(self, sum_features=True, feature_dims=None):
        """Not implemented for a product of normal distributions."""
        raise NotImplementedError()

    def prob(self, sum_features=True, feature_dims=None):
        """Not implemented for a product of normal distributions."""
        raise NotImplementedError()

    def get_log_prob(self, x_dict, sum_features=True, feature_dims=None):
        """Not implemented for a product of normal distributions."""
        raise NotImplementedError()
class ElementWiseProductOfNormal(ProductOfNormal):
    r"""Product of normal distributions.
    In this distribution, each element of the input vector on the given distribution is considered as
    a different expert.

    .. math::
        p(z|x) = p(z|x_1, x_2) \propto p(z)p(z|x_1)p(z|x_2)

    Examples
    --------
    >>> pon = ElementWiseProductOfNormal(p) # doctest: +SKIP
    >>> pon.sample({"x": x}) # doctest: +SKIP
    {'x': tensor([[0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
                  [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]]),
     'z': tensor([[-0.3572, -0.0632, 0.4872, 0.2269, -0.1693, -0.0160, -0.0429, 0.2017,
                   -0.1589, -0.3380, -0.9598, 0.6216, -0.4296, -1.1349, 0.0901, 0.3994,
                   0.2313, -0.5227, -0.7973, 0.3968, 0.7137, -0.5639, -0.4891, -0.1249,
                   0.8256, 0.1463, 0.0801, -1.2202, 0.6984, -0.4036, 0.4960, -0.4376,
                   0.3310, -0.2243, -0.2381, -0.2200, 0.8969, 0.2674, 0.4681, 1.6764,
                   0.8127, 0.2722, -0.2048, 0.1903, -0.1398, 0.0099, 0.4382, -0.8016,
                   0.9947, 0.7556, -0.2017, -0.3920, 1.4212, -1.2529, -0.1002, -0.0031,
                   0.1876, 0.4267, 0.3622, 0.2648, 0.4752, 0.0843, -0.3065, -0.4922],
                  [ 0.3770, -0.0413, 0.9102, 0.2897, -0.0567, 0.5211, 1.5233, -0.3539,
                   0.5163, -0.2271, -0.1027, 0.0294, -1.4617, 0.1640, 0.2025, -0.2190,
                   0.0555, 0.5779, -0.2930, -0.2161, 0.2835, -0.0354, -0.2569, -0.7171,
                   0.0164, -0.4080, 1.1088, 0.3947, 0.2720, -0.0600, -0.9295, -0.0234,
                   0.5624, 0.4866, 0.5285, 1.1827, 0.2494, 0.0777, 0.7585, 0.5127,
                   0.7500, -0.3253, 0.0250, 0.0888, 1.0340, -0.1405, -0.8114, 0.4492,
                   0.2725, -0.0270, 0.6379, -0.8096, 0.4259, 0.3179, -0.1681, 0.3365,
                   0.6305, 0.5203, 0.2384, 0.0572, 0.4804, 0.9553, -0.3244, 1.5373]])}
    >>> pon.sample({"x": torch.zeros_like(x)}) # same as sampling from unit Gaussian. # doctest: +SKIP
    {'x': tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
                  [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]),
     'z': tensor([[-0.7777, -0.5908, -1.5498, -0.7505, 0.6201, 0.7218, 1.0045, 0.8923,
                   -0.8030, -0.3569, 0.2932, 0.2122, 0.1640, 0.7893, -0.3500, -1.0537,
                   -1.2769, 0.6122, -1.0083, -0.2915, -0.1928, -0.7486, 0.2418, -1.9013,
                   1.2514, 1.3035, -0.3029, -0.3098, -0.5415, 1.1970, -0.4443, 2.2393,
                   -0.6980, 0.2820, 1.6972, 0.6322, 0.4308, 0.8953, 0.7248, 0.4440,
                   2.2770, 1.7791, 0.7563, -1.1781, -0.8331, 0.1825, 1.5447, 0.1385,
                   -1.1348, 0.0257, 0.3374, 0.5889, 1.1231, -1.2476, -0.3801, -1.4404,
                   -1.3066, -1.2653, 0.5958, -1.7423, 0.7189, -0.7236, 0.2330, 0.3117],
                  [ 0.5495, 0.7210, -0.4708, -2.0631, -0.6170, 0.2436, -0.0133, -0.4616,
                   -0.8091, -0.1592, 1.3117, 0.0276, 0.6625, -0.3748, -0.5049, 1.8260,
                   -0.3631, 1.1546, -1.0913, 0.2712, 1.5493, 1.4294, -2.1245, -2.0422,
                   0.4976, -1.2785, 0.5028, 1.4240, 1.1983, 0.2468, 1.1682, -0.6725,
                   -1.1198, -1.4942, -0.3629, 0.1325, -0.2256, 0.4280, 0.9830, -1.9427,
                   -0.2181, 1.1850, -0.7514, -0.8172, 2.1031, -0.1698, -0.3777, -0.7863,
                   1.0936, -1.3720, 0.9999, 1.3302, -0.8954, -0.5999, 2.3305, 0.5702,
                   -1.0767, -0.2750, -0.3741, -0.7026, -1.5408, 0.0667, 1.2550, -0.5117]])}

    """

    def __init__(self, p, name="p", features_shape=torch.Size()):
        r"""
        Parameters
        ----------
        p : pixyz.distributions.Normal
            Each element of this input vector is considered as a different expert.
            When some elements are 0, experts corresponding to these elements are considered not to be specified.
            :math:`p(z|x) = p(z|x_1, x_2=0) \propto p(z)p(z|x_1)`
        name : str, defaults to "p"
            Name of this distribution.
            This name is displayed in prob_text and prob_factorized_text.
        features_shape : :obj:`torch.Size` or :obj:`list`, defaults to torch.Size())
            Shape of dimensions (features) of this distribution.

        Raises
        ------
        ValueError
            If ``p`` does not have exactly one conditioning variable.

        """
        if len(p.cond_var) != 1:
            raise ValueError("The given distribution must have exactly one conditioning variable, "
                             "got {}.".format(p.cond_var))

        super().__init__(p=p, name=name, features_shape=features_shape)

    def _get_input_dict(self, x, var=None):
        """Bypass ``ProductOfNormal._get_input_dict`` and use the next implementation in the MRO."""
        # The two-argument form is required: one-argument super(ProductOfNormal) is an unbound
        # super object and has no `_get_input_dict` attribute.
        return super(ProductOfNormal, self)._get_input_dict(x, var)

    @staticmethod
    def _get_mask(inputs, index):
        """Get a mask to the input to specify an expert identified by index.

        Parameters
        ----------
        inputs : torch.Tensor
            Indexed as ``[:, index]``, i.e. the second dimension enumerates the experts.
        index : int

        Returns
        -------
        torch.Tensor
            Tensor with the same shape and dtype as ``inputs``; column ``index`` is 1, the rest 0.

        """
        mask = torch.zeros_like(inputs)
        mask[:, index] = 1
        return mask

    def _get_params_with_masking(self, inputs, index, **kwargs):
        """Get the output parameters of the index-specified expert.

        Parameters
        ----------
        inputs : torch.Tensor
        index : int
        **kwargs
            Arbitrary keyword arguments, forwarded to the wrapped distribution's ``get_params``.

        Returns
        -------
        outputs : torch.Tensor
            ``loc`` and ``scale`` stacked along a new first dimension. (2, n_batch, output_dim)
            Rows of examples whose ``index``-th input element is 0 are set to 0 in both
            ``loc`` and ``scale``.

        Examples
        --------
        Illustrative values only (they depend on the wrapped distribution)::

            pon = ElementWiseProductOfNormal(p)
            a = torch.tensor([[1, 0, 0], [0, 1, 0]])

            pon._get_params_with_masking(a, 0)
            # tensor([[[0.01, 0.0131], [0, 0]],   # loc
            #         [[0.42, 0.39], [0, 0]]])    # scale

            pon._get_params_with_masking(a, 1)
            # tensor([[[0, 0], [0.021, 0.11]],    # loc
            #         [[0, 0], [0.293, 0.415]]])  # scale

            pon._get_params_with_masking(a, 2)
            # tensor([[[0, 0], [0, 0]],           # loc
            #         [[0, 0], [0, 0]]])          # scale

        """
        mask = self._get_mask(inputs, index)  # (n_batch, n_expert)

        # self.p is the nn.ModuleList built by ProductOfNormal.__init__; the single wrapped
        # distribution is its first element (a ModuleList itself has no `get_params`).
        outputs_dict = self.p[0].get_params({self.cond_var[0]: inputs * mask}, **kwargs)
        outputs = torch.stack([outputs_dict["loc"], outputs_dict["scale"]])  # (2, n_batch, output_dim)

        # When the index-th expert in the output examples is not specified, set zero to them.
        # A zero scale is what ProductOfNormal._compute_expert_params treats as "unspecified".
        outputs[:, inputs[:, index] == 0, :] = 0
        return outputs

    def _get_expert_params(self, params_dict={}, **kwargs):
        """Get the output parameters of all experts.

        Parameters
        ----------
        params_dict : dict
        **kwargs
            Arbitrary keyword arguments, forwarded to ``_get_params_with_masking``.

        Returns
        -------
        torch.Tensor
            ``loc`` of every expert. (n_expert, n_batch, output_dim)
        torch.Tensor
            ``scale`` of every expert. (n_expert, n_batch, output_dim)

        """
        inputs = get_dict_values(params_dict, self.cond_var)[0]  # (n_batch, n_expert=input_dim)

        n_expert = inputs.size()[1]

        # Forward **kwargs so that per-call options reach the wrapped distribution.
        outputs = [self._get_params_with_masking(inputs, i, **kwargs) for i in range(n_expert)]
        outputs = torch.stack(outputs)  # (n_expert, 2, n_batch, output_dim)

        return outputs[:, 0, :, :], outputs[:, 1, :, :]  # (n_expert, n_batch, output_dim)